diff --git a/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiTokenizerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiTokenizerFactory.java index ac4def8ac9a11..2939711f6f7e1 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiTokenizerFactory.java +++ b/plugins/analysis-kuromoji/src/main/java/org/opensearch/index/analysis/KuromojiTokenizerFactory.java @@ -79,6 +79,9 @@ public KuromojiTokenizerFactory(IndexSettings indexSettings, Environment env, St private static String parse(String rule, Set dup) { String[] values = CSVUtil.parse(rule); + if (values.length == 0) { + throw new IllegalArgumentException("Malformed csv in user dictionary."); + } if (dup.add(values[0]) == false) { throw new IllegalArgumentException("Found duplicate term [" + values[0] + "] in user dictionary."); } diff --git a/plugins/analysis-kuromoji/src/test/java/org/opensearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/opensearch/index/analysis/KuromojiAnalysisTests.java index 23e6ef9fea059..03d9df6ebd6b2 100644 --- a/plugins/analysis-kuromoji/src/test/java/org/opensearch/index/analysis/KuromojiAnalysisTests.java +++ b/plugins/analysis-kuromoji/src/test/java/org/opensearch/index/analysis/KuromojiAnalysisTests.java @@ -379,6 +379,15 @@ public void testKuromojiAnalyzerInvalidUserDictOption() throws Exception { ); } + public void testKuromojiAnalyzerEmptyDictRule() throws Exception { + Settings settings = Settings.builder() + .put("index.analysis.analyzer.my_analyzer.type", "kuromoji") + .putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "\"") + .build(); + RuntimeException exc = expectThrows(RuntimeException.class, () -> createTestAnalysis(settings)); + assertThat(exc.getMessage(), equalTo("Line [1]: Malformed csv in user dictionary.")); + } + public void testKuromojiAnalyzerDuplicateUserDictRule() throws Exception { Settings settings = Settings.builder() .put("index.analysis.analyzer.my_analyzer.type", "kuromoji")