5050import org .opensearch .test .IndexSettingsModule ;
5151import org .opensearch .test .OpenSearchTestCase ;
5252import org .hamcrest .MatcherAssert ;
53+ import org .junit .Before ;
5354
5455import java .io .IOException ;
56+ import java .io .InputStream ;
57+ import java .nio .file .Files ;
58+ import java .nio .file .Path ;
5559import java .util .ArrayList ;
5660import java .util .Arrays ;
5761import java .util .List ;
6367import static org .hamcrest .Matchers .instanceOf ;
6468
6569public class CompoundAnalysisTests extends OpenSearchTestCase {
70+
71+ Settings [] settingsArr ;
72+
73+ @ Before
74+ public void initialize () throws IOException {
75+ final Path home = createTempDir ();
76+ copyHyphenationPatternsFile (home );
77+ this .settingsArr = new Settings [] { getJsonSettings (home ), getYamlSettings (home ) };
78+ }
79+
6680 public void testDefaultsCompoundAnalysis () throws Exception {
67- Settings settings = getJsonSettings ();
68- IndexSettings idxSettings = IndexSettingsModule .newIndexSettings ("test" , settings );
69- AnalysisModule analysisModule = createAnalysisModule (settings );
70- TokenFilterFactory filterFactory = analysisModule .getAnalysisRegistry ().buildTokenFilterFactories (idxSettings ).get ("dict_dec" );
71- MatcherAssert .assertThat (filterFactory , instanceOf (DictionaryCompoundWordTokenFilterFactory .class ));
81+ for (Settings settings : this .settingsArr ) {
82+ IndexSettings idxSettings = IndexSettingsModule .newIndexSettings ("test" , settings );
83+ AnalysisModule analysisModule = createAnalysisModule (settings );
84+ TokenFilterFactory filterFactory = analysisModule .getAnalysisRegistry ().buildTokenFilterFactories (idxSettings ).get ("dict_dec" );
85+ MatcherAssert .assertThat (filterFactory , instanceOf (DictionaryCompoundWordTokenFilterFactory .class ));
86+ }
7287 }
7388
7489 public void testDictionaryDecompounder () throws Exception {
75- Settings [] settingsArr = new Settings [] { getJsonSettings (), getYamlSettings () };
76- for (Settings settings : settingsArr ) {
90+ for (Settings settings : this .settingsArr ) {
7791 List <String > terms = analyze (settings , "decompoundingAnalyzer" , "donaudampfschiff spargelcremesuppe" );
7892 MatcherAssert .assertThat (terms .size (), equalTo (8 ));
7993 MatcherAssert .assertThat (
@@ -83,6 +97,26 @@ public void testDictionaryDecompounder() throws Exception {
8397 }
8498 }
8599
100+ // Hyphenation Decompounder tests mimic the behavior of lucene tests
101+ // lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
102+ public void testHyphenationDecompounder () throws Exception {
103+ for (Settings settings : this .settingsArr ) {
104+ List <String > terms = analyze (settings , "hyphenationAnalyzer" , "min veninde som er lidt af en læsehest" );
105+ MatcherAssert .assertThat (terms .size (), equalTo (10 ));
106+ MatcherAssert .assertThat (terms , hasItems ("min" , "veninde" , "som" , "er" , "lidt" , "af" , "en" , "læsehest" , "læse" , "hest" ));
107+ }
108+ }
109+
110+ // Hyphenation Decompounder tests mimic the behavior of lucene tests
111+ // lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java
112+ public void testHyphenationDecompounderNoSubMatches () throws Exception {
113+ for (Settings settings : this .settingsArr ) {
114+ List <String > terms = analyze (settings , "hyphenationAnalyzerNoSubMatches" , "basketballkurv" );
115+ MatcherAssert .assertThat (terms .size (), equalTo (3 ));
116+ MatcherAssert .assertThat (terms , hasItems ("basketballkurv" , "basketball" , "kurv" ));
117+ }
118+ }
119+
86120 private List <String > analyze (Settings settings , String analyzerName , String text ) throws IOException {
87121 IndexSettings idxSettings = IndexSettingsModule .newIndexSettings ("test" , settings );
88122 AnalysisModule analysisModule = createAnalysisModule (settings );
@@ -111,21 +145,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
111145 }));
112146 }
113147
114- private Settings getJsonSettings () throws IOException {
148+ private void copyHyphenationPatternsFile (Path home ) throws IOException {
149+ InputStream hyphenation_patterns_path = getClass ().getResourceAsStream ("da_UTF8.xml" );
150+ Path config = home .resolve ("config" );
151+ Files .createDirectory (config );
152+ Files .copy (hyphenation_patterns_path , config .resolve ("da_UTF8.xml" ));
153+ }
154+
155+ private Settings getJsonSettings (Path home ) throws IOException {
115156 String json = "/org/opensearch/analysis/common/test1.json" ;
116157 return Settings .builder ()
117158 .loadFromStream (json , getClass ().getResourceAsStream (json ), false )
118159 .put (IndexMetadata .SETTING_VERSION_CREATED , Version .CURRENT )
119- .put (Environment .PATH_HOME_SETTING .getKey (), createTempDir () .toString ())
160+ .put (Environment .PATH_HOME_SETTING .getKey (), home .toString ())
120161 .build ();
121162 }
122163
123- private Settings getYamlSettings () throws IOException {
164+ private Settings getYamlSettings (Path home ) throws IOException {
124165 String yaml = "/org/opensearch/analysis/common/test1.yml" ;
125166 return Settings .builder ()
126167 .loadFromStream (yaml , getClass ().getResourceAsStream (yaml ), false )
127168 .put (IndexMetadata .SETTING_VERSION_CREATED , Version .CURRENT )
128- .put (Environment .PATH_HOME_SETTING .getKey (), createTempDir () .toString ())
169+ .put (Environment .PATH_HOME_SETTING .getKey (), home .toString ())
129170 .build ();
130171 }
131172}
0 commit comments