Skip to content

Commit 7a5c00e

Browse files
Add custom synonym_analyzer
Signed-off-by: Prudhvi Godithi <[email protected]>
1 parent 6f1b59e commit 7a5c00e

File tree

6 files changed

+61
-11
lines changed

6 files changed

+61
-11
lines changed

buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,14 +1216,18 @@ private void createConfiguration() {
12161216
);
12171217

12181218
final List<Path> configFiles;
1219-
try (Stream<Path> stream = Files.list(getDistroDir().resolve("config"))) {
1219+
try (Stream<Path> stream = Files.walk(getDistroDir().resolve("config"))) {
12201220
configFiles = stream.collect(Collectors.toList());
12211221
}
12221222
logToProcessStdout("Copying additional config files from distro " + configFiles);
12231223
for (Path file : configFiles) {
1224-
Path dest = configFile.getParent().resolve(file.getFileName());
1225-
if (Files.exists(dest) == false) {
1226-
Files.copy(file, dest);
1224+
Path relativePath = getDistroDir().resolve("config").relativize(file);
1225+
Path dest = configFile.getParent().resolve(relativePath);
1226+
if (Files.isDirectory(file)) {
1227+
Files.createDirectories(dest);
1228+
} else {
1229+
Files.createDirectories(dest.getParent());
1230+
Files.copy(file, dest, StandardCopyOption.REPLACE_EXISTING);
12271231
}
12281232
}
12291233
} catch (IOException e) {

modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@
146146
import org.opensearch.index.analysis.PreConfiguredTokenizer;
147147
import org.opensearch.index.analysis.TokenFilterFactory;
148148
import org.opensearch.index.analysis.TokenizerFactory;
149+
import org.opensearch.indices.analysis.AnalysisModule;
149150
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
150151
import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
151152
import org.opensearch.plugins.AnalysisPlugin;
@@ -157,9 +158,11 @@
157158
import org.opensearch.threadpool.ThreadPool;
158159
import org.opensearch.watcher.ResourceWatcherService;
159160

161+
import java.io.IOException;
160162
import java.util.ArrayList;
161163
import java.util.Collection;
162164
import java.util.Collections;
165+
import java.util.HashMap;
163166
import java.util.List;
164167
import java.util.Map;
165168
import java.util.TreeMap;
@@ -176,6 +179,7 @@ public class CommonAnalysisModulePlugin extends Plugin implements AnalysisPlugin
176179

177180
private final SetOnce<ScriptService> scriptService = new SetOnce<>();
178181

182+
179183
@Override
180184
public Collection<Object> createComponents(
181185
Client client,
@@ -194,6 +198,7 @@ public Collection<Object> createComponents(
194198
return Collections.emptyList();
195199
}
196200

201+
197202
@Override
198203
public List<ScriptContext<?>> getContexts() {
199204
return Collections.singletonList(AnalysisPredicateScript.CONTEXT);
@@ -332,8 +337,6 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
332337
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
333338
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
334339
filters.put("stemmer", StemmerTokenFilterFactory::new);
335-
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
336-
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
337340
filters.put("trim", TrimTokenFilterFactory::new);
338341
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
339342
filters.put("unique", UniqueTokenFilterFactory::new);
@@ -343,6 +346,18 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
343346
return filters;
344347
}
345348

349+
@Override
350+
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
351+
Map<String, AnalysisProvider<TokenFilterFactory>> filters = getTokenFilters();
352+
filters.put("synonym", requiresAnalysisSettings((indexSettings, environment, name, settings) ->
353+
new SynonymTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry())
354+
));
355+
filters.put("synonym_graph", requiresAnalysisSettings((indexSettings, environment, name, settings) ->
356+
new SynonymGraphTokenFilterFactory(indexSettings, environment, name, settings, analysisModule.getAnalysisRegistry())
357+
));
358+
return filters;
359+
}
360+
346361
@Override
347362
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
348363
Map<String, AnalysisProvider<CharFilterFactory>> filters = new TreeMap<>();

modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.opensearch.env.Environment;
4141
import org.opensearch.index.IndexSettings;
4242
import org.opensearch.index.analysis.AnalysisMode;
43+
import org.opensearch.index.analysis.AnalysisRegistry;
4344
import org.opensearch.index.analysis.CharFilterFactory;
4445
import org.opensearch.index.analysis.TokenFilterFactory;
4546
import org.opensearch.index.analysis.TokenizerFactory;
@@ -49,8 +50,8 @@
4950

5051
public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
5152

52-
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
53-
super(indexSettings, env, name, settings);
53+
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) {
54+
super(indexSettings, env, name, settings, analysisRegistry);
5455
}
5556

5657
@Override

modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@
4848
import org.opensearch.index.analysis.CustomAnalyzer;
4949
import org.opensearch.index.analysis.TokenFilterFactory;
5050
import org.opensearch.index.analysis.TokenizerFactory;
51+
import org.opensearch.index.analysis.AnalysisRegistry;
5152

53+
import java.io.IOException;
5254
import java.io.Reader;
5355
import java.io.StringReader;
5456
import java.util.List;
@@ -64,8 +66,10 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
6466
protected final Settings settings;
6567
protected final Environment environment;
6668
protected final AnalysisMode analysisMode;
69+
private final String synonymAnalyzer;
70+
private final AnalysisRegistry analysisRegistry;
6771

68-
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
72+
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings, AnalysisRegistry analysisRegistry) {
6973
super(indexSettings, name, settings);
7074
this.settings = settings;
7175

@@ -83,6 +87,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
8387
boolean updateable = settings.getAsBoolean("updateable", false);
8488
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
8589
this.environment = env;
90+
this.synonymAnalyzer = settings.get("synonym_analyzer", null);
91+
this.analysisRegistry = analysisRegistry;
8692
}
8793

8894
@Override
@@ -137,6 +143,17 @@ Analyzer buildSynonymAnalyzer(
137143
List<TokenFilterFactory> tokenFilters,
138144
Function<String, TokenFilterFactory> allFilters
139145
) {
146+
if (synonymAnalyzer != null) {
147+
Analyzer customSynonymAnalyzer;
148+
try {
149+
customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzer);
150+
} catch (IOException e) {
151+
throw new RuntimeException(e);
152+
}
153+
if (customSynonymAnalyzer != null) {
154+
return customSynonymAnalyzer;
155+
}
156+
}
140157
return new CustomAnalyzer(
141158
tokenizer,
142159
charFilters.toArray(new CharFilterFactory[0]),
@@ -177,5 +194,4 @@ Reader getRulesFromSettings(Environment env) {
177194
}
178195
return rulesReader;
179196
}
180-
181197
}

server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,12 @@ public boolean requiresAnalysisSettings() {
165165
)
166166
);
167167

168-
tokenFilters.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
168+
for (AnalysisPlugin plugin : plugins) {
169+
Map<String, AnalysisProvider<TokenFilterFactory>> filters = plugin.getTokenFilters(this);
170+
for (Map.Entry<String, AnalysisProvider<TokenFilterFactory>> entry : filters.entrySet()) {
171+
tokenFilters.register(entry.getKey(), entry.getValue());
172+
}
173+
}
169174
return tokenFilters;
170175
}
171176

server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.opensearch.index.analysis.PreConfiguredTokenizer;
4848
import org.opensearch.index.analysis.TokenFilterFactory;
4949
import org.opensearch.index.analysis.TokenizerFactory;
50+
import org.opensearch.indices.analysis.AnalysisModule;
5051
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
5152

5253
import java.io.IOException;
@@ -84,6 +85,14 @@ default Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
8485
return emptyMap();
8586
}
8687

88+
/**
89+
* Override to add additional {@link TokenFilter}s that need access to the AnalysisModule.
90+
* The default implementation calls the existing getTokenFilters() method for backward compatibility.
91+
*/
92+
default Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters(AnalysisModule analysisModule) {
93+
return getTokenFilters();
94+
}
95+
8796
/**
8897
* Override to add additional {@link TokenFilter}s. See {@link #requiresAnalysisSettings(AnalysisProvider)}
8998
* on how to get the configuration from the index.

0 commit comments

Comments
 (0)