Skip to content

Commit ac51c6c

Browse files
committed
Make truncate filter a normalizer token filter
This filter may be useful with keyword fields when you prefer to truncate the data rather than ignore it (`ignore_above`). Move and re-purpose the TruncateTokenFilterTests from the server module to analysis-common (the truncate filter implementation itself moved to Lucene a long time ago). Signed-off-by: David Causse <[email protected]>
1 parent 753c135 commit ac51c6c

File tree

3 files changed

+90
-78
lines changed

3 files changed

+90
-78
lines changed

modules/analysis-common/src/main/java/org/opensearch/analysis/common/TruncateTokenFilterFactory.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@
3838
import org.opensearch.env.Environment;
3939
import org.opensearch.index.IndexSettings;
4040
import org.opensearch.index.analysis.AbstractTokenFilterFactory;
41+
import org.opensearch.index.analysis.NormalizingTokenFilterFactory;
4142

42-
public class TruncateTokenFilterFactory extends AbstractTokenFilterFactory {
43+
public class TruncateTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
4344

4445
private final int length;
4546

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
/*
10+
* Licensed to Elasticsearch under one or more contributor
11+
* license agreements. See the NOTICE file distributed with
12+
* this work for additional information regarding copyright
13+
* ownership. Elasticsearch licenses this file to you under
14+
* the Apache License, Version 2.0 (the "License"); you may
15+
* not use this file except in compliance with the License.
16+
* You may obtain a copy of the License at
17+
*
18+
* http://www.apache.org/licenses/LICENSE-2.0
19+
*
20+
* Unless required by applicable law or agreed to in writing,
21+
* software distributed under the License is distributed on an
22+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
23+
* KIND, either express or implied. See the License for the
24+
* specific language governing permissions and limitations
25+
* under the License.
26+
*/
27+
28+
/*
29+
* Modifications Copyright OpenSearch Contributors. See
30+
* GitHub history for details.
31+
*/
32+
33+
package org.opensearch.analysis.common;
34+
35+
import org.apache.lucene.util.BytesRef;
36+
import org.opensearch.common.settings.Settings;
37+
import org.opensearch.env.Environment;
38+
import org.opensearch.index.analysis.AnalysisTestsHelper;
39+
import org.opensearch.index.analysis.NamedAnalyzer;
40+
import org.opensearch.test.OpenSearchTestCase;
41+
import org.opensearch.test.OpenSearchTokenStreamTestCase;
42+
43+
import java.io.IOException;
44+
45+
public class TruncateTokenFilterTests extends OpenSearchTokenStreamTestCase {
46+
47+
public void testFilter() throws IOException {
48+
Settings settings = Settings.builder()
49+
.put("index.analysis.filter.truncate.type", "truncate")
50+
.put("index.analysis.filter.truncate.length", 3)
51+
.put("index.analysis.analyzer.my_analyzer.type", "custom")
52+
.put("index.analysis.analyzer.my_analyzer.tokenizer", "whitespace")
53+
.putList("index.analysis.analyzer.my_analyzer.filter", "truncate")
54+
.putList("index.analysis.normalizer.my_normalizer.filter", "truncate")
55+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
56+
.build();
57+
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
58+
settings,
59+
new CommonAnalysisModulePlugin()
60+
);
61+
NamedAnalyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");
62+
assertNotNull(analyzer);
63+
64+
assertTokenStreamContents(analyzer.tokenStream("foo", "a bb ccc dddd"), new String[] { "a", "bb", "ccc", "ddd" });
65+
}
66+
67+
public void testNormalizer() throws IOException {
68+
Settings settings = Settings.builder()
69+
.put("index.analysis.filter.truncate.type", "truncate")
70+
.put("index.analysis.filter.truncate.length", 3)
71+
.put("index.analysis.normalizer.my_normalizer.type", "custom")
72+
.putList("index.analysis.normalizer.my_normalizer.filter", "truncate")
73+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
74+
.build();
75+
OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
76+
settings,
77+
new CommonAnalysisModulePlugin()
78+
);
79+
assertNull(analysis.indexAnalyzers.get("my_normalizer"));
80+
NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
81+
assertNotNull(normalizer);
82+
assertEquals("my_normalizer", normalizer.name());
83+
assertEquals(new BytesRef("a"), normalizer.normalize("foo", "a"));
84+
assertEquals(new BytesRef("bb"), normalizer.normalize("foo", "bb"));
85+
assertEquals(new BytesRef("ccc"), normalizer.normalize("foo", "ccc"));
86+
assertEquals(new BytesRef("ddd"), normalizer.normalize("foo", "dddd"));
87+
}
88+
}

server/src/test/java/org/opensearch/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java

Lines changed: 0 additions & 77 deletions
This file was deleted.

0 commit comments

Comments
 (0)