Skip to content

Commit 768f629

Browse files
committed
Remove ingest processor supports field patterns and excluding fields
1 parent e9affea commit 768f629

File tree

6 files changed

+579
-35
lines changed

6 files changed

+579
-35
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
9898
- [Remote cluster state] Make index and global metadata upload timeout dynamic cluster settings ([#10814](https://github.com/opensearch-project/OpenSearch/pull/10814))
9999
- Added cluster setting cluster.restrict.index.replication_type to restrict setting of index setting replication type ([#10866](https://github.com/opensearch-project/OpenSearch/pull/10866))
100100
- Add cluster state stats ([#10670](https://github.com/opensearch-project/OpenSearch/pull/10670))
101+
- The `remove` ingest processor now supports field patterns and excluding fields
101102

102103
### Dependencies
103104
- Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298))

modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java

Lines changed: 220 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232

3333
package org.opensearch.ingest.common;
3434

35+
import org.opensearch.common.ValidationException;
36+
import org.opensearch.common.regex.Regex;
3537
import org.opensearch.core.common.Strings;
3638
import org.opensearch.ingest.AbstractProcessor;
3739
import org.opensearch.ingest.ConfigurationUtils;
@@ -41,10 +43,14 @@
4143
import org.opensearch.script.TemplateScript;
4244

4345
import java.util.ArrayList;
46+
import java.util.HashSet;
4447
import java.util.List;
4548
import java.util.Map;
49+
import java.util.Set;
4650
import java.util.stream.Collectors;
4751

52+
import static org.opensearch.ingest.ConfigurationUtils.newConfigurationException;
53+
4854
/**
4955
* Processor that removes existing fields. A missing or empty field path raises an error unless ignore_missing is set.
5056
*/
@@ -53,34 +59,118 @@ public final class RemoveProcessor extends AbstractProcessor {
5359
public static final String TYPE = "remove";
5460

5561
private final List<TemplateScript.Factory> fields;
62+
private final List<String> fieldPatterns;
63+
private final List<TemplateScript.Factory> excludeFields;
64+
private final List<String> excludeFieldPatterns;
5665
private final boolean ignoreMissing;
5766

58-
RemoveProcessor(String tag, String description, List<TemplateScript.Factory> fields, boolean ignoreMissing) {
67+
RemoveProcessor(
68+
String tag,
69+
String description,
70+
List<TemplateScript.Factory> fields,
71+
List<String> fieldPatterns,
72+
List<TemplateScript.Factory> excludeFields,
73+
List<String> excludeFieldPatterns,
74+
boolean ignoreMissing
75+
) {
5976
super(tag, description);
6077
this.fields = new ArrayList<>(fields);
78+
this.fieldPatterns = new ArrayList<>(fieldPatterns);
79+
this.excludeFields = new ArrayList<>(excludeFields);
80+
this.excludeFieldPatterns = new ArrayList<>(excludeFieldPatterns);
6181
this.ignoreMissing = ignoreMissing;
6282
}
6383

6484
public List<TemplateScript.Factory> getFields() {
6585
return fields;
6686
}
6787

88+
public List<String> getFieldPatterns() {
89+
return fieldPatterns;
90+
}
91+
92+
public List<TemplateScript.Factory> getExcludeFields() {
93+
return excludeFields;
94+
}
95+
96+
public List<String> getExcludeFieldPatterns() {
97+
return excludeFieldPatterns;
98+
}
99+
68100
@Override
69101
public IngestDocument execute(IngestDocument document) {
70-
fields.forEach(field -> {
71-
String path = document.renderTemplate(field);
72-
final boolean fieldPathIsNullOrEmpty = Strings.isNullOrEmpty(path);
73-
if (fieldPathIsNullOrEmpty || document.hasField(path) == false) {
74-
if (ignoreMissing) {
75-
return;
76-
} else if (fieldPathIsNullOrEmpty) {
77-
throw new IllegalArgumentException("field path cannot be null nor empty");
78-
} else {
79-
throw new IllegalArgumentException("field [" + path + "] doesn't exist");
102+
if (!fields.isEmpty()) {
103+
fields.forEach(field -> {
104+
String path = document.renderTemplate(field);
105+
final boolean fieldPathIsNullOrEmpty = Strings.isNullOrEmpty(path);
106+
if (fieldPathIsNullOrEmpty || document.hasField(path) == false) {
107+
if (ignoreMissing) {
108+
return;
109+
} else if (fieldPathIsNullOrEmpty) {
110+
throw new IllegalArgumentException("field path cannot be null nor empty");
111+
} else {
112+
throw new IllegalArgumentException("field [" + path + "] doesn't exist");
113+
}
80114
}
81-
}
82-
document.removeField(path);
83-
});
115+
116+
document.removeField(path);
117+
});
118+
}
119+
120+
if (!fieldPatterns.isEmpty()) {
121+
Set<String> existingFields = new HashSet<>(document.getSourceAndMetadata().keySet());
122+
Set<String> metadataFields = document.getMetadata()
123+
.keySet()
124+
.stream()
125+
.map(IngestDocument.Metadata::getFieldName)
126+
.collect(Collectors.toSet());
127+
existingFields.forEach(field -> {
128+
// ignore metadata fields such as _index, _id, etc.
129+
if (!metadataFields.contains(field)) {
130+
final boolean matched = fieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field));
131+
if (matched) {
132+
document.removeField(field);
133+
}
134+
}
135+
});
136+
}
137+
138+
Set<String> excludeFieldSet = new HashSet<>();
139+
if (!excludeFields.isEmpty()) {
140+
excludeFields.forEach(field -> {
141+
String path = document.renderTemplate(field);
142+
// ignore the empty or null field path
143+
if (!Strings.isNullOrEmpty(path)) {
144+
excludeFieldSet.add(path);
145+
}
146+
});
147+
}
148+
149+
if (!excludeFieldSet.isEmpty() || !excludeFieldPatterns.isEmpty()) {
150+
Set<String> existingFields = new HashSet<>(document.getSourceAndMetadata().keySet());
151+
Set<String> metadataFields = document.getMetadata()
152+
.keySet()
153+
.stream()
154+
.map(IngestDocument.Metadata::getFieldName)
155+
.collect(Collectors.toSet());
156+
existingFields.forEach(field -> {
157+
// ignore metadata fields such as _index, _id, etc.
158+
if (!metadataFields.contains(field)) {
159+
// when both exclude_field and exclude_field_pattern are set, remove the field only if it matches neither of them
160+
// otherwise, remove the field if it does not match whichever of the two is set
161+
if (!excludeFieldPatterns.isEmpty()) {
162+
final boolean matched = excludeFieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field));
163+
if (!excludeFieldSet.isEmpty() && !excludeFieldSet.contains(field) && !matched
164+
|| excludeFieldSet.isEmpty() && !matched) {
165+
document.removeField(field);
166+
}
167+
} else if (!excludeFieldSet.isEmpty() && !excludeFieldSet.contains(field)) {
168+
document.removeField(field);
169+
}
170+
}
171+
});
172+
}
173+
84174
return document;
85175
}
86176

@@ -105,20 +195,125 @@ public RemoveProcessor create(
105195
Map<String, Object> config
106196
) throws Exception {
107197
final List<String> fields = new ArrayList<>();
108-
final Object field = ConfigurationUtils.readObject(TYPE, processorTag, config, "field");
109-
if (field instanceof List) {
110-
@SuppressWarnings("unchecked")
111-
List<String> stringList = (List<String>) field;
112-
fields.addAll(stringList);
113-
} else {
114-
fields.add((String) field);
198+
final List<String> fieldPatterns = new ArrayList<>();
199+
final List<String> excludeFields = new ArrayList<>();
200+
final List<String> excludeFieldPatterns = new ArrayList<>();
201+
202+
final Object field = ConfigurationUtils.readOptionalObject(config, "field");
203+
final Object fieldPattern = ConfigurationUtils.readOptionalObject(config, "field_pattern");
204+
final Object excludeField = ConfigurationUtils.readOptionalObject(config, "exclude_field");
205+
final Object excludeFieldPattern = ConfigurationUtils.readOptionalObject(config, "exclude_field_pattern");
206+
207+
if (field == null && fieldPattern == null && excludeField == null && excludeFieldPattern == null) {
208+
throw newConfigurationException(
209+
TYPE,
210+
processorTag,
211+
"field",
212+
"at least one of the parameters field, field_pattern, exclude_field and exclude_field_pattern need to be set"
213+
);
214+
}
215+
216+
if ((field != null || fieldPattern != null) && (excludeField != null || excludeFieldPattern != null)) {
217+
throw newConfigurationException(
218+
TYPE,
219+
processorTag,
220+
"field",
221+
"ether (field,field_pattern) or (exclude_field,exclude_field_pattern) can be set"
222+
);
223+
}
224+
225+
List<TemplateScript.Factory> fieldCompiledTemplates = new ArrayList<>();
226+
if (field != null) {
227+
if (field instanceof List) {
228+
@SuppressWarnings("unchecked")
229+
List<String> stringList = (List<String>) field;
230+
fields.addAll(stringList);
231+
} else {
232+
fields.add((String) field);
233+
}
234+
fieldCompiledTemplates = fields.stream()
235+
.map(f -> ConfigurationUtils.compileTemplate(TYPE, processorTag, "field", f, scriptService))
236+
.collect(Collectors.toList());
237+
}
238+
239+
if (fieldPattern != null) {
240+
if (fieldPattern instanceof List) {
241+
@SuppressWarnings("unchecked")
242+
List<String> fieldPatternList = (List<String>) fieldPattern;
243+
fieldPatterns.addAll(fieldPatternList);
244+
} else {
245+
fieldPatterns.add((String) fieldPattern);
246+
}
247+
validateFieldPatterns(processorTag, fieldPatterns, "field_pattern");
248+
}
249+
250+
List<TemplateScript.Factory> excludeFieldCompiledTemplates = new ArrayList<>();
251+
if (excludeField != null) {
252+
if (excludeField instanceof List) {
253+
@SuppressWarnings("unchecked")
254+
List<String> stringList = (List<String>) excludeField;
255+
excludeFields.addAll(stringList);
256+
} else {
257+
excludeFields.add((String) excludeField);
258+
}
259+
excludeFieldCompiledTemplates = excludeFields.stream()
260+
.map(f -> ConfigurationUtils.compileTemplate(TYPE, processorTag, "exclude_field", f, scriptService))
261+
.collect(Collectors.toList());
262+
}
263+
264+
if (excludeFieldPattern != null) {
265+
if (excludeFieldPattern instanceof List) {
266+
@SuppressWarnings("unchecked")
267+
List<String> excludeFieldPatternList = (List<String>) excludeFieldPattern;
268+
excludeFieldPatterns.addAll(excludeFieldPatternList);
269+
} else {
270+
excludeFieldPatterns.add((String) excludeFieldPattern);
271+
}
272+
validateFieldPatterns(processorTag, excludeFieldPatterns, "exclude_field_pattern");
115273
}
116274

117-
final List<TemplateScript.Factory> compiledTemplates = fields.stream()
118-
.map(f -> ConfigurationUtils.compileTemplate(TYPE, processorTag, "field", f, scriptService))
119-
.collect(Collectors.toList());
120275
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
121-
return new RemoveProcessor(processorTag, description, compiledTemplates, ignoreMissing);
276+
return new RemoveProcessor(
277+
processorTag,
278+
description,
279+
fieldCompiledTemplates,
280+
fieldPatterns,
281+
excludeFieldCompiledTemplates,
282+
excludeFieldPatterns,
283+
ignoreMissing
284+
);
285+
}
286+
287+
private void validateFieldPatterns(String processorTag, List<String> patterns, String patternKey) {
288+
List<String> validationErrors = new ArrayList<>();
289+
for (String fieldPattern : patterns) {
290+
if (fieldPattern.contains(" ")) {
291+
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a space");
292+
}
293+
if (fieldPattern.contains(",")) {
294+
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ','");
295+
}
296+
if (fieldPattern.contains("#")) {
297+
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a '#'");
298+
}
299+
if (fieldPattern.contains(":")) {
300+
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ':'");
301+
}
302+
if (fieldPattern.startsWith("_")) {
303+
validationErrors.add(patternKey + " [" + fieldPattern + "] must not start with '_'");
304+
}
305+
if (Strings.validFileNameExcludingAstrix(fieldPattern) == false) {
306+
validationErrors.add(
307+
patternKey + " [" + fieldPattern + "] must not contain the following characters " + Strings.INVALID_FILENAME_CHARS
308+
);
309+
}
310+
}
311+
312+
if (validationErrors.size() > 0) {
313+
ValidationException validationException = new ValidationException();
314+
validationException.addValidationErrors(validationErrors);
315+
throw newConfigurationException(TYPE, processorTag, patternKey, validationException.getMessage());
316+
}
122317
}
123318
}
124319
}

0 commit comments

Comments
 (0)