3232
3333package org .opensearch .ingest .common ;
3434
35+ import org .opensearch .common .ValidationException ;
36+ import org .opensearch .common .regex .Regex ;
3537import org .opensearch .core .common .Strings ;
3638import org .opensearch .ingest .AbstractProcessor ;
3739import org .opensearch .ingest .ConfigurationUtils ;
4143import org .opensearch .script .TemplateScript ;
4244
4345import java .util .ArrayList ;
46+ import java .util .HashSet ;
4447import java .util .List ;
4548import java .util .Map ;
49+ import java .util .Set ;
4650import java .util .stream .Collectors ;
4751
52+ import static org .opensearch .ingest .ConfigurationUtils .newConfigurationException ;
53+
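/**
 * Processor that removes fields from a document. The fields to remove are selected either by name
 * ({@code field}) and simple pattern ({@code field_pattern}), or inversely by removing every non-metadata
 * field that is not listed in {@code exclude_field} and not matched by {@code exclude_field_pattern}.
 * A named field that is not present causes an error unless {@code ignore_missing} is set.
 */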
@@ -53,34 +59,118 @@ public final class RemoveProcessor extends AbstractProcessor {
5359 public static final String TYPE = "remove" ;
5460
5561 private final List <TemplateScript .Factory > fields ;
62+ private final List <String > fieldPatterns ;
63+ private final List <TemplateScript .Factory > excludeFields ;
64+ private final List <String > excludeFieldPatterns ;
5665 private final boolean ignoreMissing ;
5766
/**
 * Builds a remove processor. Every list argument is copied, so later changes to the
 * caller's lists do not affect this processor.
 *
 * @param tag                  processor tag, forwarded to the super class
 * @param description          processor description, forwarded to the super class
 * @param fields               templates rendering the paths of the fields to remove
 * @param fieldPatterns        simple patterns selecting the fields to remove
 * @param excludeFields        templates rendering the paths of the fields to keep
 * @param excludeFieldPatterns simple patterns selecting the fields to keep
 * @param ignoreMissing        whether a missing or empty field path is skipped instead of raising an error
 */
RemoveProcessor(
    String tag,
    String description,
    List<TemplateScript.Factory> fields,
    List<String> fieldPatterns,
    List<TemplateScript.Factory> excludeFields,
    List<String> excludeFieldPatterns,
    boolean ignoreMissing
) {
    super(tag, description);
    this.ignoreMissing = ignoreMissing;
    // Defensive copies of all four selectors.
    this.excludeFieldPatterns = new ArrayList<>(excludeFieldPatterns);
    this.excludeFields = new ArrayList<>(excludeFields);
    this.fieldPatterns = new ArrayList<>(fieldPatterns);
    this.fields = new ArrayList<>(fields);
}
6383
6484 public List <TemplateScript .Factory > getFields () {
6585 return fields ;
6686 }
6787
88+ public List <String > getFieldPatterns () {
89+ return fieldPatterns ;
90+ }
91+
92+ public List <TemplateScript .Factory > getExcludeFields () {
93+ return excludeFields ;
94+ }
95+
96+ public List <String > getExcludeFieldPatterns () {
97+ return excludeFieldPatterns ;
98+ }
99+
68100 @ Override
69101 public IngestDocument execute (IngestDocument document ) {
70- fields .forEach (field -> {
71- String path = document .renderTemplate (field );
72- final boolean fieldPathIsNullOrEmpty = Strings .isNullOrEmpty (path );
73- if (fieldPathIsNullOrEmpty || document .hasField (path ) == false ) {
74- if (ignoreMissing ) {
75- return ;
76- } else if (fieldPathIsNullOrEmpty ) {
77- throw new IllegalArgumentException ("field path cannot be null nor empty" );
78- } else {
79- throw new IllegalArgumentException ("field [" + path + "] doesn't exist" );
102+ if (!fields .isEmpty ()) {
103+ fields .forEach (field -> {
104+ String path = document .renderTemplate (field );
105+ final boolean fieldPathIsNullOrEmpty = Strings .isNullOrEmpty (path );
106+ if (fieldPathIsNullOrEmpty || document .hasField (path ) == false ) {
107+ if (ignoreMissing ) {
108+ return ;
109+ } else if (fieldPathIsNullOrEmpty ) {
110+ throw new IllegalArgumentException ("field path cannot be null nor empty" );
111+ } else {
112+ throw new IllegalArgumentException ("field [" + path + "] doesn't exist" );
113+ }
80114 }
81- }
82- document .removeField (path );
83- });
115+
116+ document .removeField (path );
117+ });
118+ }
119+
120+ if (!fieldPatterns .isEmpty ()) {
121+ Set <String > existingFields = new HashSet <>(document .getSourceAndMetadata ().keySet ());
122+ Set <String > metadataFields = document .getMetadata ()
123+ .keySet ()
124+ .stream ()
125+ .map (IngestDocument .Metadata ::getFieldName )
126+ .collect (Collectors .toSet ());
127+ existingFields .forEach (field -> {
128+ // ignore metadata fields such as _index, _id, etc.
129+ if (!metadataFields .contains (field )) {
130+ final boolean matched = fieldPatterns .stream ().anyMatch (pattern -> Regex .simpleMatch (pattern , field ));
131+ if (matched ) {
132+ document .removeField (field );
133+ }
134+ }
135+ });
136+ }
137+
138+ Set <String > excludeFieldSet = new HashSet <>();
139+ if (!excludeFields .isEmpty ()) {
140+ excludeFields .forEach (field -> {
141+ String path = document .renderTemplate (field );
142+ // ignore the empty or null field path
143+ if (!Strings .isNullOrEmpty (path )) {
144+ excludeFieldSet .add (path );
145+ }
146+ });
147+ }
148+
149+ if (!excludeFieldSet .isEmpty () || !excludeFieldPatterns .isEmpty ()) {
150+ Set <String > existingFields = new HashSet <>(document .getSourceAndMetadata ().keySet ());
151+ Set <String > metadataFields = document .getMetadata ()
152+ .keySet ()
153+ .stream ()
154+ .map (IngestDocument .Metadata ::getFieldName )
155+ .collect (Collectors .toSet ());
156+ existingFields .forEach (field -> {
157+ // ignore metadata fields such as _index, _id, etc.
158+ if (!metadataFields .contains (field )) {
159+ // when both exclude_field and exclude_field_pattern are not empty, remove the field if it doesn't exist in both of them
160+ // if not, remove the field if it doesn't exist in the non-empty one
161+ if (!excludeFieldPatterns .isEmpty ()) {
162+ final boolean matched = excludeFieldPatterns .stream ().anyMatch (pattern -> Regex .simpleMatch (pattern , field ));
163+ if (!excludeFieldSet .isEmpty () && !excludeFieldSet .contains (field ) && !matched
164+ || excludeFieldSet .isEmpty () && !matched ) {
165+ document .removeField (field );
166+ }
167+ } else if (!excludeFieldSet .isEmpty () && !excludeFieldSet .contains (field )) {
168+ document .removeField (field );
169+ }
170+ }
171+ });
172+ }
173+
84174 return document ;
85175 }
86176
@@ -105,20 +195,125 @@ public RemoveProcessor create(
105195 Map <String , Object > config
106196 ) throws Exception {
107197 final List <String > fields = new ArrayList <>();
108- final Object field = ConfigurationUtils .readObject (TYPE , processorTag , config , "field" );
109- if (field instanceof List ) {
110- @ SuppressWarnings ("unchecked" )
111- List <String > stringList = (List <String >) field ;
112- fields .addAll (stringList );
113- } else {
114- fields .add ((String ) field );
198+ final List <String > fieldPatterns = new ArrayList <>();
199+ final List <String > excludeFields = new ArrayList <>();
200+ final List <String > excludeFieldPatterns = new ArrayList <>();
201+
202+ final Object field = ConfigurationUtils .readOptionalObject (config , "field" );
203+ final Object fieldPattern = ConfigurationUtils .readOptionalObject (config , "field_pattern" );
204+ final Object excludeField = ConfigurationUtils .readOptionalObject (config , "exclude_field" );
205+ final Object excludeFieldPattern = ConfigurationUtils .readOptionalObject (config , "exclude_field_pattern" );
206+
207+ if (field == null && fieldPattern == null && excludeField == null && excludeFieldPattern == null ) {
208+ throw newConfigurationException (
209+ TYPE ,
210+ processorTag ,
211+ "field" ,
212+ "at least one of the parameters field, field_pattern, exclude_field and exclude_field_pattern need to be set"
213+ );
214+ }
215+
216+ if ((field != null || fieldPattern != null ) && (excludeField != null || excludeFieldPattern != null )) {
217+ throw newConfigurationException (
218+ TYPE ,
219+ processorTag ,
220+ "field" ,
221+ "ether (field,field_pattern) or (exclude_field,exclude_field_pattern) can be set"
222+ );
223+ }
224+
225+ List <TemplateScript .Factory > fieldCompiledTemplates = new ArrayList <>();
226+ if (field != null ) {
227+ if (field instanceof List ) {
228+ @ SuppressWarnings ("unchecked" )
229+ List <String > stringList = (List <String >) field ;
230+ fields .addAll (stringList );
231+ } else {
232+ fields .add ((String ) field );
233+ }
234+ fieldCompiledTemplates = fields .stream ()
235+ .map (f -> ConfigurationUtils .compileTemplate (TYPE , processorTag , "field" , f , scriptService ))
236+ .collect (Collectors .toList ());
237+ }
238+
239+ if (fieldPattern != null ) {
240+ if (fieldPattern instanceof List ) {
241+ @ SuppressWarnings ("unchecked" )
242+ List <String > fieldPatternList = (List <String >) fieldPattern ;
243+ fieldPatterns .addAll (fieldPatternList );
244+ } else {
245+ fieldPatterns .add ((String ) fieldPattern );
246+ }
247+ validateFieldPatterns (processorTag , fieldPatterns , "field_pattern" );
248+ }
249+
250+ List <TemplateScript .Factory > excludeFieldCompiledTemplates = new ArrayList <>();
251+ if (excludeField != null ) {
252+ if (excludeField instanceof List ) {
253+ @ SuppressWarnings ("unchecked" )
254+ List <String > stringList = (List <String >) excludeField ;
255+ excludeFields .addAll (stringList );
256+ } else {
257+ excludeFields .add ((String ) excludeField );
258+ }
259+ excludeFieldCompiledTemplates = excludeFields .stream ()
260+ .map (f -> ConfigurationUtils .compileTemplate (TYPE , processorTag , "exclude_field" , f , scriptService ))
261+ .collect (Collectors .toList ());
262+ }
263+
264+ if (excludeFieldPattern != null ) {
265+ if (excludeFieldPattern instanceof List ) {
266+ @ SuppressWarnings ("unchecked" )
267+ List <String > excludeFieldPatternList = (List <String >) excludeFieldPattern ;
268+ excludeFieldPatterns .addAll (excludeFieldPatternList );
269+ } else {
270+ excludeFieldPatterns .add ((String ) excludeFieldPattern );
271+ }
272+ validateFieldPatterns (processorTag , excludeFieldPatterns , "exclude_field_pattern" );
115273 }
116274
117- final List <TemplateScript .Factory > compiledTemplates = fields .stream ()
118- .map (f -> ConfigurationUtils .compileTemplate (TYPE , processorTag , "field" , f , scriptService ))
119- .collect (Collectors .toList ());
120275 boolean ignoreMissing = ConfigurationUtils .readBooleanProperty (TYPE , processorTag , config , "ignore_missing" , false );
121- return new RemoveProcessor (processorTag , description , compiledTemplates , ignoreMissing );
276+ return new RemoveProcessor (
277+ processorTag ,
278+ description ,
279+ fieldCompiledTemplates ,
280+ fieldPatterns ,
281+ excludeFieldCompiledTemplates ,
282+ excludeFieldPatterns ,
283+ ignoreMissing
284+ );
285+ }
286+
287+ private void validateFieldPatterns (String processorTag , List <String > patterns , String patternKey ) {
288+ List <String > validationErrors = new ArrayList <>();
289+ for (String fieldPattern : patterns ) {
290+ if (fieldPattern .contains (" " )) {
291+ validationErrors .add (patternKey + " [" + fieldPattern + "] must not contain a space" );
292+ }
293+ if (fieldPattern .contains ("," )) {
294+ validationErrors .add (patternKey + " [" + fieldPattern + "] must not contain a ','" );
295+ }
296+ if (fieldPattern .contains ("#" )) {
297+ validationErrors .add (patternKey + " [" + fieldPattern + "] must not contain a '#'" );
298+ }
299+ if (fieldPattern .contains (":" )) {
300+ validationErrors .add (patternKey + " [" + fieldPattern + "] must not contain a ':'" );
301+ }
302+ if (fieldPattern .startsWith ("_" )) {
303+ validationErrors .add (patternKey + " [" + fieldPattern + "] must not start with '_'" );
304+ }
305+ if (Strings .validFileNameExcludingAstrix (fieldPattern ) == false ) {
306+ validationErrors .add (
307+ patternKey + " [" + fieldPattern + "] must not contain the following characters " + Strings .INVALID_FILENAME_CHARS
308+ );
309+ }
310+ }
311+
312+ if (validationErrors .size () > 0 ) {
313+ ValidationException validationException = new ValidationException ();
314+ validationException .addValidationErrors (validationErrors );
315+ throw newConfigurationException (TYPE , processorTag , patternKey , validationException .getMessage ());
316+ }
122317 }
123318 }
124319}
0 commit comments