Skip to content

Commit dde4cc8

Browse files
authored
Adding support for derive source feature and implementing it for various type of field mappers (opensearch-project#17759)
* Adding support for derive source feature and implementing it for some of the field types Signed-off-by: Tanik Pansuriya <[email protected]>Signed-off-by: TJ Neuenfeldt <[email protected]>
1 parent c986a9d commit dde4cc8

31 files changed

+2468
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1212
- Update API of Message in index to add the timestamp for lag calculation in ingestion polling ([#17977](https://github.com/opensearch-project/OpenSearch/pull/17977/))
1313
- Add composite directory factory ([#17988](https://github.com/opensearch-project/OpenSearch/pull/17988))
1414
- Add pull-based ingestion error metrics and make internal queue size configurable ([#18088](https://github.com/opensearch-project/OpenSearch/pull/18088))
15+
- Adding support for derived source feature and implementing it for various types of field mappers ([#17759](https://github.com/opensearch-project/OpenSearch/pull/17759))
1516
- [Security Manager Replacement] Enhance Java Agent to intercept newByteChannel ([#17989](https://github.com/opensearch-project/OpenSearch/pull/17989))
1617
- Enabled Async Shard Batch Fetch by default ([#18139](https://github.com/opensearch-project/OpenSearch/pull/18139))
1718
- Allow to get the search request from the QueryCoordinatorContext ([#17818](https://github.com/opensearch-project/OpenSearch/pull/17818))

modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,29 @@ private static double objectToDouble(Object value) {
500500
return doubleValue;
501501
}
502502

503+
@Override
504+
protected void canDeriveSourceInternal() {
505+
checkStoredAndDocValuesForDerivedSource();
506+
}
507+
508+
/**
509+
* 1. If it has doc values, build source using doc values
510+
* 2. If doc_values is disabled in field mapping, then build source using stored field
511+
* <p>
512+
* Considerations:
513+
* 1. When using doc values, for multi value field, result would be in sorted order
514+
* 2. There might be precision loss as values are stored as long after multiplying it with "scaling_factor" for
515+
* both doc values and stored field
516+
*/
517+
@Override
518+
protected DerivedFieldGenerator derivedFieldGenerator() {
519+
return new DerivedFieldGenerator(
520+
mappedFieldType,
521+
new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()),
522+
new StoredFieldFetcher(mappedFieldType, simpleName())
523+
);
524+
}
525+
503526
private static class ScaledFloatIndexFieldData extends IndexNumericFieldData {
504527

505528
private final IndexNumericFieldData scaledFieldData;

modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,15 @@
3232

3333
package org.opensearch.index.mapper;
3434

35+
import org.apache.lucene.document.Document;
36+
import org.apache.lucene.document.SortedNumericDocValuesField;
37+
import org.apache.lucene.document.StoredField;
38+
import org.apache.lucene.index.DirectoryReader;
3539
import org.apache.lucene.index.DocValuesType;
40+
import org.apache.lucene.index.IndexWriter;
41+
import org.apache.lucene.index.IndexWriterConfig;
3642
import org.apache.lucene.index.IndexableField;
43+
import org.apache.lucene.store.Directory;
3744
import org.opensearch.cluster.metadata.IndexMetadata;
3845
import org.opensearch.common.settings.Settings;
3946
import org.opensearch.common.xcontent.XContentFactory;
@@ -54,6 +61,8 @@
5461

5562
public class ScaledFloatFieldMapperTests extends MapperTestCase {
5663

64+
private static final String FIELD_NAME = "field";
65+
5766
@Override
5867
protected Collection<? extends Plugin> getPlugins() {
5968
return singletonList(new MapperExtrasModulePlugin());
@@ -383,6 +392,78 @@ public void testNullValue() throws IOException {
383392
assertFalse(dvField.fieldType().stored());
384393
}
385394

395+
public void testPossibleToDeriveSource_WhenDocValuesAndStoredDisabled() throws IOException {
396+
ScaledFloatFieldMapper mapper = getMapper(FieldMapper.CopyTo.empty(), false, false);
397+
assertThrows(UnsupportedOperationException.class, mapper::canDeriveSource);
398+
}
399+
400+
public void testPossibleToDeriveSource_WhenCopyToPresent() throws IOException {
401+
FieldMapper.CopyTo copyTo = new FieldMapper.CopyTo.Builder().add("copy_to_field").build();
402+
ScaledFloatFieldMapper mapper = getMapper(copyTo, true, true);
403+
assertThrows(UnsupportedOperationException.class, mapper::canDeriveSource);
404+
}
405+
406+
public void testDerivedValueFetching_DocValues() throws IOException {
407+
try (Directory directory = newDirectory()) {
408+
ScaledFloatFieldMapper mapper = getMapper(FieldMapper.CopyTo.empty(), true, false);
409+
float value = 11.523f;
410+
try (IndexWriter iw = new IndexWriter(directory, new IndexWriterConfig())) {
411+
iw.addDocument(createDocument(value, true));
412+
}
413+
414+
try (DirectoryReader reader = DirectoryReader.open(directory)) {
415+
XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
416+
mapper.deriveSource(builder, reader.leaves().get(0).reader(), 0);
417+
builder.endObject();
418+
String source = builder.toString();
419+
assertEquals("{\"" + FIELD_NAME + "\":" + 11.52 + "}", source);
420+
}
421+
}
422+
}
423+
424+
public void testDerivedValueFetching_StoredField() throws IOException {
425+
try (Directory directory = newDirectory()) {
426+
ScaledFloatFieldMapper mapper = getMapper(FieldMapper.CopyTo.empty(), false, true);
427+
float value = 11.523f;
428+
try (IndexWriter iw = new IndexWriter(directory, new IndexWriterConfig())) {
429+
iw.addDocument(createDocument(value, false));
430+
}
431+
432+
try (DirectoryReader reader = DirectoryReader.open(directory)) {
433+
XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
434+
mapper.deriveSource(builder, reader.leaves().get(0).reader(), 0);
435+
builder.endObject();
436+
String source = builder.toString();
437+
assertEquals("{\"" + FIELD_NAME + "\":" + 11.52 + "}", source);
438+
}
439+
}
440+
}
441+
442+
private ScaledFloatFieldMapper getMapper(FieldMapper.CopyTo copyTo, boolean hasDocValues, boolean isStored) throws IOException {
443+
MapperService mapperService = createMapperService(
444+
fieldMapping(
445+
b -> b.field("type", "scaled_float").field("store", isStored).field("doc_values", hasDocValues).field("scaling_factor", 100)
446+
)
447+
);
448+
ScaledFloatFieldMapper mapper = (ScaledFloatFieldMapper) mapperService.documentMapper().mappers().getMapper(FIELD_NAME);
449+
mapper.copyTo = copyTo;
450+
return mapper;
451+
}
452+
453+
/**
454+
* Helper method to create a document with both doc values and stored fields
455+
*/
456+
private Document createDocument(double value, boolean hasDocValues) {
457+
long scaledValue = Math.round(value * 100);
458+
Document doc = new Document();
459+
if (hasDocValues) {
460+
doc.add(new SortedNumericDocValuesField(FIELD_NAME, scaledValue));
461+
} else {
462+
doc.add(new StoredField(FIELD_NAME, scaledValue));
463+
}
464+
return doc;
465+
}
466+
386467
/**
387468
* `index_options` was deprecated and is rejected as of 7.0
388469
*/

server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,4 +412,34 @@ protected String contentType() {
412412
return CONTENT_TYPE;
413413
}
414414

415+
@Override
416+
protected void canDeriveSourceInternal() {
417+
checkStoredAndDocValuesForDerivedSource();
418+
}
419+
420+
/**
421+
* 1. If it has doc values, build source using doc values
422+
* 2. If doc_values is disabled in field mapping, then build source using stored field
423+
*
424+
* <p>
425+
* Considerations:
426+
* 1. Result will be in boolean type and not in the provided string value type at time of ingestion,
427+
* i.e. [false, "false", ""] will become boolean false
428+
* 2. When using doc values, for multi value field, result will be in sorted order, i.e. at start there will
429+
* be 0 or more false and at end there will be 0 or more true
430+
* 2. When using stored field, for multi value field order would be preserved
431+
*/
432+
@Override
433+
protected DerivedFieldGenerator derivedFieldGenerator() {
434+
return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) {
435+
@Override
436+
public Object convert(Object value) {
437+
Long val = (Long) value;
438+
if (val == null) {
439+
return null;
440+
}
441+
return val == 1;
442+
}
443+
}, new StoredFieldFetcher(mappedFieldType, simpleName()));
444+
}
415445
}

server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
package org.opensearch.index.mapper;
1010

11+
import org.apache.lucene.index.LeafReader;
1112
import org.apache.lucene.search.MatchAllDocsQuery;
1213
import org.apache.lucene.search.MatchNoDocsQuery;
1314
import org.apache.lucene.search.MultiTermQuery;
@@ -25,6 +26,7 @@
2526
import org.opensearch.common.lucene.BytesRefs;
2627
import org.opensearch.common.regex.Regex;
2728
import org.opensearch.common.time.DateMathParser;
29+
import org.opensearch.core.xcontent.XContentBuilder;
2830
import org.opensearch.index.fielddata.IndexFieldData;
2931
import org.opensearch.index.fielddata.plain.ConstantIndexFieldData;
3032
import org.opensearch.index.query.QueryShardContext;
@@ -74,6 +76,26 @@ private static ConstantKeywordFieldMapper toType(FieldMapper in) {
7476
return (ConstantKeywordFieldMapper) in;
7577
}
7678

79+
@Override
80+
public void canDeriveSource() {
81+
if (this.copyTo() != null && !this.copyTo().copyToFields().isEmpty()) {
82+
throw new UnsupportedOperationException("Unable to derive source for fields with copy_to parameter set");
83+
}
84+
}
85+
86+
/**
87+
* For each doc, it will return constant value defined in field mapping
88+
* <p>
89+
* Note: Doc for which, field in absent, deriveSource will still consider the still to be present, and it will
90+
* return the same.
91+
*/
92+
@Override
93+
public void deriveSource(XContentBuilder builder, LeafReader leafReader, int docId) throws IOException {
94+
if (value != null) {
95+
builder.field(name(), value);
96+
}
97+
}
98+
7799
/**
78100
* Builder for the constant keyword field mapper
79101
*

server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,38 @@ private static DateFieldMapper toType(FieldMapper in) {
226226
return (DateFieldMapper) in;
227227
}
228228

229+
@Override
230+
protected void canDeriveSourceInternal() {
231+
checkStoredAndDocValuesForDerivedSource();
232+
}
233+
234+
/**
235+
* 1. If it has doc values, build source using doc values
236+
* 2. If doc_values is disabled in field mapping, then build source using stored field
237+
* <p>
238+
* Considerations:
239+
* 1. When building source using doc_values, for multi-value field, it will result values in sorted order
240+
* <p>
241+
* Date format:
242+
* 1. If "print_format" specified in field mapping, then derived source will have date in this format
243+
* 2. If multiple date formats are specified in field mapping and "print_format" is not specified then
244+
* derived source will contain date in first date format from "||" separated list of format defined in
245+
* "format"
246+
*/
247+
@Override
248+
protected DerivedFieldGenerator derivedFieldGenerator() {
249+
return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) {
250+
@Override
251+
public Object convert(Object value) {
252+
Long val = (Long) value;
253+
if (val == null) {
254+
return null;
255+
}
256+
return fieldType().dateTimeFormatter().format(resolution.toInstant(val).atZone(ZoneOffset.UTC));
257+
}
258+
}, new StoredFieldFetcher(mappedFieldType, simpleName()));
259+
}
260+
229261
/**
230262
* Builder for the date field mapper
231263
*
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.mapper;
10+
11+
import org.apache.lucene.index.LeafReader;
12+
import org.opensearch.core.xcontent.XContentBuilder;
13+
14+
import java.io.IOException;
15+
import java.util.Objects;
16+
17+
/**
18+
* DerivedSourceGenerator is used to generate derived source field based on field mapping and how
19+
* it is stored in lucene
20+
*/
21+
public class DerivedFieldGenerator {
22+
23+
private final MappedFieldType mappedFieldType;
24+
private final FieldValueFetcher fieldValueFetcher;
25+
26+
public DerivedFieldGenerator(
27+
MappedFieldType mappedFieldType,
28+
FieldValueFetcher docValuesFetcher,
29+
FieldValueFetcher storedFieldFetcher
30+
) {
31+
this.mappedFieldType = mappedFieldType;
32+
if (Objects.requireNonNull(getDerivedFieldPreference()) == FieldValueType.DOC_VALUES) {
33+
assert docValuesFetcher != null;
34+
this.fieldValueFetcher = docValuesFetcher;
35+
} else {
36+
assert storedFieldFetcher != null;
37+
this.fieldValueFetcher = storedFieldFetcher;
38+
}
39+
}
40+
41+
/**
42+
* Get the preference of the derived field based on field mapping, should be overridden at a FieldMapper to
43+
* alter the preference of derived field
44+
*/
45+
public FieldValueType getDerivedFieldPreference() {
46+
if (mappedFieldType.hasDocValues()) {
47+
return FieldValueType.DOC_VALUES;
48+
}
49+
return FieldValueType.STORED;
50+
}
51+
52+
/**
53+
* Generate the derived field value based on the preference of derived field and field value type
54+
* @param builder - builder to store the derived source filed
55+
* @param reader - leafReader to read data from
56+
* @param docId - docId for which we want to generate the source
57+
*/
58+
public void generate(XContentBuilder builder, LeafReader reader, int docId) throws IOException {
59+
fieldValueFetcher.write(builder, fieldValueFetcher.fetch(reader, docId));
60+
}
61+
}

0 commit comments

Comments
 (0)