Skip to content

Commit e41162f

Browse files
Apply the date histogram rewrite optimization to range aggregation (#13865) (#14463)
* Refactor the ranges representation
* Refactor try fast filter
* Main work finished; handling of the different numeric data types is still left to do
* buildRanges accepts field type
* First working draft (probably)
* Add changelog entry
* Accommodate geo distance agg
* Fix test support all numeric types minus one on the upper range
* [Refactor] Range is lower inclusive, right exclusive
* Adding test
* Adding test and refactor
* Refactor
* Add test
* Add test and update the compare logic in tree traversal
* Fix test, add random test
* Refactor to address comments
* Small potential performance update
* Fix precommit
* Refactor
* Refactor
* Set refresh_interval to -1
* Address comment
* Address comment
* Address comment
* Fix test

Fully understanding the double and BigDecimal usage in the scaled float field will take more time.

---------

(cherry picked from commit 57fb50b)

Signed-off-by: bowenlan-amzn <[email protected]>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent be53dde commit e41162f

File tree

17 files changed

+902
-232
lines changed

17 files changed

+902
-232
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
77
### Added
88
- Add fingerprint ingest processor ([#13724](https://github.com/opensearch-project/OpenSearch/pull/13724))
99
- [Remote Store] Rate limiter for remote store low priority uploads ([#14374](https://github.com/opensearch-project/OpenSearch/pull/14374/))
10+
- Apply the date histogram rewrite optimization to range aggregation ([#13865](https://github.com/opensearch-project/OpenSearch/pull/13865))
1011

1112
### Dependencies
1213
- Update to Apache Lucene 9.11.0 ([#14042](https://github.com/opensearch-project/OpenSearch/pull/14042))

modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.fasterxml.jackson.core.JsonParseException;
3636

3737
import org.apache.lucene.document.Field;
38+
import org.apache.lucene.document.LongPoint;
3839
import org.apache.lucene.index.DocValues;
3940
import org.apache.lucene.index.LeafReaderContext;
4041
import org.apache.lucene.index.NumericDocValues;
@@ -165,7 +166,7 @@ public ScaledFloatFieldMapper build(BuilderContext context) {
165166

166167
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getSettings()));
167168

168-
public static final class ScaledFloatFieldType extends SimpleMappedFieldType {
169+
public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder {
169170

170171
private final double scalingFactor;
171172
private final Double nullValue;
@@ -188,6 +189,21 @@ public ScaledFloatFieldType(String name, double scalingFactor) {
188189
this(name, true, false, true, Collections.emptyMap(), scalingFactor, null);
189190
}
190191

192+
@Override
193+
public byte[] encodePoint(Number value) {
194+
assert value instanceof Double;
195+
double doubleValue = (Double) value;
196+
byte[] point = new byte[Long.BYTES];
197+
if (doubleValue == Double.POSITIVE_INFINITY) {
198+
LongPoint.encodeDimension(Long.MAX_VALUE, point, 0);
199+
} else if (doubleValue == Double.NEGATIVE_INFINITY) {
200+
LongPoint.encodeDimension(Long.MIN_VALUE, point, 0);
201+
} else {
202+
LongPoint.encodeDimension(Math.round(scale(value)), point, 0);
203+
}
204+
return point;
205+
}
206+
191207
public double getScalingFactor() {
192208
return scalingFactor;
193209
}

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ setup:
1414
date:
1515
type: date
1616
format: epoch_second
17+
scaled_field:
18+
type: scaled_float
19+
scaling_factor: 100
1720

1821
- do:
1922
cluster.health:
@@ -528,3 +531,139 @@ setup:
528531
- is_false: aggregations.unsigned_long_range.buckets.2.to
529532

530533
- match: { aggregations.unsigned_long_range.buckets.2.doc_count: 0 }
534+
535+
---
536+
"Double range profiler shows filter rewrite info":
537+
- skip:
538+
version: " - 2.99.99"
539+
reason: debug info for filter rewrite added in 3.0.0 (to be backported to 2.15.0)
540+
541+
- do:
542+
indices.create:
543+
index: test_profile
544+
body:
545+
settings:
546+
number_of_replicas: 0
547+
refresh_interval: -1
548+
mappings:
549+
properties:
550+
ip:
551+
type: ip
552+
double:
553+
type: double
554+
date:
555+
type: date
556+
format: epoch_second
557+
558+
- do:
559+
bulk:
560+
index: test_profile
561+
refresh: true
562+
body:
563+
- '{"index": {}}'
564+
- '{"double" : 42}'
565+
- '{"index": {}}'
566+
- '{"double" : 100}'
567+
- '{"index": {}}'
568+
- '{"double" : 50}'
569+
570+
- do:
571+
search:
572+
index: test_profile
573+
body:
574+
size: 0
575+
profile: true
576+
aggs:
577+
double_range:
578+
range:
579+
field: double
580+
ranges:
581+
- to: 50
582+
- from: 50
583+
to: 150
584+
- from: 150
585+
586+
- length: { aggregations.double_range.buckets: 3 }
587+
588+
- match: { aggregations.double_range.buckets.0.key: "*-50.0" }
589+
- is_false: aggregations.double_range.buckets.0.from
590+
- match: { aggregations.double_range.buckets.0.to: 50.0 }
591+
- match: { aggregations.double_range.buckets.0.doc_count: 1 }
592+
- match: { aggregations.double_range.buckets.1.key: "50.0-150.0" }
593+
- match: { aggregations.double_range.buckets.1.from: 50.0 }
594+
- match: { aggregations.double_range.buckets.1.to: 150.0 }
595+
- match: { aggregations.double_range.buckets.1.doc_count: 2 }
596+
- match: { aggregations.double_range.buckets.2.key: "150.0-*" }
597+
- match: { aggregations.double_range.buckets.2.from: 150.0 }
598+
- is_false: aggregations.double_range.buckets.2.to
599+
- match: { aggregations.double_range.buckets.2.doc_count: 0 }
600+
601+
- match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 }
602+
- match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
603+
- match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 }
604+
- match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }
605+
606+
---
607+
"Scaled Float Range Aggregation":
608+
- do:
609+
index:
610+
index: test
611+
id: 1
612+
body: { "scaled_field": 1 }
613+
614+
- do:
615+
index:
616+
index: test
617+
id: 2
618+
body: { "scaled_field": 1.53 }
619+
620+
- do:
621+
index:
622+
index: test
623+
id: 3
624+
body: { "scaled_field": -2.1 }
625+
626+
- do:
627+
index:
628+
index: test
629+
id: 4
630+
body: { "scaled_field": 1.53 }
631+
632+
- do:
633+
indices.refresh: { }
634+
635+
- do:
636+
search:
637+
index: test
638+
body:
639+
size: 0
640+
aggs:
641+
my_range:
642+
range:
643+
field: scaled_field
644+
ranges:
645+
- to: 0
646+
- from: 0
647+
to: 1
648+
- from: 1
649+
to: 1.5
650+
- from: 1.5
651+
652+
- length: { aggregations.my_range.buckets: 4 }
653+
654+
- match: { aggregations.my_range.buckets.0.key: "*-0.0" }
655+
- is_false: aggregations.my_range.buckets.0.from
656+
- match: { aggregations.my_range.buckets.0.to: 0.0 }
657+
- match: { aggregations.my_range.buckets.0.doc_count: 1 }
658+
- match: { aggregations.my_range.buckets.1.key: "0.0-1.0" }
659+
- match: { aggregations.my_range.buckets.1.from: 0.0 }
660+
- match: { aggregations.my_range.buckets.1.to: 1.0 }
661+
- match: { aggregations.my_range.buckets.1.doc_count: 0 }
662+
- match: { aggregations.my_range.buckets.2.key: "1.0-1.5" }
663+
- match: { aggregations.my_range.buckets.2.from: 1.0 }
664+
- match: { aggregations.my_range.buckets.2.to: 1.5 }
665+
- match: { aggregations.my_range.buckets.2.doc_count: 1 }
666+
- match: { aggregations.my_range.buckets.3.key: "1.5-*" }
667+
- match: { aggregations.my_range.buckets.3.from: 1.5 }
668+
- is_false: aggregations.my_range.buckets.3.to
669+
- match: { aggregations.my_range.buckets.3.doc_count: 2 }

server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ public DateFieldMapper build(BuilderContext context) {
353353
*
354354
* @opensearch.internal
355355
*/
356-
public static final class DateFieldType extends MappedFieldType {
356+
public static final class DateFieldType extends MappedFieldType implements NumericPointEncoder {
357357
protected final DateFormatter dateTimeFormatter;
358358
protected final DateMathParser dateMathParser;
359359
protected final Resolution resolution;
@@ -554,6 +554,13 @@ public static long parseToLong(
554554
return resolution.convert(dateParser.parse(BytesRefs.toString(value), now, roundUp, zone));
555555
}
556556

557+
@Override
558+
public byte[] encodePoint(Number value) {
559+
byte[] point = new byte[Long.BYTES];
560+
LongPoint.encodeDimension(value.longValue(), point, 0);
561+
return point;
562+
}
563+
557564
@Override
558565
public Query distanceFeatureQuery(Object origin, String pivot, float boost, QueryShardContext context) {
559566
failIfNotIndexedAndNoDocValues();

0 commit comments

Comments
 (0)