Skip to content

Commit 1e00145

Browse files
Extend Approximation Framework to other numeric types (#18530)
* Extend approximation to other numeric types
  Signed-off-by: Prudhvi Godithi <[email protected]>
* Fix basic tests
  Signed-off-by: Prudhvi Godithi <[email protected]>
* Fix basic tests
  Signed-off-by: Prudhvi Godithi <[email protected]>
* Add Parameterized tests for all numeric types
  Signed-off-by: Prudhvi Godithi <[email protected]>
* Fix FieldSortIT
  Signed-off-by: Prudhvi Godithi <[email protected]>
* Fix FieldSortIT
  Signed-off-by: Prudhvi Godithi <[email protected]>
* Address comments
  Signed-off-by: Prudhvi Godithi <[email protected]>
* Update tests and changelog
  Signed-off-by: Prudhvi Godithi <[email protected]>
* Update tests and changelog
  Signed-off-by: Prudhvi Godithi <[email protected]>

---------

Signed-off-by: Prudhvi Godithi <[email protected]>
1 parent 46a0045 commit 1e00145

File tree

7 files changed

+844
-470
lines changed

7 files changed

+844
-470
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1717
- Add BooleanQuery rewrite moving constant-scoring must clauses to filter clauses ([#18510](https://github.com/opensearch-project/OpenSearch/issues/18510))
1818
- Add functionality for plugins to inject QueryCollectorContext during QueryPhase ([#18637](https://github.com/opensearch-project/OpenSearch/pull/18637))
1919
- Add support for non-timing info in profiler ([#18460](https://github.com/opensearch-project/OpenSearch/issues/18460))
20+
- Extend Approximation Framework to other numeric types ([#18530](https://github.com/opensearch-project/OpenSearch/issues/18530))
2021

2122
### Changed
2223
- Update Subject interface to use CheckedRunnable ([#18570](https://github.com/opensearch-project/OpenSearch/issues/18570))

server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,7 @@ public void testSortMissingNumbers() throws Exception {
934934
indexRandomForConcurrentSearch("test");
935935

936936
// DOUBLE
937-
logger.info("--> sort with no missing (same as missing _last)");
937+
logger.info("--> sort with no missing");
938938
SearchResponse searchResponse = client().prepareSearch()
939939
.setQuery(matchAllQuery())
940940
.addSort(SortBuilders.fieldSort("i_value").order(SortOrder.ASC))
@@ -944,7 +944,6 @@ public void testSortMissingNumbers() throws Exception {
944944
assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(3L));
945945
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("1"));
946946
assertThat(searchResponse.getHits().getAt(1).getId(), equalTo("3"));
947-
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("2"));
948947

949948
logger.info("--> sort with missing _last");
950949
searchResponse = client().prepareSearch()
@@ -983,7 +982,7 @@ public void testSortMissingNumbers() throws Exception {
983982
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("3"));
984983

985984
// FLOAT
986-
logger.info("--> sort with no missing (same as missing _last)");
985+
logger.info("--> sort with no missing");
987986
searchResponse = client().prepareSearch()
988987
.setQuery(matchAllQuery())
989988
.addSort(SortBuilders.fieldSort("d_value").order(SortOrder.ASC))
@@ -993,7 +992,6 @@ public void testSortMissingNumbers() throws Exception {
993992
assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(3L));
994993
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("1"));
995994
assertThat(searchResponse.getHits().getAt(1).getId(), equalTo("3"));
996-
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("2"));
997995

998996
logger.info("--> sort with missing _last");
999997
searchResponse = client().prepareSearch()
@@ -1032,7 +1030,7 @@ public void testSortMissingNumbers() throws Exception {
10321030
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("3"));
10331031

10341032
// UNSIGNED_LONG
1035-
logger.info("--> sort with no missing (same as missing _last)");
1033+
logger.info("--> sort with no missing");
10361034
searchResponse = client().prepareSearch()
10371035
.setQuery(matchAllQuery())
10381036
.addSort(SortBuilders.fieldSort("u_value").order(SortOrder.ASC))
@@ -1042,7 +1040,6 @@ public void testSortMissingNumbers() throws Exception {
10421040
assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(3L));
10431041
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("1"));
10441042
assertThat(searchResponse.getHits().getAt(1).getId(), equalTo("3"));
1045-
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("2"));
10461043

10471044
logger.info("--> sort with missing _last");
10481045
searchResponse = client().prepareSearch()
@@ -1138,7 +1135,7 @@ public void testSortMissingNumbersMinMax() throws Exception {
11381135
indexRandomForConcurrentSearch("test");
11391136

11401137
// LONG
1141-
logger.info("--> sort with no missing (same as missing _last)");
1138+
logger.info("--> sort with no missing");
11421139
SearchResponse searchResponse = client().prepareSearch()
11431140
.setQuery(matchAllQuery())
11441141
.addSort(SortBuilders.fieldSort("l_value").order(SortOrder.ASC))
@@ -1177,7 +1174,7 @@ public void testSortMissingNumbersMinMax() throws Exception {
11771174
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("3"));
11781175

11791176
// FLOAT
1180-
logger.info("--> sort with no missing (same as missing _last)");
1177+
logger.info("--> sort with no missing");
11811178
searchResponse = client().prepareSearch()
11821179
.setQuery(matchAllQuery())
11831180
.addSort(SortBuilders.fieldSort("d_value").order(SortOrder.ASC))
@@ -1187,7 +1184,6 @@ public void testSortMissingNumbersMinMax() throws Exception {
11871184
assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(3L));
11881185
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("1"));
11891186
assertThat(searchResponse.getHits().getAt(1).getId(), equalTo("3"));
1190-
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("2"));
11911187

11921188
logger.info("--> sort with missing _last");
11931189
searchResponse = client().prepareSearch()
@@ -1214,7 +1210,7 @@ public void testSortMissingNumbersMinMax() throws Exception {
12141210
assertThat(searchResponse.getHits().getAt(2).getId(), equalTo("3"));
12151211

12161212
// UNSIGNED_LONG
1217-
logger.info("--> sort with no missing (same as missing _last)");
1213+
logger.info("--> sort with no missing");
12181214
searchResponse = client().prepareSearch()
12191215
.setQuery(matchAllQuery())
12201216
.addSort(SortBuilders.fieldSort("u_value").order(SortOrder.ASC))
@@ -1225,7 +1221,6 @@ public void testSortMissingNumbersMinMax() throws Exception {
12251221
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("1"));
12261222
// The order here could be unstable (depends on document order) since missing == field value
12271223
assertThat(searchResponse.getHits().getAt(1).getId(), is(oneOf("3", "2")));
1228-
assertThat(searchResponse.getHits().getAt(2).getId(), is(oneOf("2", "3")));
12291224

12301225
logger.info("--> sort with missing _last");
12311226
searchResponse = client().prepareSearch()

server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java

Lines changed: 148 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ public class NumberFieldMapper extends ParametrizedFieldMapper {
102102

103103
public static final Setting<Boolean> COERCE_SETTING = Setting.boolSetting("index.mapping.coerce", true, Property.IndexScope);
104104

105+
private static final int APPROX_QUERY_NUMERIC_DIMS = 1;
106+
105107
private static NumberFieldMapper toType(FieldMapper in) {
106108
return (NumberFieldMapper) in;
107109
}
@@ -356,23 +358,50 @@ public Query rangeQuery(
356358
}
357359
u = HalfFloatPoint.nextDown(u);
358360
}
359-
if (isSearchable && hasDocValues) {
360-
Query query = HalfFloatPoint.newRangeQuery(field, l, u);
361-
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(
361+
362+
Query dvQuery = hasDocValues
363+
? SortedNumericDocValuesField.newSlowRangeQuery(
362364
field,
363365
HalfFloatPoint.halfFloatToSortableShort(l),
364366
HalfFloatPoint.halfFloatToSortableShort(u)
367+
)
368+
: null;
369+
if (isSearchable) {
370+
Query pointRangeQuery = HalfFloatPoint.newRangeQuery(field, l, u);
371+
Query query;
372+
if (dvQuery != null) {
373+
query = new IndexOrDocValuesQuery(pointRangeQuery, dvQuery);
374+
if (context.indexSortedOnField(field)) {
375+
query = new IndexSortSortedNumericDocValuesRangeQuery(
376+
field,
377+
HalfFloatPoint.halfFloatToSortableShort(l),
378+
HalfFloatPoint.halfFloatToSortableShort(u),
379+
query
380+
);
381+
}
382+
} else {
383+
query = pointRangeQuery;
384+
}
385+
return new ApproximateScoreQuery(
386+
query,
387+
new ApproximatePointRangeQuery(
388+
field,
389+
NumberType.HALF_FLOAT.encodePoint(l),
390+
NumberType.HALF_FLOAT.encodePoint(u),
391+
APPROX_QUERY_NUMERIC_DIMS,
392+
ApproximatePointRangeQuery.HALF_FLOAT_FORMAT
393+
)
365394
);
366-
return new IndexOrDocValuesQuery(query, dvQuery);
367395
}
368-
if (hasDocValues) {
369-
return SortedNumericDocValuesField.newSlowRangeQuery(
396+
if (context.indexSortedOnField(field)) {
397+
dvQuery = new IndexSortSortedNumericDocValuesRangeQuery(
370398
field,
371399
HalfFloatPoint.halfFloatToSortableShort(l),
372-
HalfFloatPoint.halfFloatToSortableShort(u)
400+
HalfFloatPoint.halfFloatToSortableShort(u),
401+
dvQuery
373402
);
374403
}
375-
return HalfFloatPoint.newRangeQuery(field, l, u);
404+
return dvQuery;
376405
}
377406

378407
@Override
@@ -503,23 +532,52 @@ public Query rangeQuery(
503532
u = FloatPoint.nextDown(u);
504533
}
505534
}
506-
if (isSearchable && hasDocValues) {
507-
Query query = FloatPoint.newRangeQuery(field, l, u);
508-
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(
535+
536+
Query dvQuery = hasDocValues
537+
? SortedNumericDocValuesField.newSlowRangeQuery(
509538
field,
510539
NumericUtils.floatToSortableInt(l),
511540
NumericUtils.floatToSortableInt(u)
541+
)
542+
: null;
543+
544+
if (isSearchable) {
545+
Query pointRangeQuery = FloatPoint.newRangeQuery(field, l, u);
546+
Query query;
547+
if (dvQuery != null) {
548+
query = new IndexOrDocValuesQuery(pointRangeQuery, dvQuery);
549+
if (context.indexSortedOnField(field)) {
550+
query = new IndexSortSortedNumericDocValuesRangeQuery(
551+
field,
552+
NumericUtils.floatToSortableInt(l),
553+
NumericUtils.floatToSortableInt(u),
554+
query
555+
);
556+
}
557+
} else {
558+
query = pointRangeQuery;
559+
}
560+
return new ApproximateScoreQuery(
561+
query,
562+
new ApproximatePointRangeQuery(
563+
field,
564+
FloatPoint.pack(new float[] { l }).bytes,
565+
FloatPoint.pack(new float[] { u }).bytes,
566+
APPROX_QUERY_NUMERIC_DIMS,
567+
ApproximatePointRangeQuery.FLOAT_FORMAT
568+
)
512569
);
513-
return new IndexOrDocValuesQuery(query, dvQuery);
514570
}
515-
if (hasDocValues) {
516-
return SortedNumericDocValuesField.newSlowRangeQuery(
571+
572+
if (context.indexSortedOnField(field)) {
573+
dvQuery = new IndexSortSortedNumericDocValuesRangeQuery(
517574
field,
518575
NumericUtils.floatToSortableInt(l),
519-
NumericUtils.floatToSortableInt(u)
576+
NumericUtils.floatToSortableInt(u),
577+
dvQuery
520578
);
521579
}
522-
return FloatPoint.newRangeQuery(field, l, u);
580+
return dvQuery;
523581
}
524582

525583
@Override
@@ -628,23 +686,49 @@ public Query rangeQuery(
628686
QueryShardContext context
629687
) {
630688
return doubleRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, (l, u) -> {
631-
if (isSearchable && hasDocValues) {
632-
Query query = DoublePoint.newRangeQuery(field, l, u);
633-
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(
689+
Query dvQuery = hasDocValues
690+
? SortedNumericDocValuesField.newSlowRangeQuery(
634691
field,
635692
NumericUtils.doubleToSortableLong(l),
636693
NumericUtils.doubleToSortableLong(u)
694+
)
695+
: null;
696+
if (isSearchable) {
697+
Query pointRangeQuery = DoublePoint.newRangeQuery(field, l, u);
698+
Query query;
699+
if (dvQuery != null) {
700+
query = new IndexOrDocValuesQuery(pointRangeQuery, dvQuery);
701+
if (context.indexSortedOnField(field)) {
702+
query = new IndexSortSortedNumericDocValuesRangeQuery(
703+
field,
704+
NumericUtils.doubleToSortableLong(l),
705+
NumericUtils.doubleToSortableLong(u),
706+
query
707+
);
708+
}
709+
} else {
710+
query = pointRangeQuery;
711+
}
712+
return new ApproximateScoreQuery(
713+
query,
714+
new ApproximatePointRangeQuery(
715+
field,
716+
DoublePoint.pack(new double[] { l }).bytes,
717+
DoublePoint.pack(new double[] { u }).bytes,
718+
APPROX_QUERY_NUMERIC_DIMS,
719+
ApproximatePointRangeQuery.DOUBLE_FORMAT
720+
)
637721
);
638-
return new IndexOrDocValuesQuery(query, dvQuery);
639722
}
640-
if (hasDocValues) {
641-
return SortedNumericDocValuesField.newSlowRangeQuery(
723+
if (context.indexSortedOnField(field)) {
724+
dvQuery = new IndexSortSortedNumericDocValuesRangeQuery(
642725
field,
643726
NumericUtils.doubleToSortableLong(l),
644-
NumericUtils.doubleToSortableLong(u)
727+
NumericUtils.doubleToSortableLong(u),
728+
dvQuery
645729
);
646730
}
647-
return DoublePoint.newRangeQuery(field, l, u);
731+
return dvQuery;
648732
});
649733
}
650734

@@ -988,23 +1072,33 @@ public Query rangeQuery(
9881072
--u;
9891073
}
9901074
}
991-
if (isSearchable && hasDocValues) {
992-
Query query = IntPoint.newRangeQuery(field, l, u);
993-
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u);
994-
query = new IndexOrDocValuesQuery(query, dvQuery);
995-
if (context.indexSortedOnField(field)) {
996-
query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query);
1075+
Query dvQuery = hasDocValues ? SortedNumericDocValuesField.newSlowRangeQuery(field, l, u) : null;
1076+
if (isSearchable) {
1077+
Query pointRangeQuery = IntPoint.newRangeQuery(field, l, u);
1078+
Query query;
1079+
if (dvQuery != null) {
1080+
query = new IndexOrDocValuesQuery(pointRangeQuery, dvQuery);
1081+
if (context.indexSortedOnField(field)) {
1082+
query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query);
1083+
}
1084+
} else {
1085+
query = pointRangeQuery;
9971086
}
998-
return query;
1087+
return new ApproximateScoreQuery(
1088+
query,
1089+
new ApproximatePointRangeQuery(
1090+
field,
1091+
IntPoint.pack(new int[] { l }).bytes,
1092+
IntPoint.pack(new int[] { u }).bytes,
1093+
APPROX_QUERY_NUMERIC_DIMS,
1094+
ApproximatePointRangeQuery.INT_FORMAT
1095+
)
1096+
);
9991097
}
1000-
if (hasDocValues) {
1001-
Query query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u);
1002-
if (context.indexSortedOnField(field)) {
1003-
query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query);
1004-
}
1005-
return query;
1098+
if (context.indexSortedOnField(field)) {
1099+
dvQuery = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, dvQuery);
10061100
}
1007-
return IntPoint.newRangeQuery(field, l, u);
1101+
return dvQuery;
10081102
}
10091103

10101104
@Override
@@ -1136,11 +1230,10 @@ public Query rangeQuery(
11361230
field,
11371231
LongPoint.pack(new long[] { l }).bytes,
11381232
LongPoint.pack(new long[] { u }).bytes,
1139-
new long[] { l }.length,
1233+
APPROX_QUERY_NUMERIC_DIMS,
11401234
ApproximatePointRangeQuery.LONG_FORMAT
11411235
)
11421236
);
1143-
11441237
}
11451238
if (context.indexSortedOnField(field)) {
11461239
dvQuery = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, dvQuery);
@@ -1257,10 +1350,22 @@ public Query rangeQuery(
12571350
QueryShardContext context
12581351
) {
12591352
return unsignedLongRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, (l, u) -> {
1260-
if (isSearchable && hasDocValues) {
1353+
if (isSearchable) {
12611354
Query query = BigIntegerPoint.newRangeQuery(field, l, u);
1262-
Query dvQuery = SortedUnsignedLongDocValuesRangeQuery.newSlowRangeQuery(field, l, u);
1263-
return new IndexOrDocValuesQuery(query, dvQuery);
1355+
if (hasDocValues) {
1356+
Query dvQuery = SortedUnsignedLongDocValuesRangeQuery.newSlowRangeQuery(field, l, u);
1357+
query = new IndexOrDocValuesQuery(query, dvQuery);
1358+
}
1359+
return new ApproximateScoreQuery(
1360+
query,
1361+
new ApproximatePointRangeQuery(
1362+
field,
1363+
NumberType.UNSIGNED_LONG.encodePoint(l),
1364+
NumberType.UNSIGNED_LONG.encodePoint(u),
1365+
APPROX_QUERY_NUMERIC_DIMS,
1366+
ApproximatePointRangeQuery.UNSIGNED_LONG_FORMAT
1367+
)
1368+
);
12641369
}
12651370
if (hasDocValues) {
12661371
return SortedUnsignedLongDocValuesRangeQuery.newSlowRangeQuery(field, l, u);

0 commit comments

Comments
 (0)