Skip to content

Commit 4aff917

Browse files
Attempt to fix the scaled_float precision issue (#19188)
* Initial commit Signed-off-by: Prudhvi Godithi <[email protected]> * initial commit Signed-off-by: Prudhvi Godithi <[email protected]> * Fix the precision loss Signed-off-by: Prudhvi Godithi <[email protected]> * Update range query Signed-off-by: Prudhvi Godithi <[email protected]> * Add tests Signed-off-by: Prudhvi Godithi <[email protected]> * UPstream fetch Signed-off-by: Prudhvi Godithi <[email protected]> * Fix tests Signed-off-by: Prudhvi Godithi <[email protected]> * Address comments Signed-off-by: Prudhvi Godithi <[email protected]> * Upstream fetch Signed-off-by: Prudhvi Godithi <[email protected]> --------- Signed-off-by: Prudhvi Godithi <[email protected]>
1 parent 53f428c commit 4aff917

File tree

3 files changed

+103
-72
lines changed

3 files changed

+103
-72
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3636
- Fix skip_unavailable setting changing to default during node drop issue ([#18766](https://github.com/opensearch-project/OpenSearch/pull/18766))
3737
- Fix pull-based ingestion pause state initialization during replica promotion ([#19212](https://github.com/opensearch-project/OpenSearch/pull/19212))
3838
- Fix QueryPhaseResultConsumer incomplete callback loops ([#19231](https://github.com/opensearch-project/OpenSearch/pull/19231))
39+
- Fix the `scaled_float` precision issue ([#19188](https://github.com/opensearch-project/OpenSearch/pull/19188))
3940

4041
### Dependencies
4142
- Bump `com.netflix.nebula.ospackage-base` from 12.0.0 to 12.1.0 ([#19019](https://github.com/opensearch-project/OpenSearch/pull/19019))

modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@
6565
import org.opensearch.search.lookup.SearchLookup;
6666

6767
import java.io.IOException;
68-
import java.math.BigDecimal;
6968
import java.time.ZoneId;
7069
import java.util.ArrayList;
7170
import java.util.Arrays;
@@ -279,21 +278,22 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower
279278
failIfNotIndexedAndNoDocValues();
280279
Long lo = null;
281280
if (lowerTerm != null) {
282-
double dValue = scale(lowerTerm);
283-
if (includeLower == false) {
284-
dValue = Math.nextUp(dValue);
285-
}
286-
lo = Math.round(Math.ceil(dValue));
281+
lo = Math.round(scale(lowerTerm));
287282
}
288283
Long hi = null;
289284
if (upperTerm != null) {
290-
double dValue = scale(upperTerm);
291-
if (includeUpper == false) {
292-
dValue = Math.nextDown(dValue);
293-
}
294-
hi = Math.round(Math.floor(dValue));
285+
hi = Math.round(scale(upperTerm));
295286
}
296-
Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(name(), lo, hi, true, true, hasDocValues(), isSearchable(), context);
287+
Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(
288+
name(),
289+
lo,
290+
hi,
291+
includeLower,
292+
includeUpper,
293+
hasDocValues(),
294+
isSearchable(),
295+
context
296+
);
297297
if (boost() != 1f) {
298298
query = new BoostQuery(query, boost());
299299
}
@@ -360,15 +360,16 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) {
360360

361361
/**
362362
* Parses input value and multiplies it with the scaling factor.
363-
* Uses the round-trip of creating a {@link BigDecimal} from the stringified {@code double}
364-
* input to ensure intuitively exact floating point operations.
365-
* (e.g. for a scaling factor of 100, JVM behaviour results in {@code 79.99D * 100 ==> 7998.99..} compared to
366-
* {@code scale(79.99) ==> 7999})
363+
* Note: Uses direct floating-point multiplication for consistency
364+
* between indexing and querying. While this may result in
365+
* floating-point imprecision (e.g., 79.99 * 100 = 7998.999...),
366+
* the consistent behavior ensures search queries work correctly.
367+
*
367368
* @param input Input value to parse floating point num from
368369
* @return Scaled value
369370
*/
370371
private double scale(Object input) {
371-
return new BigDecimal(Double.toString(parse(input))).multiply(BigDecimal.valueOf(scalingFactor)).doubleValue();
372+
return parse(input) * scalingFactor;
372373
}
373374

374375
@Override

modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java

Lines changed: 84 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
package org.opensearch.index.mapper;
3434

3535
import org.apache.lucene.document.Document;
36-
import org.apache.lucene.document.DoublePoint;
3736
import org.apache.lucene.document.LongField;
3837
import org.apache.lucene.document.LongPoint;
3938
import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -83,56 +82,6 @@ public void testTermsQuery() {
8382
assertEquals(LongField.newSetQuery("scaled_float", scaledValue1, scaledValue2), ft.termsQuery(Arrays.asList(value1, value2), null));
8483
}
8584

86-
public void testRangeQuery() throws IOException {
87-
// make sure the accuracy loss of scaled floats only occurs at index time
88-
// this test checks that searching scaled floats yields the same results as
89-
// searching doubles that are rounded to the closest half float
90-
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType(
91-
"scaled_float",
92-
true,
93-
false,
94-
false,
95-
Collections.emptyMap(),
96-
0.1 + randomDouble() * 100,
97-
null
98-
);
99-
Directory dir = newDirectory();
100-
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
101-
final int numDocs = 1000;
102-
for (int i = 0; i < numDocs; ++i) {
103-
Document doc = new Document();
104-
double value = (randomDouble() * 2 - 1) * 10000;
105-
long scaledValue = Math.round(value * ft.getScalingFactor());
106-
double rounded = scaledValue / ft.getScalingFactor();
107-
doc.add(new LongPoint("scaled_float", scaledValue));
108-
doc.add(new DoublePoint("double", rounded));
109-
w.addDocument(doc);
110-
}
111-
final DirectoryReader reader = DirectoryReader.open(w);
112-
w.close();
113-
IndexSearcher searcher = newSearcher(reader);
114-
final int numQueries = 1000;
115-
for (int i = 0; i < numQueries; ++i) {
116-
Double l = randomBoolean() ? null : (randomDouble() * 2 - 1) * 10000;
117-
Double u = randomBoolean() ? null : (randomDouble() * 2 - 1) * 10000;
118-
boolean includeLower = randomBoolean();
119-
boolean includeUpper = randomBoolean();
120-
Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery(
121-
"double",
122-
l,
123-
u,
124-
includeLower,
125-
includeUpper,
126-
false,
127-
true,
128-
MOCK_QSC
129-
);
130-
Query scaledFloatQ = ft.rangeQuery(l, u, includeLower, includeUpper, MOCK_QSC);
131-
assertEquals(searcher.count(doubleQ), searcher.count(scaledFloatQ));
132-
}
133-
IOUtils.close(reader, dir);
134-
}
135-
13685
public void testRoundsUpperBoundCorrectly() {
13786
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType("scaled_float", 100);
13887
Query scaledFloatQ = ft.rangeQuery(null, 0.1, true, false, MOCK_QSC);
@@ -142,11 +91,11 @@ public void testRoundsUpperBoundCorrectly() {
14291
scaledFloatQ = ft.rangeQuery(null, 0.095, true, false, MOCK_QSC);
14392
assertEquals("scaled_float:[-9223372036854775808 TO 9]", getQueryString(scaledFloatQ));
14493
scaledFloatQ = ft.rangeQuery(null, 0.095, true, true, MOCK_QSC);
145-
assertEquals("scaled_float:[-9223372036854775808 TO 9]", getQueryString(scaledFloatQ));
94+
assertEquals("scaled_float:[-9223372036854775808 TO 10]", getQueryString(scaledFloatQ));
14695
scaledFloatQ = ft.rangeQuery(null, 0.105, true, false, MOCK_QSC);
14796
assertEquals("scaled_float:[-9223372036854775808 TO 10]", getQueryString(scaledFloatQ));
14897
scaledFloatQ = ft.rangeQuery(null, 0.105, true, true, MOCK_QSC);
149-
assertEquals("scaled_float:[-9223372036854775808 TO 10]", getQueryString(scaledFloatQ));
98+
assertEquals("scaled_float:[-9223372036854775808 TO 11]", getQueryString(scaledFloatQ));
15099
scaledFloatQ = ft.rangeQuery(null, 79.99, true, true, MOCK_QSC);
151100
assertEquals("scaled_float:[-9223372036854775808 TO 7999]", getQueryString(scaledFloatQ));
152101
}
@@ -158,11 +107,11 @@ public void testRoundsLowerBoundCorrectly() {
158107
scaledFloatQ = ft.rangeQuery(-0.1, null, true, true, MOCK_QSC);
159108
assertEquals("scaled_float:[-10 TO 9223372036854775807]", getQueryString(scaledFloatQ));
160109
scaledFloatQ = ft.rangeQuery(-0.095, null, false, true, MOCK_QSC);
161-
assertEquals("scaled_float:[-9 TO 9223372036854775807]", getQueryString(scaledFloatQ));
110+
assertEquals("scaled_float:[-8 TO 9223372036854775807]", getQueryString(scaledFloatQ));
162111
scaledFloatQ = ft.rangeQuery(-0.095, null, true, true, MOCK_QSC);
163112
assertEquals("scaled_float:[-9 TO 9223372036854775807]", getQueryString(scaledFloatQ));
164113
scaledFloatQ = ft.rangeQuery(-0.105, null, false, true, MOCK_QSC);
165-
assertEquals("scaled_float:[-10 TO 9223372036854775807]", getQueryString(scaledFloatQ));
114+
assertEquals("scaled_float:[-9 TO 9223372036854775807]", getQueryString(scaledFloatQ));
166115
scaledFloatQ = ft.rangeQuery(-0.105, null, true, true, MOCK_QSC);
167116
assertEquals("scaled_float:[-10 TO 9223372036854775807]", getQueryString(scaledFloatQ));
168117
}
@@ -239,4 +188,84 @@ public void testFetchSourceValue() throws IOException {
239188
.fieldType();
240189
assertEquals(Collections.singletonList(2.71), fetchSourceValue(nullValueMapper, ""));
241190
}
191+
192+
public void testRandomPriceValues() {
193+
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType("price", 100);
194+
Query q = ft.rangeQuery(null, 19.99, true, true, MOCK_QSC);
195+
assertEquals("price:[-9223372036854775808 TO 1999]", getQueryString(q));
196+
q = ft.rangeQuery(null, 99.99, true, true, MOCK_QSC);
197+
assertEquals("price:[-9223372036854775808 TO 9999]", getQueryString(q));
198+
q = ft.rangeQuery(null, 9.99, true, true, MOCK_QSC);
199+
assertEquals("price:[-9223372036854775808 TO 999]", getQueryString(q));
200+
}
201+
202+
public void testIndexingQueryingConsistency() throws IOException {
203+
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType("scaled_float", 100);
204+
Directory dir = newDirectory();
205+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
206+
// Index the problematic value
207+
Document doc = new Document();
208+
double value = 79.99;
209+
long scaledValue = Math.round(value * 100);
210+
doc.add(new LongPoint("scaled_float", scaledValue));
211+
w.addDocument(doc);
212+
DirectoryReader reader = DirectoryReader.open(w);
213+
w.close();
214+
IndexSearcher searcher = newSearcher(reader);
215+
// Range query should find it
216+
Query rangeQ = ft.rangeQuery(79.0, 80.0, true, true, MOCK_QSC);
217+
assertEquals(1, searcher.count(rangeQ));
218+
// Exact range should find it
219+
Query exactQ = ft.rangeQuery(value, value, true, true, MOCK_QSC);
220+
assertEquals(1, searcher.count(exactQ));
221+
IOUtils.close(reader, dir);
222+
}
223+
224+
public void testLargeNumberIndexingAndQuerying() throws IOException {
225+
double largeValue = 92233720368547750.0;
226+
double scalingFactor = 100.0;
227+
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType(
228+
"scaled_float",
229+
true,
230+
false,
231+
true,
232+
Collections.emptyMap(),
233+
scalingFactor,
234+
null
235+
);
236+
Directory dir = newDirectory();
237+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
238+
// Index the document with the large value
239+
Document doc = new Document();
240+
long scaledValue = Math.round(largeValue * scalingFactor);
241+
doc.add(new LongPoint("scaled_float", scaledValue));
242+
doc.add(new SortedNumericDocValuesField("scaled_float", scaledValue));
243+
w.addDocument(doc);
244+
// Add another doc with a different value to ensure we're finding the right one
245+
Document doc2 = new Document();
246+
double otherValue = 1000.0;
247+
long scaledValue2 = Math.round(otherValue * scalingFactor);
248+
doc2.add(new LongPoint("scaled_float", scaledValue2));
249+
doc2.add(new SortedNumericDocValuesField("scaled_float", scaledValue2));
250+
w.addDocument(doc2);
251+
DirectoryReader reader = DirectoryReader.open(w);
252+
w.close();
253+
IndexSearcher searcher = newSearcher(reader);
254+
// Test 1: Term query should find the exact document
255+
Query termQuery = ft.termQuery(largeValue, MOCK_QSC);
256+
assertEquals("Term query should find exactly one document", 1, searcher.count(termQuery));
257+
// Test 2: Range query containing the value should find it
258+
Query rangeQuery = ft.rangeQuery(largeValue - 1, largeValue + 1, true, true, MOCK_QSC);
259+
assertEquals("Range query should find the large value", 1, searcher.count(rangeQuery));
260+
// Test 3: Exact range query (value to value) should find it
261+
Query exactRangeQuery = ft.rangeQuery(largeValue, largeValue, true, true, MOCK_QSC);
262+
assertEquals("Exact range query should find the document", 1, searcher.count(exactRangeQuery));
263+
// Test 4: Range query excluding the value should not find it
264+
Query exclusiveRangeQuery = ft.rangeQuery(largeValue, largeValue + 1, false, false, MOCK_QSC);
265+
assertEquals("Exclusive range should not find the document", 0, searcher.count(exclusiveRangeQuery));
266+
// Test 5: Terms query with multiple values should work
267+
Query termsQuery = ft.termsQuery(Arrays.asList(largeValue, otherValue), MOCK_QSC);
268+
assertEquals("Terms query should find both documents", 2, searcher.count(termsQuery));
269+
IOUtils.close(reader, dir);
270+
}
242271
}

0 commit comments

Comments
 (0)