diff --git a/CHANGELOG.md b/CHANGELOG.md index 80c6804c11a0b..4e1b82ad838c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix skip_unavailable setting changing to default during node drop issue ([#18766](https://github.com/opensearch-project/OpenSearch/pull/18766)) - Fix pull-based ingestion pause state initialization during replica promotion ([#19212](https://github.com/opensearch-project/OpenSearch/pull/19212)) - Fix QueryPhaseResultConsumer incomplete callback loops ([#19231](https://github.com/opensearch-project/OpenSearch/pull/19231)) +- Fix the `scaled_float` precision issue ([#19188](https://github.com/opensearch-project/OpenSearch/pull/19188)) ### Dependencies - Bump `com.netflix.nebula.ospackage-base` from 12.0.0 to 12.1.0 ([#19019](https://github.com/opensearch-project/OpenSearch/pull/19019)) diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java index 46f403251c323..e17d56fcab3b8 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java @@ -65,7 +65,6 @@ import org.opensearch.search.lookup.SearchLookup; import java.io.IOException; -import java.math.BigDecimal; import java.time.ZoneId; import java.util.ArrayList; import java.util.Arrays; @@ -279,21 +278,22 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower failIfNotIndexedAndNoDocValues(); Long lo = null; if (lowerTerm != null) { - double dValue = scale(lowerTerm); - if (includeLower == false) { - dValue = Math.nextUp(dValue); - } - lo = Math.round(Math.ceil(dValue)); + lo = Math.round(scale(lowerTerm)); } Long hi = null; if (upperTerm != null) { - double dValue = scale(upperTerm); - if (includeUpper == false) { - dValue = Math.nextDown(dValue); - } - hi = Math.round(Math.floor(dValue)); + hi = Math.round(scale(upperTerm)); } - Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(name(), lo, hi, true, true, hasDocValues(), isSearchable(), context); + Query query = NumberFieldMapper.NumberType.LONG.rangeQuery( + name(), + lo, + hi, + includeLower, + includeUpper, + hasDocValues(), + isSearchable(), + context + ); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -360,15 +360,16 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) { /** * Parses input value and multiplies it with the scaling factor. - * Uses the round-trip of creating a {@link BigDecimal} from the stringified {@code double} - * input to ensure intuitively exact floating point operations. - * (e.g. for a scaling factor of 100, JVM behaviour results in {@code 79.99D * 100 ==> 7998.99..} compared to - * {@code scale(79.99) ==> 7999}) + * Note: Uses direct floating-point multiplication for consistency + * between indexing and querying. While this may result in + * floating-point imprecision (e.g., 79.99 * 100 = 7998.999...), + * the consistent behavior ensures search queries work correctly. + * * @param input Input value to parse floating point num from * @return Scaled value */ private double scale(Object input) { - return new BigDecimal(Double.toString(parse(input))).multiply(BigDecimal.valueOf(scalingFactor)).doubleValue(); + return parse(input) * scalingFactor; } @Override diff --git a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java index 97976d0db0b96..8f8e2ad17a132 100644 --- a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java @@ -33,7 +33,6 @@ package org.opensearch.index.mapper; import org.apache.lucene.document.Document; -import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; @@ -83,56 +82,6 @@ public void testTermsQuery() { assertEquals(LongField.newSetQuery("scaled_float", scaledValue1, scaledValue2), ft.termsQuery(Arrays.asList(value1, value2), null)); } - public void testRangeQuery() throws IOException { - // make sure the accuracy loss of scaled floats only occurs at index time - // this test checks that searching scaled floats yields the same results as - // searching doubles that are rounded to the closest half float - ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType( - "scaled_float", - true, - false, - false, - Collections.emptyMap(), - 0.1 + randomDouble() * 100, - null - ); - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null)); - final int numDocs = 1000; - for (int i = 0; i < numDocs; ++i) { - Document doc = new Document(); - double value = (randomDouble() * 2 - 1) * 10000; - long scaledValue = Math.round(value * ft.getScalingFactor()); - double rounded = scaledValue / ft.getScalingFactor(); - doc.add(new LongPoint("scaled_float", scaledValue)); - doc.add(new DoublePoint("double", rounded)); - w.addDocument(doc); - } - final DirectoryReader reader = DirectoryReader.open(w); - w.close(); - IndexSearcher searcher = newSearcher(reader); - final int numQueries = 1000; - for (int i = 0; i < numQueries; ++i) { - Double l = randomBoolean() ? null : (randomDouble() * 2 - 1) * 10000; - Double u = randomBoolean() ? null : (randomDouble() * 2 - 1) * 10000; - boolean includeLower = randomBoolean(); - boolean includeUpper = randomBoolean(); - Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery( - "double", - l, - u, - includeLower, - includeUpper, - false, - true, - MOCK_QSC - ); - Query scaledFloatQ = ft.rangeQuery(l, u, includeLower, includeUpper, MOCK_QSC); - assertEquals(searcher.count(doubleQ), searcher.count(scaledFloatQ)); - } - IOUtils.close(reader, dir); - } - public void testRoundsUpperBoundCorrectly() { ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType("scaled_float", 100); Query scaledFloatQ = ft.rangeQuery(null, 0.1, true, false, MOCK_QSC); @@ -142,11 +91,11 @@ public void testRoundsUpperBoundCorrectly() { scaledFloatQ = ft.rangeQuery(null, 0.095, true, false, MOCK_QSC); assertEquals("scaled_float:[-9223372036854775808 TO 9]", getQueryString(scaledFloatQ)); scaledFloatQ = ft.rangeQuery(null, 0.095, true, true, MOCK_QSC); - assertEquals("scaled_float:[-9223372036854775808 TO 9]", getQueryString(scaledFloatQ)); + assertEquals("scaled_float:[-9223372036854775808 TO 10]", getQueryString(scaledFloatQ)); scaledFloatQ = ft.rangeQuery(null, 0.105, true, false, MOCK_QSC); assertEquals("scaled_float:[-9223372036854775808 TO 10]", getQueryString(scaledFloatQ)); scaledFloatQ = ft.rangeQuery(null, 0.105, true, true, MOCK_QSC); - assertEquals("scaled_float:[-9223372036854775808 TO 10]", getQueryString(scaledFloatQ)); + assertEquals("scaled_float:[-9223372036854775808 TO 11]", getQueryString(scaledFloatQ)); scaledFloatQ = ft.rangeQuery(null, 79.99, true, true, MOCK_QSC); assertEquals("scaled_float:[-9223372036854775808 TO 7999]", getQueryString(scaledFloatQ)); } @@ -158,11 +107,11 @@ public void testRoundsLowerBoundCorrectly() { scaledFloatQ = ft.rangeQuery(-0.1, null, true, true, MOCK_QSC); assertEquals("scaled_float:[-10 TO 9223372036854775807]", getQueryString(scaledFloatQ)); scaledFloatQ = ft.rangeQuery(-0.095, null, false, true, MOCK_QSC); - assertEquals("scaled_float:[-9 TO 9223372036854775807]", getQueryString(scaledFloatQ)); + assertEquals("scaled_float:[-8 TO 9223372036854775807]", getQueryString(scaledFloatQ)); scaledFloatQ = ft.rangeQuery(-0.095, null, true, true, MOCK_QSC); assertEquals("scaled_float:[-9 TO 9223372036854775807]", getQueryString(scaledFloatQ)); scaledFloatQ = ft.rangeQuery(-0.105, null, false, true, MOCK_QSC); - assertEquals("scaled_float:[-10 TO 9223372036854775807]", getQueryString(scaledFloatQ)); + assertEquals("scaled_float:[-9 TO 9223372036854775807]", getQueryString(scaledFloatQ)); scaledFloatQ = ft.rangeQuery(-0.105, null, true, true, MOCK_QSC); assertEquals("scaled_float:[-10 TO 9223372036854775807]", getQueryString(scaledFloatQ)); } @@ -239,4 +188,84 @@ public void testFetchSourceValue() throws IOException { .fieldType(); assertEquals(Collections.singletonList(2.71), fetchSourceValue(nullValueMapper, "")); } + + public void testRandomPriceValues() { + ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType("price", 100); + Query q = ft.rangeQuery(null, 19.99, true, true, MOCK_QSC); + assertEquals("price:[-9223372036854775808 TO 1999]", getQueryString(q)); + q = ft.rangeQuery(null, 99.99, true, true, MOCK_QSC); + assertEquals("price:[-9223372036854775808 TO 9999]", getQueryString(q)); + q = ft.rangeQuery(null, 9.99, true, true, MOCK_QSC); + assertEquals("price:[-9223372036854775808 TO 999]", getQueryString(q)); + } + + public void testIndexingQueryingConsistency() throws IOException { + ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType("scaled_float", 100); + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null)); + // Index the problematic value + Document doc = new Document(); + double value = 79.99; + long scaledValue = Math.round(value * 100); + doc.add(new LongPoint("scaled_float", scaledValue)); + w.addDocument(doc); + DirectoryReader reader = DirectoryReader.open(w); + w.close(); + IndexSearcher searcher = newSearcher(reader); + // Range query should find it + Query rangeQ = ft.rangeQuery(79.0, 80.0, true, true, MOCK_QSC); + assertEquals(1, searcher.count(rangeQ)); + // Exact range should find it + Query exactQ = ft.rangeQuery(value, value, true, true, MOCK_QSC); + assertEquals(1, searcher.count(exactQ)); + IOUtils.close(reader, dir); + } + + public void testLargeNumberIndexingAndQuerying() throws IOException { + double largeValue = 92233720368547750.0; + double scalingFactor = 100.0; + ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType( + "scaled_float", + true, + false, + true, + Collections.emptyMap(), + scalingFactor, + null + ); + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null)); + // Index the document with the large value + Document doc = new Document(); + long scaledValue = Math.round(largeValue * scalingFactor); + doc.add(new LongPoint("scaled_float", scaledValue)); + doc.add(new SortedNumericDocValuesField("scaled_float", scaledValue)); + w.addDocument(doc); + // Add another doc with a different value to ensure we're finding the right one + Document doc2 = new Document(); + double otherValue = 1000.0; + long scaledValue2 = Math.round(otherValue * scalingFactor); + doc2.add(new LongPoint("scaled_float", scaledValue2)); + doc2.add(new SortedNumericDocValuesField("scaled_float", scaledValue2)); + w.addDocument(doc2); + DirectoryReader reader = DirectoryReader.open(w); + w.close(); + IndexSearcher searcher = newSearcher(reader); + // Test 1: Term query should find the exact document + Query termQuery = ft.termQuery(largeValue, MOCK_QSC); + assertEquals("Term query should find exactly one document", 1, searcher.count(termQuery)); + // Test 2: Range query containing the value should find it + Query rangeQuery = ft.rangeQuery(largeValue - 1, largeValue + 1, true, true, MOCK_QSC); + assertEquals("Range query should find the large value", 1, searcher.count(rangeQuery)); + // Test 3: Exact range query (value to value) should find it + Query exactRangeQuery = ft.rangeQuery(largeValue, largeValue, true, true, MOCK_QSC); + assertEquals("Exact range query should find the document", 1, searcher.count(exactRangeQuery)); + // Test 4: Range query excluding the value should not find it + Query exclusiveRangeQuery = ft.rangeQuery(largeValue, largeValue + 1, false, false, MOCK_QSC); + assertEquals("Exclusive range should not find the document", 0, searcher.count(exclusiveRangeQuery)); + // Test 5: Terms query with multiple values should work + Query termsQuery = ft.termsQuery(Arrays.asList(largeValue, otherValue), MOCK_QSC); + assertEquals("Terms query should find both documents", 2, searcher.count(termsQuery)); + IOUtils.close(reader, dir); + } }