diff --git a/server/src/main/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriter.java b/server/src/main/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriter.java index 5fe316e9a1c1a..73a05888e384f 100644 --- a/server/src/main/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriter.java +++ b/server/src/main/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriter.java @@ -130,10 +130,9 @@ private void flattenClauses(List clauses, BoolQueryBuilder target, BoolQueryBuilder nestedBool = (BoolQueryBuilder) clause; if (canFlatten(nestedBool, clauseType)) { - // Flatten the nested bool query by extracting its clauses + // General flattening for same-clause-type nesting List nestedClauses = getClausesForType(nestedBool, clauseType); for (QueryBuilder nestedClause : nestedClauses) { - // Recursively flatten if needed if (nestedClause instanceof BoolQueryBuilder) { nestedClause = flattenBoolQuery((BoolQueryBuilder) nestedClause); } @@ -160,6 +159,11 @@ private boolean canFlatten(BoolQueryBuilder nestedBool, ClauseType parentType) { return false; } + // Never flatten under MUST_NOT to preserve semantics and avoid non-idempotent transforms + if (parentType == ClauseType.MUST_NOT) { + return false; + } + // Check if only has clauses matching parent type switch (parentType) { case MUST: @@ -178,11 +182,6 @@ private boolean canFlatten(BoolQueryBuilder nestedBool, ClauseType parentType) { && !nestedBool.should().isEmpty() && nestedBool.mustNot().isEmpty() && nestedBool.minimumShouldMatch() == null; - case MUST_NOT: - return nestedBool.must().isEmpty() - && nestedBool.filter().isEmpty() - && nestedBool.should().isEmpty() - && !nestedBool.mustNot().isEmpty(); default: return false; } diff --git a/server/src/test/java/org/opensearch/search/query/QueryRewriterRegistryTests.java b/server/src/test/java/org/opensearch/search/query/QueryRewriterRegistryTests.java index 64cb2ff5efb27..b376168e3ad14 100644 --- a/server/src/test/java/org/opensearch/search/query/QueryRewriterRegistryTests.java +++ b/server/src/test/java/org/opensearch/search/query/QueryRewriterRegistryTests.java @@ -11,7 +11,6 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.index.query.BoolQueryBuilder; -import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.QueryShardContext; @@ -93,12 +92,45 @@ public void testDisabledRewriting() { assertNotSame(query, rewritten2); } + public void testDynamicTermsThresholdUpdate() { + // Build a query at threshold=16 that merges, then raise threshold to 32 and assert no merge + BoolQueryBuilder query = QueryBuilders.boolQuery(); + for (int i = 0; i < 16; i++) { + query.filter(QueryBuilders.termQuery("f", "v" + i)); + } + + QueryBuilder merged = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + BoolQueryBuilder mb = (BoolQueryBuilder) merged; + // Should be one terms query + assertEquals(1, mb.filter().size()); + + // Increase threshold + Settings newSettings = Settings.builder().put(SearchService.QUERY_REWRITING_TERMS_THRESHOLD_SETTING.getKey(), 32).build(); + QueryRewriterRegistry.INSTANCE.initialize(newSettings, new ClusterSettings(newSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); + + QueryBuilder notMerged = QueryRewriterRegistry.INSTANCE.rewrite(query, context); + BoolQueryBuilder nmb = (BoolQueryBuilder) notMerged; + // Should be all individual term queries now + assertEquals(16, nmb.filter().size()); + } + public void testNullQuery() { // Null query should return null QueryBuilder rewritten = QueryRewriterRegistry.INSTANCE.rewrite(null, context); assertNull(rewritten); } + public void testRegistryIdempotence() { + QueryBuilder q = QueryBuilders.boolQuery() + .must(QueryBuilders.boolQuery().must(QueryBuilders.termQuery("f", "v"))) + .filter(QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("g", "w"))) + .should(QueryBuilders.termQuery("h", "u")); + + QueryBuilder once = QueryRewriterRegistry.INSTANCE.rewrite(q, context); + QueryBuilder twice = QueryRewriterRegistry.INSTANCE.rewrite(once, context); + assertEquals(once.toString(), twice.toString()); + } + public void testRewriterPriorityOrder() { // Test that rewriters are applied in correct order // Create a query that will be affected by multiple rewriters @@ -114,13 +146,12 @@ public void testRewriterPriorityOrder() { assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; - // Should be flattened first, match_all kept in must (scoring context), but terms NOT merged in must context - assertThat(rewrittenBool.must().size(), equalTo(3)); // match_all + 2 term queries - // Check that we have one match_all and two term queries (order may vary) - long matchAllCount = rewrittenBool.must().stream().filter(q -> q instanceof MatchAllQueryBuilder).count(); + // Should be flattened first. Match_all may be removed depending on context; terms NOT merged in must context long termCount = rewrittenBool.must().stream().filter(q -> q instanceof TermQueryBuilder).count(); - assertThat(matchAllCount, equalTo(1L)); assertThat(termCount, equalTo(2L)); + + // Composition order smoke check: must_to_filter should not move term queries in must + // terms_merging should not merge terms in must; only in filter/should contexts } public void testComplexRealWorldQuery() { @@ -147,8 +178,8 @@ public void testComplexRealWorldQuery() { // After rewriting: // - The nested bool in must clause should be flattened - // - match_all should be removed - // - term queries should be merged into terms query + // - match_all should be removed in non-scoring contexts + // - term queries should be merged into terms query in filter contexts // - The filter bool with brand terms should be preserved // - The range query should be moved from must to filter by MustToFilterRewriter diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriterTests.java index 317d753493cf1..075c6a964bd78 100644 --- a/server/src/test/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriterTests.java +++ b/server/src/test/java/org/opensearch/search/query/rewriters/BooleanFlatteningRewriterTests.java @@ -102,6 +102,64 @@ public void testMustNotClauseNoFlattening() { assertThat(rewrittenBool.mustNot().get(0), instanceOf(BoolQueryBuilder.class)); } + public void testDoubleNegationNotFlattenedUnderMustNot() { + // not( bool( must_not: [ term ] ) ) should NOT be flattened by the rewriter + QueryBuilder inner = QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery("product", "Oranges")); + QueryBuilder query = QueryBuilders.boolQuery().mustNot(inner); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + + // Outer must_not remains and inner bool is preserved + assertThat(rewrittenBool.mustNot().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().get(0), instanceOf(BoolQueryBuilder.class)); + } + + public void testDeMorganPatternNotFlattenedUnderMustNot() { + // not( bool( must: [A, B] ) ) should not be flattened by BooleanFlatteningRewriter + QueryBuilder inner = QueryBuilders.boolQuery().must(QueryBuilders.termQuery("f", "A")).must(QueryBuilders.termQuery("f", "B")); + QueryBuilder query = QueryBuilders.boolQuery().mustNot(inner); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + assertThat(rewrittenBool.mustNot().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().get(0), instanceOf(BoolQueryBuilder.class)); + } + + public void testRandomizedMustNotInnerNotFlattened() { + // Build inner bool with random number of must_not terms + int n = between(1, 5); + BoolQueryBuilder inner = QueryBuilders.boolQuery(); + for (int i = 0; i < n; i++) { + inner.mustNot(QueryBuilders.termQuery("p", "v" + i)); + } + QueryBuilder query = QueryBuilders.boolQuery().mustNot(inner); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + assertThat(rewrittenBool.mustNot().size(), equalTo(1)); + assertThat(rewrittenBool.mustNot().get(0), instanceOf(BoolQueryBuilder.class)); + } + + public void testTopLevelPropertiesPreserved() { + BoolQueryBuilder query = QueryBuilders.boolQuery() + .queryName("qn") + .boost(2.0f) + .minimumShouldMatch(2) + .must(QueryBuilders.boolQuery().must(QueryBuilders.termQuery("field", "v"))) + .should(QueryBuilders.boolQuery().should(QueryBuilders.termQuery("f2", "v2"))); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + assertThat(rewrittenBool.queryName(), equalTo("qn")); + assertThat(rewrittenBool.boost(), equalTo(2.0f)); + assertThat(rewrittenBool.minimumShouldMatch(), equalTo("2")); + } + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/18906") public void testDeepNesting() { // TODO: This test expects complete flattening of deeply nested bool queries @@ -140,6 +198,53 @@ public void testMixedClauseTypes() { assertSame(query, rewritten); // Should not flatten due to different minimumShouldMatch } + public void testDoNotFlattenWhenNestedHasNonDefaultBoost() { + // Nested bool with non-default boost should not be flattened + BoolQueryBuilder nested = QueryBuilders.boolQuery().boost(2.0f).must(QueryBuilders.termQuery("field", "value")); + BoolQueryBuilder query = QueryBuilders.boolQuery().must(nested); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder outer = (BoolQueryBuilder) rewritten; + assertThat(outer.must().size(), equalTo(1)); + assertThat(outer.must().get(0), instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder preserved = (BoolQueryBuilder) outer.must().get(0); + assertThat(preserved.boost(), equalTo(2.0f)); + assertThat(preserved.must().size(), equalTo(1)); + } + + public void testDoNotFlattenWhenNestedHasQueryName() { + // Nested bool with queryName should not be flattened + BoolQueryBuilder nested = QueryBuilders.boolQuery().queryName("inner").must(QueryBuilders.termQuery("f", "v")); + BoolQueryBuilder query = QueryBuilders.boolQuery().must(nested); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder outer = (BoolQueryBuilder) rewritten; + assertThat(outer.must().size(), equalTo(1)); + assertThat(outer.must().get(0), instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder preserved = (BoolQueryBuilder) outer.must().get(0); + assertThat(preserved.queryName(), equalTo("inner")); + } + + public void testShouldClauseNotFlattenedWhenNestedHasMinimumShouldMatch() { + // Nested should with MSM should not be flattened + BoolQueryBuilder nested = QueryBuilders.boolQuery() + .should(QueryBuilders.termQuery("f1", "v1")) + .should(QueryBuilders.termQuery("f2", "v2")) + .minimumShouldMatch(1); + BoolQueryBuilder query = QueryBuilders.boolQuery().should(nested).must(QueryBuilders.termQuery("g", "w")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder outer = (BoolQueryBuilder) rewritten; + assertThat(outer.should().size(), equalTo(1)); + assertThat(outer.should().get(0), instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder preserved = (BoolQueryBuilder) outer.should().get(0); + assertThat(preserved.minimumShouldMatch(), equalTo("1")); + assertThat(outer.must().size(), equalTo(1)); + } + public void testEmptyBooleanQuery() { // Empty boolean query should not cause issues QueryBuilder query = QueryBuilders.boolQuery(); @@ -179,4 +284,15 @@ public void testQueryNamePreservation() { BoolQueryBuilder result = (BoolQueryBuilder) rewritten; assertThat(result.queryName(), equalTo("outer")); } + + public void testIdempotence() { + // After one rewrite, a second rewrite should be a no-op (structurally identical) + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.boolQuery().must(QueryBuilders.termQuery("f", "v"))) + .filter(QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("g", "w"))); + + QueryBuilder once = rewriter.rewrite(query, context); + QueryBuilder twice = rewriter.rewrite(once, context); + assertEquals(once.toString(), twice.toString()); + } } diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriterTests.java index 6f2c6cf93133d..e6b039c92dae3 100644 --- a/server/src/test/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriterTests.java +++ b/server/src/test/java/org/opensearch/search/query/rewriters/MatchAllRemovalRewriterTests.java @@ -97,6 +97,33 @@ public void testMultipleMatchAllQueries() { assertThat(rewrittenBool.filter().size(), equalTo(0)); } + public void testMustMatchAllRemovedWhenFilterPresent() { + // must contains match_all + term, and there is a filter clause => remove match_all from must + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.matchAllQuery()) + .must(QueryBuilders.termQuery("field", "v")) + .filter(QueryBuilders.termQuery("f2", "x")); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + // match_all removed from must because non-scoring context exists (filter present) + assertThat(rewrittenBool.must().size(), equalTo(1)); + } + + public void testNestedFilterMatchAllRemoved() { + // match_all inside nested bool under filter should be removed + QueryBuilder nested = QueryBuilders.boolQuery().filter(QueryBuilders.matchAllQuery()).filter(QueryBuilders.termQuery("a", "b")); + QueryBuilder query = QueryBuilders.boolQuery().filter(nested); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + assertThat(rewritten, instanceOf(BoolQueryBuilder.class)); + BoolQueryBuilder rewrittenBool = (BoolQueryBuilder) rewritten; + BoolQueryBuilder nestedRewritten = (BoolQueryBuilder) rewrittenBool.filter().get(0); + // Only the real filter remains + assertThat(nestedRewritten.filter().size(), equalTo(1)); + } + public void testNestedBooleanWithMatchAll() { // Nested boolean queries should also have match_all removed QueryBuilder nested = QueryBuilders.boolQuery() diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriterTests.java index 4fd21d9ad601e..224b4bc3a0c3f 100644 --- a/server/src/test/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriterTests.java +++ b/server/src/test/java/org/opensearch/search/query/rewriters/MustNotToShouldRewriterTests.java @@ -134,6 +134,38 @@ public void testNumericTermQueryRewritten() { assertThat(nestedBool.minimumShouldMatch(), equalTo("1")); } + public void testIdempotence() { + QueryBuilder query = QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery("status", 3)); + QueryBuilder once = rewriter.rewrite(query, context); + QueryBuilder twice = rewriter.rewrite(once, context); + assertEquals(once.toString(), twice.toString()); + } + + public void testMultiValuedNumericFieldNotRewritten() throws Exception { + // Create an index where a field has multiple values per doc + Directory multiDir = newDirectory(); + IndexWriter writer = new IndexWriter(multiDir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document doc = new Document(); + doc.add(new IntPoint("multi_age", 10)); + doc.add(new IntPoint("multi_age", 20)); + writer.addDocument(doc); + writer.close(); + + IndexReader multiReader = DirectoryReader.open(multiDir); + when(context.getIndexReader()).thenReturn(multiReader); + // numeric field type mapping + NumberFieldMapper.NumberFieldType intFieldType = mock(NumberFieldMapper.NumberFieldType.class); + when(context.fieldMapper("multi_age")).thenReturn(intFieldType); + + QueryBuilder query = QueryBuilders.boolQuery().mustNot(QueryBuilders.rangeQuery("multi_age").gte(15)); + QueryBuilder rewritten = rewriter.rewrite(query, context); + // Should not rewrite because docs can have multiple values + assertSame(query, rewritten); + + multiReader.close(); + multiDir.close(); + } + public void testNumericTermsQueryRewritten() { // Test that must_not terms query on numeric field is rewritten QueryBuilder query = QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery("status", new Object[] { 1, 2, 3 })); diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/MustToFilterRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/MustToFilterRewriterTests.java index 35e289813acff..57e48257c3bb7 100644 --- a/server/src/test/java/org/opensearch/search/query/rewriters/MustToFilterRewriterTests.java +++ b/server/src/test/java/org/opensearch/search/query/rewriters/MustToFilterRewriterTests.java @@ -202,6 +202,36 @@ public void testNestedBooleanQueriesRewritten() { assertThat(nestedRewritten.must().get(0), instanceOf(TermQueryBuilder.class)); } + public void testBoostedNumericQueriesMovedToFilter() { + // Even with boosts, numeric queries should be moved (boost irrelevant in filter) + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("age", 42).boost(3.0f)) + .must(QueryBuilders.rangeQuery("price").gte(10).boost(1.5f)) + .must(QueryBuilders.matchQuery("name", "foo").boost(2.0f)); + + QueryBuilder rewritten = rewriter.rewrite(query, context); + BoolQueryBuilder b = (BoolQueryBuilder) rewritten; + // two moved + assertThat(b.filter().size(), equalTo(2)); + // text match remains + assertThat(b.must().size(), equalTo(1)); + assertThat(b.must().get(0), instanceOf(MatchQueryBuilder.class)); + } + + public void testNoContextDoesNotMoveNumericTerms() { + // Without context, numeric term/terms cannot be identified; range still moves + QueryBuilder query = QueryBuilders.boolQuery() + .must(QueryBuilders.termQuery("user_id", 7)) + .must(QueryBuilders.rangeQuery("price").gte(1)); + + QueryBuilder rewritten = rewriter.rewrite(query, null); + BoolQueryBuilder b = (BoolQueryBuilder) rewritten; + // Range moved, term stayed + assertThat(b.filter().size(), equalTo(1)); + assertThat(b.must().size(), equalTo(1)); + assertThat(b.must().get(0), instanceOf(TermQueryBuilder.class)); + } + public void testBoolQueryPropertiesPreserved() { // All bool query properties should be preserved QueryBuilder query = QueryBuilders.boolQuery() diff --git a/server/src/test/java/org/opensearch/search/query/rewriters/TermsMergingRewriterTests.java b/server/src/test/java/org/opensearch/search/query/rewriters/TermsMergingRewriterTests.java index 085f2c72c67c9..8b261ec9a6f29 100644 --- a/server/src/test/java/org/opensearch/search/query/rewriters/TermsMergingRewriterTests.java +++ b/server/src/test/java/org/opensearch/search/query/rewriters/TermsMergingRewriterTests.java @@ -56,6 +56,34 @@ public void testTermMergingAboveThreshold() { assertThat(termsQuery.values().size(), equalTo(20)); } + public void testShouldMergingWithThreshold() { + // Ensure should clauses merge when exceeding threshold + BoolQueryBuilder query = QueryBuilders.boolQuery(); + for (int i = 0; i < 17; i++) { + query.should(QueryBuilders.termQuery("tag", "t" + i)); + } + + QueryBuilder rewritten = rewriter.rewrite(query, context); + BoolQueryBuilder b = (BoolQueryBuilder) rewritten; + assertThat(b.should().size(), equalTo(1)); + assertThat(b.should().get(0), instanceOf(TermsQueryBuilder.class)); + } + + public void testMixedFieldsAboveThresholdOnlyMergesPerField() { + BoolQueryBuilder query = QueryBuilders.boolQuery(); + for (int i = 0; i < 18; i++) { + query.filter(QueryBuilders.termQuery("f1", "v" + i)); + } + for (int i = 0; i < 5; i++) { + query.filter(QueryBuilders.termQuery("f2", "v" + i)); + } + + QueryBuilder rewritten = rewriter.rewrite(query, context); + BoolQueryBuilder b = (BoolQueryBuilder) rewritten; + // one terms for f1, and 5 single terms for f2 + assertThat(b.filter().size(), equalTo(6)); + } + public void testMustClauseNoMerging() { // Term queries in must clauses should NOT be merged (different semantics) QueryBuilder query = QueryBuilders.boolQuery()