diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 9f78b245942..f4b9abe8330 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -380,8 +380,7 @@ public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) { fields.forEach( field -> newEnv.define(new Symbol(Namespace.FIELD_NAME, field.toString()), field.type())); - List options = node.getArguments(); - Integer noOfResults = (Integer) options.get(0).getValue().getValue(); + Integer noOfResults = node.getNoOfResults(); return new LogicalRareTopN(child, node.getCommandType(), noOfResults, fields, groupBys); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 86b2343ace1..67cc893c5b0 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -540,8 +540,16 @@ public static RareTopN rareTopN( List noOfResults, List groupList, Field... fields) { - return new RareTopN(input, commandType, noOfResults, Arrays.asList(fields), groupList) - .attach(input); + Integer N = + (Integer) + Argument.ArgumentMap.of(noOfResults) + .getOrDefault("noOfResults", new Literal(10, DataType.INTEGER)) + .getValue(); + List removed = + noOfResults.stream() + .filter(argument -> !argument.getArgName().equals("noOfResults")) + .toList(); + return new RareTopN(commandType, N, removed, Arrays.asList(fields), groupList).attach(input); } public static Limit limit(UnresolvedPlan input, Integer limit, Integer offset) { diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Argument.java b/core/src/main/java/org/opensearch/sql/ast/expression/Argument.java index 0e0e032e22b..607e27b7de9 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/Argument.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/Argument.java @@ -37,6 +37,8 @@ public R accept(AbstractNodeVisitor nodeVisitor, C context) { } /** ArgumentMap is a helper class to get argument value by name. */ + @EqualsAndHashCode + @ToString public static class ArgumentMap { private final Map map; diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/RareTopN.java b/core/src/main/java/org/opensearch/sql/ast/tree/RareTopN.java index 3fd3aa3a2c0..6c543ddc8c3 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/RareTopN.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/RareTopN.java @@ -7,7 +7,6 @@ import com.google.common.collect.ImmutableList; import java.util.List; -import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -24,12 +23,11 @@ @ToString @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor -@AllArgsConstructor public class RareTopN extends UnresolvedPlan { private UnresolvedPlan child; private final CommandType commandType; - // arguments: noOfResults: Integer, countField: String, showCount: Boolean + private final Integer noOfResults; private final List arguments; private final List fields; private final List groupExprList; @@ -54,4 +52,10 @@ public enum CommandType { TOP, RARE } + + public enum Option { + countField, + showCount, + useNull, + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 2c90f059986..67151519274 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -15,9 +15,9 @@ import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; -import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_MAIN; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_SUBSEARCH; +import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_TOP_RARE; import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation; import static org.opensearch.sql.calcite.utils.PlanUtils.getRexCall; import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild; @@ -1128,22 +1128,7 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { Pair, List> aggregationAttributes = aggregateWithTrimming(groupExprList, aggExprList, context); if (toAddHintsOnAggregate) { - final RelHint statHits = - RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); - assert context.relBuilder.peek() instanceof LogicalAggregate - : "Stats hits should be added to LogicalAggregate"; - context.relBuilder.hints(statHits); - context - .relBuilder - .getCluster() - .setHintStrategies( - HintStrategyTable.builder() - .hintStrategy( - "stats_args", - (hint, rel) -> { - return rel instanceof LogicalAggregate; - }) - .build()); + addIgnoreNullBucketHintToAggregate(context); } // schema reordering @@ -1862,9 +1847,8 @@ public RelNode visitKmeans(Kmeans node, CalcitePlanContext context) { @Override public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) { visitChildren(node, context); - - ArgumentMap arguments = ArgumentMap.of(node.getArguments()); - String countFieldName = (String) arguments.get("countField").getValue(); + ArgumentMap argumentMap = ArgumentMap.of(node.getArguments()); + String countFieldName = (String) argumentMap.get(RareTopN.Option.countField.name()).getValue(); if (context.relBuilder.peek().getRowType().getFieldNames().contains(countFieldName)) { throw new IllegalArgumentException( "Field `" @@ -1879,8 +1863,27 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) { groupExprList.addAll(fieldList); List aggExprList = List.of(AstDSL.alias(countFieldName, AstDSL.aggregate("count", null))); + + // if usenull=false, add a isNotNull before Aggregate and the hint to this Aggregate + Boolean bucketNullable = (Boolean) argumentMap.get(RareTopN.Option.useNull.name()).getValue(); + boolean toAddHintsOnAggregate = false; + if (!bucketNullable && !groupExprList.isEmpty()) { + toAddHintsOnAggregate = true; + // add isNotNull filter before aggregation to filter out null bucket + List groupByList = + groupExprList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); + context.relBuilder.filter( + PlanUtils.getSelectColumns(groupByList).stream() + .map(context.relBuilder::field) + .map(context.relBuilder::isNotNull) + .toList()); + } aggregateWithTrimming(groupExprList, aggExprList, context); + if (toAddHintsOnAggregate) { + addIgnoreNullBucketHintToAggregate(context); + } + // 2. add a window column List partitionKeys = rexVisitor.analyze(node.getGroupExprList(), context); RexNode countField; @@ -1899,26 +1902,46 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) { List.of(countField), WindowFrame.toCurrentRow()); context.relBuilder.projectPlus( - context.relBuilder.alias(rowNumberWindowOver, ROW_NUMBER_COLUMN_NAME)); + context.relBuilder.alias(rowNumberWindowOver, ROW_NUMBER_COLUMN_NAME_TOP_RARE)); // 3. filter row_number() <= k in each partition - Integer N = (Integer) arguments.get("noOfResults").getValue(); + int k = node.getNoOfResults(); context.relBuilder.filter( context.relBuilder.lessThanOrEqual( - context.relBuilder.field(ROW_NUMBER_COLUMN_NAME), context.relBuilder.literal(N))); + context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE), + context.relBuilder.literal(k))); // 4. project final output. the default output is group by list + field list - Boolean showCount = (Boolean) arguments.get("showCount").getValue(); + Boolean showCount = (Boolean) argumentMap.get(RareTopN.Option.showCount.name()).getValue(); if (showCount) { - context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_NAME)); + context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE)); } else { context.relBuilder.projectExcept( - context.relBuilder.field(ROW_NUMBER_COLUMN_NAME), + context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE), context.relBuilder.field(countFieldName)); } return context.relBuilder.peek(); } + private static void addIgnoreNullBucketHintToAggregate(CalcitePlanContext context) { + final RelHint statHits = + RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); + assert context.relBuilder.peek() instanceof LogicalAggregate + : "Stats hits should be added to LogicalAggregate"; + context.relBuilder.hints(statHits); + context + .relBuilder + .getCluster() + .setHintStrategies( + HintStrategyTable.builder() + .hintStrategy( + "stats_args", + (hint, rel) -> { + return rel instanceof LogicalAggregate; + }) + .build()); + } + @Override public RelNode visitTableFunction(TableFunction node, CalcitePlanContext context) { throw new CalciteUnsupportedException("Table function is unsupported in Calcite"); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 153135c5cf8..3e7b97ea1a6 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -62,7 +62,7 @@ public interface PlanUtils { /** this is only for dedup command, do not reuse it in other command */ String ROW_NUMBER_COLUMN_FOR_DEDUP = "_row_number_dedup_"; - String ROW_NUMBER_COLUMN_NAME = "_row_number_"; + String ROW_NUMBER_COLUMN_NAME_TOP_RARE = "_row_number_top_rare_"; String ROW_NUMBER_COLUMN_NAME_MAIN = "_row_number_main_"; String ROW_NUMBER_COLUMN_NAME_SUBSEARCH = "_row_number_subsearch_"; diff --git a/docs/user/ppl/admin/settings.rst b/docs/user/ppl/admin/settings.rst index 88db0a59bd2..d99cdc6c2d0 100644 --- a/docs/user/ppl/admin/settings.rst +++ b/docs/user/ppl/admin/settings.rst @@ -211,6 +211,7 @@ The behaviours it controlled includes: - The default value of argument ``bucket_nullable`` in ``stats`` command. Check `stats command <../cmd/stats.rst>`_ for details. - The return value of ``divide`` and ``/`` operator. Check `expressions <../functions/expressions.rst>`_ for details. +- The default value of argument ``usenull`` in ``top`` and ``rare`` commands. Check `top command <../cmd/top.rst>`_ and `rare command <../cmd/rare.rst>`_ for details. Example 1 ------- diff --git a/docs/user/ppl/cmd/rare.rst b/docs/user/ppl/cmd/rare.rst index 8d2011cc1b2..d16dc4878dd 100644 --- a/docs/user/ppl/cmd/rare.rst +++ b/docs/user/ppl/cmd/rare.rst @@ -1,6 +1,6 @@ -============= +==== rare -============= +==== .. rubric:: Table of contents @@ -10,13 +10,13 @@ rare Description -============ +=========== | Using ``rare`` command to find the least common tuple of values of all fields in the field list. **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields. Syntax -============ +====== rare [by-clause] rare [rare-options] [by-clause] ``(available from 3.1.0+)`` @@ -26,10 +26,13 @@ rare [rare-options] [by-clause] ``(available from 3.1.0+)`` * rare-options: optional. options for the rare command. Supported syntax is [countfield=] [showcount=]. * showcount=: optional. whether to create a field in output that represent a count of the tuple of values. Default value is ``true``. * countfield=: optional. the name of the field that contains count. Default value is ``'count'``. +* usenull=: optional (since 3.4.0). whether to output the null value. The default value of ``usenull`` is determined by ``plugins.ppl.syntax.legacy.preferred``: + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false`` Example 1: Find the least common values in a field -=========================================== +================================================== The example finds least common gender of all the accounts. @@ -46,7 +49,7 @@ PPL query:: Example 2: Find the least common values organized by gender -==================================================== +=========================================================== The example finds least common age of all the accounts group by gender. @@ -66,12 +69,10 @@ PPL query:: Example 3: Rare command with Calcite enabled ============================================ -The example finds least common gender of all the accounts when ``plugins.calcite.enabled`` is true. - PPL query:: - PPL> source=accounts | rare gender; - fetched row + os> source=accounts | rare gender; + fetched rows / total rows = 2/2 +--------+-------+ | gender | count | |--------+-------| @@ -83,12 +84,10 @@ PPL query:: Example 4: Specify the count field option ========================================= -The example specifies the count field when ``plugins.calcite.enabled`` is true. - PPL query:: - PPL> source=accounts | rare countfield='cnt' gender; - fetched row + os> source=accounts | rare countfield='cnt' gender; + fetched rows / total rows = 2/2 +--------+-----+ | gender | cnt | |--------+-----| @@ -96,6 +95,36 @@ PPL query:: | M | 3 | +--------+-----+ + +Example 5: Specify the usenull field option +=========================================== + +PPL query:: + + os> source=accounts | rare usenull=false email; + fetched rows / total rows = 3/3 + +-----------------------+-------+ + | email | count | + |-----------------------+-------| + | amberduke@pyrami.com | 1 | + | daleadams@boink.com | 1 | + | hattiebond@netagy.com | 1 | + +-----------------------+-------+ + +PPL query:: + + os> source=accounts | rare usenull=true email; + fetched rows / total rows = 4/4 + +-----------------------+-------+ + | email | count | + |-----------------------+-------| + | null | 1 | + | amberduke@pyrami.com | 1 | + | daleadams@boink.com | 1 | + | hattiebond@netagy.com | 1 | + +-----------------------+-------+ + + Limitations =========== The ``rare`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. diff --git a/docs/user/ppl/cmd/top.rst b/docs/user/ppl/cmd/top.rst index 5f4bfb9b4b6..a786d7ed9a9 100644 --- a/docs/user/ppl/cmd/top.rst +++ b/docs/user/ppl/cmd/top.rst @@ -1,6 +1,6 @@ -============= +=== top -============= +=== .. rubric:: Table of contents @@ -10,12 +10,12 @@ top Description -============ +=========== | Using ``top`` command to find the most common tuple of values of all fields in the field list. Syntax -============ +====== top [N] [by-clause] top [N] [top-options] [by-clause] ``(available from 3.1.0+)`` @@ -26,10 +26,13 @@ top [N] [top-options] [by-clause] ``(available from 3.1.0+)`` * top-options: optional. options for the top command. Supported syntax is [countfield=] [showcount=]. * showcount=: optional. whether to create a field in output that represent a count of the tuple of values. Default value is ``true``. * countfield=: optional. the name of the field that contains count. Default value is ``'count'``. +* usenull=: optional (since 3.4.0). whether to output the null value. The default value of ``usenull`` is determined by ``plugins.ppl.syntax.legacy.preferred``: + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false`` Example 1: Find the most common values in a field -=========================================== +================================================= The example finds most common gender of all the accounts. @@ -45,7 +48,7 @@ PPL query:: +--------+ Example 2: Find the most common values in a field -=========================================== +================================================= The example finds most common gender of all the accounts. @@ -60,7 +63,7 @@ PPL query:: +--------+ Example 2: Find the most common values organized by gender -==================================================== +========================================================== The example finds most common age of all the accounts group by gender. @@ -78,12 +81,10 @@ PPL query:: Example 3: Top command with Calcite enabled =========================================== -The example finds most common gender of all the accounts when ``plugins.calcite.enabled`` is true. - PPL query:: - PPL> source=accounts | top gender; - fetched row + os> source=accounts | top gender; + fetched rows / total rows = 2/2 +--------+-------+ | gender | count | |--------+-------| @@ -95,12 +96,10 @@ PPL query:: Example 4: Specify the count field option ========================================= -The example specifies the count field when ``plugins.calcite.enabled`` is true. - PPL query:: - PPL> source=accounts | top countfield='cnt' gender; - fetched row + os> source=accounts | top countfield='cnt' gender; + fetched rows / total rows = 2/2 +--------+-----+ | gender | cnt | |--------+-----| @@ -108,6 +107,36 @@ PPL query:: | F | 1 | +--------+-----+ + +Example 5: Specify the usenull field option +=========================================== + +PPL query:: + + os> source=accounts | top usenull=false email; + fetched rows / total rows = 3/3 + +-----------------------+-------+ + | email | count | + |-----------------------+-------| + | amberduke@pyrami.com | 1 | + | daleadams@boink.com | 1 | + | hattiebond@netagy.com | 1 | + +-----------------------+-------+ + +PPL query:: + + os> source=accounts | top usenull=true email; + fetched rows / total rows = 4/4 + +-----------------------+-------+ + | email | count | + |-----------------------+-------| + | null | 1 | + | amberduke@pyrami.com | 1 | + | daleadams@boink.com | 1 | + | hattiebond@netagy.com | 1 | + +-----------------------+-------+ + + Limitations =========== The ``top`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 81936ffbfb1..d874b09bdd6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -22,6 +22,7 @@ import org.junit.Assume; import org.junit.Ignore; import org.junit.Test; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.ppl.ExplainIT; public class CalciteExplainIT extends ExplainIT { @@ -1266,6 +1267,60 @@ public void testReplaceCommandExplain() throws IOException { TEST_INDEX_ACCOUNT))); } + @Test + public void testExplainRareCommandUseNull() throws IOException { + String expected = loadExpectedPlan("explain_rare_usenull_false.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format("source=%s | rare 2 usenull=false state by gender", TEST_INDEX_ACCOUNT))); + expected = loadExpectedPlan("explain_rare_usenull_true.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format("source=%s | rare 2 usenull=true state by gender", TEST_INDEX_ACCOUNT))); + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + try { + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_rare_usenull_false.yaml"), + explainQueryYaml( + String.format("source=%s | rare 2 state by gender", TEST_INDEX_ACCOUNT))); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + public void testExplainTopCommandUseNull() throws IOException { + String expected = loadExpectedPlan("explain_top_usenull_false.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format("source=%s | top 2 usenull=false state by gender", TEST_INDEX_ACCOUNT))); + expected = loadExpectedPlan("explain_top_usenull_true.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format("source=%s | top 2 usenull=true state by gender", TEST_INDEX_ACCOUNT))); + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + try { + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_top_usenull_false.yaml"), + explainQueryYaml( + String.format("source=%s | top 2 state by gender", TEST_INDEX_ACCOUNT))); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + // Test cases for verifying the fix of https://github.com/opensearch-project/sql/issues/4571 @Test public void testPushDownMinOrMaxAggOnDerivedField() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRareCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRareCommandIT.java index eaf59e09a73..9689b7385bb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRareCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRareCommandIT.java @@ -5,6 +5,15 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.ppl.RareCommandIT; public class CalciteRareCommandIT extends RareCommandIT { @@ -12,5 +21,42 @@ public class CalciteRareCommandIT extends RareCommandIT { public void init() throws Exception { super.init(); enableCalcite(); + loadIndex(Index.BANK_WITH_NULL_VALUES); + } + + @Test + public void testRareCommandUseNull() throws IOException { + JSONObject result = + executeQuery(String.format("source=%s | rare age", TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchemaInOrder(result, schema("age", "int"), schema("count", "bigint")); + verifyNumOfRows(result, 6); + } + + @Test + public void testRareCommandUseNullFalse() throws IOException { + JSONObject result = + executeQuery( + String.format("source=%s | rare usenull=false age", TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchemaInOrder(result, schema("age", "int"), schema("count", "bigint")); + verifyNumOfRows(result, 5); + } + + @Test + public void testRareCommandLegacyFalse() throws IOException { + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + JSONObject result; + try { + result = + executeQuery( + String.format("source=%s | rare age", TEST_INDEX_BANK_WITH_NULL_VALUES)); + } catch (IOException e) { + throw new RuntimeException(e); + } + verifySchemaInOrder(result, schema("age", "int"), schema("count", "bigint")); + verifyNumOfRows(result, 5); + }); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTopCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTopCommandIT.java index 76a8b1e49cf..e555576a9cd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTopCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTopCommandIT.java @@ -5,6 +5,15 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.ppl.TopCommandIT; public class CalciteTopCommandIT extends TopCommandIT { @@ -12,5 +21,42 @@ public class CalciteTopCommandIT extends TopCommandIT { public void init() throws Exception { super.init(); enableCalcite(); + loadIndex(Index.BANK_WITH_NULL_VALUES); + } + + @Test + public void testTopCommandUseNull() throws IOException { + JSONObject result = + executeQuery(String.format("source=%s | top age", TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchemaInOrder(result, schema("age", "int"), schema("count", "bigint")); + verifyNumOfRows(result, 6); + } + + @Test + public void testTopCommandUseNullFalse() throws IOException { + JSONObject result = + executeQuery( + String.format("source=%s | top usenull=false age", TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchemaInOrder(result, schema("age", "int"), schema("count", "bigint")); + verifyNumOfRows(result, 5); + } + + @Test + public void testTopCommandLegacyFalse() throws IOException { + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + JSONObject result; + try { + result = + executeQuery( + String.format("source=%s | top age", TEST_INDEX_BANK_WITH_NULL_VALUES)); + } catch (IOException e) { + throw new RuntimeException(e); + } + verifySchemaInOrder(result, schema("age", "int"), schema("count", "bigint")); + verifyNumOfRows(result, 5); + }); } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_false.yaml new file mode 100644 index 00000000000..4557813e9a2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_false.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(gender=[$0], state=[$1], count=[$2]) + LogicalFilter(condition=[<=($3, 2)]) + LogicalProject(gender=[$0], state=[$1], count=[$2], _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2)]) + LogicalAggregate(group=[{0, 1}], count=[COUNT()]) + LogicalProject(gender=[$4], state=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[2], expr#5=[<=($t3, $t4)], proj#0..2=[{exprs}], $condition=[$t5]) + EnumerableWindow(window#0=[window(partition {0} order by [2] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_true.yaml new file mode 100644 index 00000000000..58900d698fd --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_rare_usenull_true.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(gender=[$0], state=[$1], count=[$2]) + LogicalFilter(condition=[<=($3, 2)]) + LogicalProject(gender=[$0], state=[$1], count=[$2], _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2)]) + LogicalAggregate(group=[{0, 1}], count=[COUNT()]) + LogicalProject(gender=[$4], state=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[2], expr#5=[<=($t3, $t4)], proj#0..2=[{exprs}], $condition=[$t5]) + EnumerableWindow(window#0=[window(partition {0} order by [2] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_top_usenull_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_top_usenull_false.yaml new file mode 100644 index 00000000000..cf2820f4097 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_top_usenull_false.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(gender=[$0], state=[$1], count=[$2]) + LogicalFilter(condition=[<=($3, 2)]) + LogicalProject(gender=[$0], state=[$1], count=[$2], _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 DESC)]) + LogicalAggregate(group=[{0, 1}], count=[COUNT()]) + LogicalProject(gender=[$4], state=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[2], expr#5=[<=($t3, $t4)], proj#0..2=[{exprs}], $condition=[$t5]) + EnumerableWindow(window#0=[window(partition {0} order by [2 DESC] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_top_usenull_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_top_usenull_true.yaml new file mode 100644 index 00000000000..a12cb33592b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_top_usenull_true.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(gender=[$0], state=[$1], count=[$2]) + LogicalFilter(condition=[<=($3, 2)]) + LogicalProject(gender=[$0], state=[$1], count=[$2], _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 DESC)]) + LogicalAggregate(group=[{0, 1}], count=[COUNT()]) + LogicalProject(gender=[$4], state=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[2], expr#5=[<=($t3, $t4)], proj#0..2=[{exprs}], $condition=[$t5]) + EnumerableWindow(window#0=[window(partition {0} order by [2 DESC] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_rare_usenull_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_rare_usenull_false.yaml new file mode 100644 index 00000000000..5ef75ad3f69 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_rare_usenull_false.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(gender=[$0], state=[$1], count=[$2]) + LogicalFilter(condition=[<=($3, 2)]) + LogicalProject(gender=[$0], state=[$1], count=[$2], _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2)]) + LogicalAggregate(group=[{0, 1}], count=[COUNT()]) + LogicalProject(gender=[$4], state=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[2], expr#5=[<=($t3, $t4)], proj#0..2=[{exprs}], $condition=[$t5]) + EnumerableWindow(window#0=[window(partition {0} order by [2] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{4, 7}], count=[COUNT()]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], expr#18=[IS NOT NULL($t7)], expr#19=[AND($t17, $t18)], proj#0..16=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_rare_usenull_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_rare_usenull_true.yaml new file mode 100644 index 00000000000..3b24078b2b5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_rare_usenull_true.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(gender=[$0], state=[$1], count=[$2]) + LogicalFilter(condition=[<=($3, 2)]) + LogicalProject(gender=[$0], state=[$1], count=[$2], _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2)]) + LogicalAggregate(group=[{0, 1}], count=[COUNT()]) + LogicalProject(gender=[$4], state=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[2], expr#5=[<=($t3, $t4)], proj#0..2=[{exprs}], $condition=[$t5]) + EnumerableWindow(window#0=[window(partition {0} order by [2] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{4, 7}], count=[COUNT()]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_top_usenull_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_top_usenull_false.yaml new file mode 100644 index 00000000000..352f9851897 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_top_usenull_false.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(gender=[$0], state=[$1], count=[$2]) + LogicalFilter(condition=[<=($3, 2)]) + LogicalProject(gender=[$0], state=[$1], count=[$2], _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 DESC)]) + LogicalAggregate(group=[{0, 1}], count=[COUNT()]) + LogicalProject(gender=[$4], state=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[2], expr#5=[<=($t3, $t4)], proj#0..2=[{exprs}], $condition=[$t5]) + EnumerableWindow(window#0=[window(partition {0} order by [2 DESC] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{4, 7}], count=[COUNT()]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], expr#18=[IS NOT NULL($t7)], expr#19=[AND($t17, $t18)], proj#0..16=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_top_usenull_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_top_usenull_true.yaml new file mode 100644 index 00000000000..43b1ff58b73 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_top_usenull_true.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(gender=[$0], state=[$1], count=[$2]) + LogicalFilter(condition=[<=($3, 2)]) + LogicalProject(gender=[$0], state=[$1], count=[$2], _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 DESC)]) + LogicalAggregate(group=[{0, 1}], count=[COUNT()]) + LogicalProject(gender=[$4], state=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[2], expr#5=[<=($t3, $t4)], proj#0..2=[{exprs}], $condition=[$t5]) + EnumerableWindow(window#0=[window(partition {0} order by [2 DESC] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{4, 7}], count=[COUNT()]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index ba1e4960bb2..c6d28be69a3 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -111,6 +111,7 @@ PARTITIONS: 'PARTITIONS'; ALLNUM: 'ALLNUM'; DELIM: 'DELIM'; BUCKET_NULLABLE: 'BUCKET_NULLABLE'; +USENULL: 'USENULL'; CENTROIDS: 'CENTROIDS'; ITERATIONS: 'ITERATIONS'; DISTANCE_TYPE: 'DISTANCE_TYPE'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index d07af92e93c..3a06b624e47 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -59,8 +59,7 @@ commands | evalCommand | headCommand | binCommand - | topCommand - | rareCommand + | rareTopCommand | grokCommand | parseCommand | spathCommand @@ -309,12 +308,14 @@ logSpanValue : LOG_WITH_BASE # logWithBaseSpan ; -topCommand - : TOP (number = integerLiteral)? (COUNTFIELD EQUAL countfield = stringLiteral)? (SHOWCOUNT EQUAL showcount = booleanLiteral)? fieldList (byClause)? +rareTopCommand + : (TOP | RARE) (number = integerLiteral)? rareTopOption* fieldList (byClause)? ; -rareCommand - : RARE (number = integerLiteral)? (COUNTFIELD EQUAL countfield = stringLiteral)? (SHOWCOUNT EQUAL showcount = booleanLiteral)? fieldList (byClause)? +rareTopOption + : COUNTFIELD EQUAL countField = stringLiteral + | SHOWCOUNT EQUAL showCount = booleanLiteral + | USENULL EQUAL useNull = booleanLiteral ; grokCommand @@ -1454,6 +1455,7 @@ searchableKeyWord | ALLNUM | DELIM | BUCKET_NULLABLE + | USENULL | CENTROIDS | ITERATIONS | DISTANCE_TYPE diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index b7e33246027..8802dcbf3c9 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -696,26 +696,19 @@ private Set getUniqueFieldSet(FieldListContext ctx) { return uniqueFields; } - /** Rare command. */ + /** Rare and Top commands. */ @Override - public UnresolvedPlan visitRareCommand(OpenSearchPPLParser.RareCommandContext ctx) { + public UnresolvedPlan visitRareTopCommand(OpenSearchPPLParser.RareTopCommandContext ctx) { List groupList = ctx.byClause() == null ? emptyList() : getGroupByList(ctx.byClause()); + Integer noOfResults = + ctx.number != null + ? (Integer) ((Literal) expressionBuilder.visitIntegerLiteral(ctx.number)).getValue() + : 10; return new RareTopN( - CommandType.RARE, - ArgumentFactory.getArgumentList(ctx), - getFieldList(ctx.fieldList()), - groupList); - } - - /** Top command. */ - @Override - public UnresolvedPlan visitTopCommand(OpenSearchPPLParser.TopCommandContext ctx) { - List groupList = - ctx.byClause() == null ? emptyList() : getGroupByList(ctx.byClause()); - return new RareTopN( - CommandType.TOP, - ArgumentFactory.getArgumentList(ctx), + ctx.TOP() != null ? CommandType.TOP : CommandType.RARE, + noOfResults, + ArgumentFactory.getArgumentList(ctx, settings), getFieldList(ctx.fieldList()), groupList); } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 8f58e41f5e3..85481da2426 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -9,11 +9,13 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Optional; import org.antlr.v4.runtime.ParserRuleContext; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Join; +import org.opensearch.sql.ast.tree.RareTopN; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.exception.SemanticCheckException; @@ -25,10 +27,8 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.IntegerLiteralContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.PrefixSortFieldContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.RareCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SortFieldContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SuffixSortFieldContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.TopCommandContext; /** Util class to get all arguments as a list from the PPL command. */ public class ArgumentFactory { @@ -179,42 +179,37 @@ private static Argument getTypeArgument(OpenSearchPPLParser.SortFieldExpressionC } } - /** - * Get list of {@link Argument}. - * - * @param ctx TopCommandContext instance - * @return the list of arguments fetched from the top command - */ - public static List getArgumentList(TopCommandContext ctx) { - return Arrays.asList( - ctx.number != null - ? new Argument("noOfResults", getArgumentValue(ctx.number)) - : new Argument("noOfResults", new Literal(10, DataType.INTEGER)), - ctx.countfield != null - ? new Argument("countField", getArgumentValue(ctx.countfield)) - : new Argument("countField", new Literal("count", DataType.STRING)), - ctx.showcount != null - ? new Argument("showCount", getArgumentValue(ctx.showcount)) - : new Argument("showCount", new Literal(true, DataType.BOOLEAN))); - } - /** * Get list of {@link Argument}. * * @param ctx RareCommandContext instance + * @param settings Settings instance * @return the list of argument with default number of results for the rare command */ - public static List getArgumentList(RareCommandContext ctx) { - return Arrays.asList( - ctx.number != null - ? new Argument("noOfResults", getArgumentValue(ctx.number)) - : new Argument("noOfResults", new Literal(10, DataType.INTEGER)), - ctx.countfield != null - ? new Argument("countField", getArgumentValue(ctx.countfield)) - : new Argument("countField", new Literal("count", DataType.STRING)), - ctx.showcount != null - ? new Argument("showCount", getArgumentValue(ctx.showcount)) - : new Argument("showCount", new Literal(true, DataType.BOOLEAN))); + public static List getArgumentList( + OpenSearchPPLParser.RareTopCommandContext ctx, Settings settings) { + List list = new ArrayList<>(); + Optional opt = + ctx.rareTopOption().stream().filter(op -> op.countField != null).findFirst(); + list.add( + new Argument( + RareTopN.Option.countField.name(), + opt.isPresent() + ? getArgumentValue(opt.get().countField) + : new Literal("count", DataType.STRING))); + opt = ctx.rareTopOption().stream().filter(op -> op.showCount != null).findFirst(); + list.add( + new Argument( + RareTopN.Option.showCount.name(), + opt.isPresent() ? getArgumentValue(opt.get().showCount) : Literal.TRUE)); + opt = ctx.rareTopOption().stream().filter(op -> op.useNull != null).findFirst(); + list.add( + new Argument( + RareTopN.Option.useNull.name(), + opt.isPresent() + ? getArgumentValue(opt.get().useNull) + : legacyPreferred(settings) ? Literal.TRUE : Literal.FALSE)); + return list; } /** diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index e392a682cef..f8c935175d0 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -382,14 +382,16 @@ public String visitWindow(Window node, String context) { public String visitRareTopN(RareTopN node, String context) { final String child = node.getChild().get(0).accept(this, context); ArgumentMap arguments = ArgumentMap.of(node.getArguments()); - Integer noOfResults = (Integer) arguments.get("noOfResults").getValue(); - String countField = (String) arguments.get("countField").getValue(); - Boolean showCount = (Boolean) arguments.get("showCount").getValue(); + Integer noOfResults = node.getNoOfResults(); + String countField = (String) arguments.get(RareTopN.Option.countField.name()).getValue(); + Boolean showCount = (Boolean) arguments.get(RareTopN.Option.showCount.name()).getValue(); + Boolean useNull = (Boolean) arguments.get(RareTopN.Option.useNull.name()).getValue(); String fields = visitFieldList(node.getFields()); String group = visitExpressionList(node.getGroupExprList()); String options = isCalciteEnabled(settings) - ? StringUtils.format("countield='%s' showcount=%s ", countField, showCount) + ? StringUtils.format( + "countield='%s' showcount=%s usenull=%s ", countField, showCount, useNull) : ""; return StringUtils.format( "%s | %s %d %s%s", diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRareTopNTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRareTopNTest.java index 23dab511671..a4167b432ad 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRareTopNTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRareTopNTest.java @@ -26,8 +26,8 @@ public void testRare() { String expectedLogical = "LogicalProject(JOB=[$0], count=[$1])\n" + " LogicalFilter(condition=[<=($2, 10)])\n" - + " LogicalProject(JOB=[$0], count=[$1], _row_number_=[ROW_NUMBER() OVER (ORDER BY" - + " $1)])\n" + + " LogicalProject(JOB=[$0], count=[$1], _row_number_top_rare_=[ROW_NUMBER() OVER" + + " (ORDER BY $1)])\n" + " LogicalAggregate(group=[{0}], count=[COUNT()])\n" + " LogicalProject(JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -45,10 +45,10 @@ public void testRare() { String expectedSparkSql = "SELECT `JOB`, `count`\n" + "FROM (SELECT `JOB`, COUNT(*) `count`, ROW_NUMBER() OVER (ORDER BY COUNT(*) NULLS" - + " LAST) `_row_number_`\n" + + " LAST) `_row_number_top_rare_`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `JOB`) `t1`\n" - + "WHERE `_row_number_` <= 10"; + + "WHERE `_row_number_top_rare_` <= 10"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -59,8 +59,8 @@ public void testRareBy() { String expectedLogical = "LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2])\n" + " LogicalFilter(condition=[<=($3, 10)])\n" - + " LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2], _row_number_=[ROW_NUMBER()" - + " OVER (PARTITION BY $0 ORDER BY $2)])\n" + + " LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2]," + + " _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2)])\n" + " LogicalAggregate(group=[{0, 1}], count=[COUNT()])\n" + " LogicalProject(DEPTNO=[$7], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -82,10 +82,10 @@ public void testRareBy() { String expectedSparkSql = "SELECT `DEPTNO`, `JOB`, `count`\n" + "FROM (SELECT `DEPTNO`, `JOB`, COUNT(*) `count`, ROW_NUMBER() OVER (PARTITION BY" - + " `DEPTNO` ORDER BY COUNT(*) NULLS LAST) `_row_number_`\n" + + " `DEPTNO` ORDER BY COUNT(*) NULLS LAST) `_row_number_top_rare_`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`, `JOB`) `t1`\n" - + "WHERE `_row_number_` <= 10"; + + "WHERE `_row_number_top_rare_` <= 10"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -96,8 +96,8 @@ public void testRareDisableShowCount() { String expectedLogical = "LogicalProject(DEPTNO=[$0], JOB=[$1])\n" + " LogicalFilter(condition=[<=($3, 10)])\n" - + " LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2], _row_number_=[ROW_NUMBER()" - + " OVER (PARTITION BY $0 ORDER BY $2)])\n" + + " LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2]," + + " _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2)])\n" + " LogicalAggregate(group=[{0, 1}], count=[COUNT()])\n" + " LogicalProject(DEPTNO=[$7], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -119,10 +119,10 @@ public void testRareDisableShowCount() { String expectedSparkSql = "SELECT `DEPTNO`, `JOB`\n" + "FROM (SELECT `DEPTNO`, `JOB`, COUNT(*) `count`, ROW_NUMBER() OVER (PARTITION BY" - + " `DEPTNO` ORDER BY COUNT(*) NULLS LAST) `_row_number_`\n" + + " `DEPTNO` ORDER BY COUNT(*) NULLS LAST) `_row_number_top_rare_`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`, `JOB`) `t1`\n" - + "WHERE `_row_number_` <= 10"; + + "WHERE `_row_number_top_rare_` <= 10"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -133,8 +133,8 @@ public void testRareCountField() { String expectedLogical = "LogicalProject(DEPTNO=[$0], JOB=[$1], my_cnt=[$2])\n" + " LogicalFilter(condition=[<=($3, 10)])\n" - + " LogicalProject(DEPTNO=[$0], JOB=[$1], my_cnt=[$2], _row_number_=[ROW_NUMBER()" - + " OVER (PARTITION BY $0 ORDER BY $2)])\n" + + " LogicalProject(DEPTNO=[$0], JOB=[$1], my_cnt=[$2]," + + " _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2)])\n" + " LogicalAggregate(group=[{0, 1}], my_cnt=[COUNT()])\n" + " LogicalProject(DEPTNO=[$7], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -156,10 +156,49 @@ public void testRareCountField() { String expectedSparkSql = "SELECT `DEPTNO`, `JOB`, `my_cnt`\n" + "FROM (SELECT `DEPTNO`, `JOB`, COUNT(*) `my_cnt`, ROW_NUMBER() OVER (PARTITION BY" - + " `DEPTNO` ORDER BY COUNT(*) NULLS LAST) `_row_number_`\n" + + " `DEPTNO` ORDER BY COUNT(*) NULLS LAST) `_row_number_top_rare_`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`, `JOB`) `t1`\n" - + "WHERE `_row_number_` <= 10"; + + "WHERE `_row_number_top_rare_` <= 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testRareUseNullFalse() { + String ppl = "source=EMP | rare usenull=false JOB by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2])\n" + + " LogicalFilter(condition=[<=($3, 10)])\n" + + " LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2]," + + " _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2)])\n" + + " LogicalAggregate(group=[{0, 1}], count=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7], JOB=[$2])\n" + + " LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($2))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = + "" + + "DEPTNO=20; JOB=MANAGER; count=1\n" + + "DEPTNO=20; JOB=CLERK; count=2\n" + + "DEPTNO=20; JOB=ANALYST; count=2\n" + + "DEPTNO=10; JOB=MANAGER; count=1\n" + + "DEPTNO=10; JOB=CLERK; count=1\n" + + "DEPTNO=10; JOB=PRESIDENT; count=1\n" + + "DEPTNO=30; JOB=MANAGER; count=1\n" + + "DEPTNO=30; JOB=CLERK; count=1\n" + + "DEPTNO=30; JOB=SALESMAN; count=4\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `DEPTNO`, `JOB`, `count`\n" + + "FROM (SELECT `DEPTNO`, `JOB`, COUNT(*) `count`, ROW_NUMBER() OVER (PARTITION BY" + + " `DEPTNO` ORDER BY COUNT(*) NULLS LAST) `_row_number_top_rare_`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` IS NOT NULL AND `JOB` IS NOT NULL\n" + + "GROUP BY `DEPTNO`, `JOB`) `t2`\n" + + "WHERE `_row_number_top_rare_` <= 10"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -182,4 +221,187 @@ public void failWithDuplicatedName() { is("Field `DEPTNO` is existed, change the count field by setting countfield='xyz'")); } } + + @Test + public void testTop() { + String ppl = "source=EMP | top JOB"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(JOB=[$0], count=[$1])\n" + + " LogicalFilter(condition=[<=($2, 10)])\n" + + " LogicalProject(JOB=[$0], count=[$1], _row_number_top_rare_=[ROW_NUMBER() OVER" + + " (ORDER BY $1 DESC)])\n" + + " LogicalAggregate(group=[{0}], count=[COUNT()])\n" + + " LogicalProject(JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = + "" + + "JOB=SALESMAN; count=4\n" + + "JOB=CLERK; count=4\n" + + "JOB=MANAGER; count=3\n" + + "JOB=ANALYST; count=2\n" + + "JOB=PRESIDENT; count=1\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `JOB`, `count`\n" + + "FROM (SELECT `JOB`, COUNT(*) `count`, ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC" + + " NULLS FIRST) `_row_number_top_rare_`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `JOB`) `t1`\n" + + "WHERE `_row_number_top_rare_` <= 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTopBy() { + String ppl = "source=EMP | top JOB by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2])\n" + + " LogicalFilter(condition=[<=($3, 10)])\n" + + " LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2]," + + " _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 DESC)])\n" + + " LogicalAggregate(group=[{0, 1}], count=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = + "" + + "DEPTNO=20; JOB=CLERK; count=2\n" + + "DEPTNO=20; JOB=ANALYST; count=2\n" + + "DEPTNO=20; JOB=MANAGER; count=1\n" + + "DEPTNO=10; JOB=MANAGER; count=1\n" + + "DEPTNO=10; JOB=CLERK; count=1\n" + + "DEPTNO=10; JOB=PRESIDENT; count=1\n" + + "DEPTNO=30; JOB=SALESMAN; count=4\n" + + "DEPTNO=30; JOB=MANAGER; count=1\n" + + "DEPTNO=30; JOB=CLERK; count=1\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `DEPTNO`, `JOB`, `count`\n" + + "FROM (SELECT `DEPTNO`, `JOB`, COUNT(*) `count`, ROW_NUMBER() OVER (PARTITION BY" + + " `DEPTNO` ORDER BY COUNT(*) DESC NULLS FIRST) `_row_number_top_rare_`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `DEPTNO`, `JOB`) `t1`\n" + + "WHERE `_row_number_top_rare_` <= 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTopDisableShowCount() { + String ppl = "source=EMP | top showcount=false JOB by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$0], JOB=[$1])\n" + + " LogicalFilter(condition=[<=($3, 10)])\n" + + " LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2]," + + " _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 DESC)])\n" + + " LogicalAggregate(group=[{0, 1}], count=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = + "" + + "DEPTNO=20; JOB=CLERK\n" + + "DEPTNO=20; JOB=ANALYST\n" + + "DEPTNO=20; JOB=MANAGER\n" + + "DEPTNO=10; JOB=MANAGER\n" + + "DEPTNO=10; JOB=CLERK\n" + + "DEPTNO=10; JOB=PRESIDENT\n" + + "DEPTNO=30; JOB=SALESMAN\n" + + "DEPTNO=30; JOB=MANAGER\n" + + "DEPTNO=30; JOB=CLERK\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `DEPTNO`, `JOB`\n" + + "FROM (SELECT `DEPTNO`, `JOB`, COUNT(*) `count`, ROW_NUMBER() OVER (PARTITION BY" + + " `DEPTNO` ORDER BY COUNT(*) DESC NULLS FIRST) `_row_number_top_rare_`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `DEPTNO`, `JOB`) `t1`\n" + + "WHERE `_row_number_top_rare_` <= 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTopCountField() { + String ppl = "source=EMP | top countfield='my_cnt' JOB by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$0], JOB=[$1], my_cnt=[$2])\n" + + " LogicalFilter(condition=[<=($3, 10)])\n" + + " LogicalProject(DEPTNO=[$0], JOB=[$1], my_cnt=[$2]," + + " _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 DESC)])\n" + + " LogicalAggregate(group=[{0, 1}], my_cnt=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = + "" + + "DEPTNO=20; JOB=CLERK; my_cnt=2\n" + + "DEPTNO=20; JOB=ANALYST; my_cnt=2\n" + + "DEPTNO=20; JOB=MANAGER; my_cnt=1\n" + + "DEPTNO=10; JOB=MANAGER; my_cnt=1\n" + + "DEPTNO=10; JOB=CLERK; my_cnt=1\n" + + "DEPTNO=10; JOB=PRESIDENT; my_cnt=1\n" + + "DEPTNO=30; JOB=SALESMAN; my_cnt=4\n" + + "DEPTNO=30; JOB=MANAGER; my_cnt=1\n" + + "DEPTNO=30; JOB=CLERK; my_cnt=1\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `DEPTNO`, `JOB`, `my_cnt`\n" + + "FROM (SELECT `DEPTNO`, `JOB`, COUNT(*) `my_cnt`, ROW_NUMBER() OVER (PARTITION BY" + + " `DEPTNO` ORDER BY COUNT(*) DESC NULLS FIRST) `_row_number_top_rare_`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `DEPTNO`, `JOB`) `t1`\n" + + "WHERE `_row_number_top_rare_` <= 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTopUseNullFalse() { + String ppl = "source=EMP | top usenull=false JOB by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2])\n" + + " LogicalFilter(condition=[<=($3, 10)])\n" + + " LogicalProject(DEPTNO=[$0], JOB=[$1], count=[$2]," + + " _row_number_top_rare_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $2 DESC)])\n" + + " LogicalAggregate(group=[{0, 1}], count=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7], JOB=[$2])\n" + + " LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($2))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = + "" + + "DEPTNO=20; JOB=CLERK; count=2\n" + + "DEPTNO=20; JOB=ANALYST; count=2\n" + + "DEPTNO=20; JOB=MANAGER; count=1\n" + + "DEPTNO=10; JOB=MANAGER; count=1\n" + + "DEPTNO=10; JOB=CLERK; count=1\n" + + "DEPTNO=10; JOB=PRESIDENT; count=1\n" + + "DEPTNO=30; JOB=SALESMAN; count=4\n" + + "DEPTNO=30; JOB=MANAGER; count=1\n" + + "DEPTNO=30; JOB=CLERK; count=1\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `DEPTNO`, `JOB`, `count`\n" + + "FROM (SELECT `DEPTNO`, `JOB`, COUNT(*) `count`, ROW_NUMBER() OVER (PARTITION BY" + + " `DEPTNO` ORDER BY COUNT(*) DESC NULLS FIRST) `_row_number_top_rare_`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` IS NOT NULL AND `JOB` IS NOT NULL\n" + + "GROUP BY `DEPTNO`, `JOB`) `t2`\n" + + "WHERE `_row_number_top_rare_` <= 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 300c4099e1b..be43b64cfa4 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -729,7 +729,8 @@ public void testRareCommand() { exprList( argument("noOfResults", intLiteral(10)), argument("countField", stringLiteral("count")), - argument("showCount", booleanLiteral(true))), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(true))), emptyList(), field("a"))); } @@ -744,7 +745,8 @@ public void testRareCommandWithGroupBy() { exprList( argument("noOfResults", intLiteral(10)), argument("countField", stringLiteral("count")), - argument("showCount", booleanLiteral(true))), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(true))), exprList(field("b")), field("a"))); } @@ -759,7 +761,8 @@ public void testRareCommandWithMultipleFields() { exprList( argument("noOfResults", intLiteral(10)), argument("countField", stringLiteral("count")), - argument("showCount", booleanLiteral(true))), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(true))), exprList(field("c")), field("a"), field("b"))); @@ -775,7 +778,8 @@ public void testTopCommandWithN() { exprList( argument("noOfResults", intLiteral(1)), argument("countField", stringLiteral("count")), - argument("showCount", booleanLiteral(true))), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(true))), emptyList(), field("a"))); } @@ -790,7 +794,8 @@ public void testTopCommandWithoutNAndGroupBy() { exprList( argument("noOfResults", intLiteral(10)), argument("countField", stringLiteral("count")), - argument("showCount", booleanLiteral(true))), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(true))), emptyList(), field("a"))); } @@ -805,7 +810,8 @@ public void testTopCommandWithNAndGroupBy() { exprList( argument("noOfResults", intLiteral(1)), argument("countField", stringLiteral("count")), - argument("showCount", booleanLiteral(true))), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(true))), exprList(field("b")), field("a"))); } @@ -820,12 +826,46 @@ public void testTopCommandWithMultipleFields() { exprList( argument("noOfResults", intLiteral(1)), argument("countField", stringLiteral("count")), - argument("showCount", booleanLiteral(true))), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(true))), exprList(field("c")), field("a"), field("b"))); } + @Test + public void testTopCommandWithUseNullFalse() { + assertEqual( + "source=t | top 1 usenull=false a by b", + rareTopN( + relation("t"), + CommandType.TOP, + exprList( + argument("noOfResults", intLiteral(1)), + argument("countField", stringLiteral("count")), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(false))), + exprList(field("b")), + field("a"))); + } + + @Test + public void testTopCommandWithLegacyFalse() { + when(settings.getSettingValue(Key.PPL_SYNTAX_LEGACY_PREFERRED)).thenReturn(false); + assertEqual( + "source=t | top 1 a by b", + rareTopN( + relation("t"), + CommandType.TOP, + exprList( + argument("noOfResults", intLiteral(1)), + argument("countField", stringLiteral("count")), + argument("showCount", booleanLiteral(true)), + argument("useNull", booleanLiteral(false))), + exprList(field("b")), + field("a"))); + } + @Test public void testGrokCommand() { assertEqual( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 6de9acacfe1..2f18db5c995 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -311,7 +311,8 @@ public void testTopCommandWithNAndGroupBy() { public void testRareCommandWithGroupByWithCalcite() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(true); assertEquals( - "source=table | rare 10 countield='count' showcount=true identifier by identifier", + "source=table | rare 10 countield='count' showcount=true usenull=true identifier by" + + " identifier", anonymize("source=t | rare a by b")); } @@ -319,7 +320,8 @@ public void testRareCommandWithGroupByWithCalcite() { public void testTopCommandWithNAndGroupByWithCalcite() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(true); assertEquals( - "source=table | top 1 countield='count' showcount=true identifier by identifier", + "source=table | top 1 countield='count' showcount=true usenull=true identifier by" + + " identifier", anonymize("source=t | top 1 a by b")); }