opensearch-project · qianheng-aws · Oct 30, 2025 · Oct 29, 2025 · Oct 29, 2025 · yuancu
@@ -380,8 +380,7 @@ public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) {
     fields.forEach(
         field -> newEnv.define(new Symbol(Namespace.FIELD_NAME, field.toString()), field.type()));
 
-    List<Argument> options = node.getArguments();
-    Integer noOfResults = (Integer) options.get(0).getValue().getValue();
+    Integer noOfResults = node.getNoOfResults();
 
     return new LogicalRareTopN(child, node.getCommandType(), noOfResults, fields, groupBys);
   }

@@ -540,8 +540,16 @@ public static RareTopN rareTopN(
       List<Argument> noOfResults,
       List<UnresolvedExpression> groupList,
       Field... fields) {
-    return new RareTopN(input, commandType, noOfResults, Arrays.asList(fields), groupList)
-        .attach(input);
+    Integer N =
+        (Integer)
+            Argument.ArgumentMap.of(noOfResults)
+                .getOrDefault("noOfResults", new Literal(10, DataType.INTEGER))
+                .getValue();
+    List<Argument> removed =
+        noOfResults.stream()
+            .filter(argument -> !argument.getArgName().equals("noOfResults"))
+            .toList();
+    return new RareTopN(commandType, N, removed, Arrays.asList(fields), groupList).attach(input);
   }
 
   public static Limit limit(UnresolvedPlan input, Integer limit, Integer offset) {

@@ -37,6 +37,8 @@ public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
   }
 
   /** ArgumentMap is a helper class to get argument value by name. */
+  @EqualsAndHashCode
+  @ToString
   public static class ArgumentMap {
     private final Map<String, Literal> map;
 

@@ -7,7 +7,6 @@
 
 import com.google.common.collect.ImmutableList;
 import java.util.List;
-import lombok.AllArgsConstructor;
 import lombok.EqualsAndHashCode;
 import lombok.Getter;
 import lombok.RequiredArgsConstructor;
@@ -24,12 +23,11 @@
 @ToString
 @EqualsAndHashCode(callSuper = false)
 @RequiredArgsConstructor
-@AllArgsConstructor
 public class RareTopN extends UnresolvedPlan {
 
   private UnresolvedPlan child;
   private final CommandType commandType;
-  // arguments: noOfResults: Integer, countField: String, showCount: Boolean
+  private final Integer noOfResults;
   private final List<Argument> arguments;
   private final List<Field> fields;
   private final List<UnresolvedExpression> groupExprList;
@@ -54,4 +52,10 @@ public enum CommandType {
     TOP,
     RARE
   }
+
+  public enum Option {
+    countField,
+    showCount,
+    useNull,
+  }
 }
@@ -15,9 +15,9 @@
 import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC;
 import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC;
 import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP;
-import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME;
 import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_MAIN;
 import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_SUBSEARCH;
+import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_TOP_RARE;
 import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation;
 import static org.opensearch.sql.calcite.utils.PlanUtils.getRexCall;
 import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild;
@@ -1128,22 +1128,7 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) {
     Pair<List<RexNode>, List<AggCall>> aggregationAttributes =
         aggregateWithTrimming(groupExprList, aggExprList, context);
     if (toAddHintsOnAggregate) {
-      final RelHint statHits =
-          RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build();
-      assert context.relBuilder.peek() instanceof LogicalAggregate
-          : "Stats hits should be added to LogicalAggregate";
-      context.relBuilder.hints(statHits);
-      context
-          .relBuilder
-          .getCluster()
-          .setHintStrategies(
-              HintStrategyTable.builder()
-                  .hintStrategy(
-                      "stats_args",
-                      (hint, rel) -> {
-                        return rel instanceof LogicalAggregate;
-                      })
-                  .build());
+      addIgnoreNullBucketHintToAggregate(context);
     }
 
     // schema reordering
@@ -1862,9 +1847,8 @@ public RelNode visitKmeans(Kmeans node, CalcitePlanContext context) {
   @Override
   public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
     visitChildren(node, context);
-
-    ArgumentMap arguments = ArgumentMap.of(node.getArguments());
-    String countFieldName = (String) arguments.get("countField").getValue();
+    ArgumentMap argumentMap = ArgumentMap.of(node.getArguments());
+    String countFieldName = (String) argumentMap.get(RareTopN.Option.countField.name()).getValue();
     if (context.relBuilder.peek().getRowType().getFieldNames().contains(countFieldName)) {
       throw new IllegalArgumentException(
           "Field `"
@@ -1879,8 +1863,27 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
     groupExprList.addAll(fieldList);
     List<UnresolvedExpression> aggExprList =
         List.of(AstDSL.alias(countFieldName, AstDSL.aggregate("count", null)));
+
+    // if usenull=false, add a isNotNull before Aggregate and the hint to this Aggregate
+    Boolean bucketNullable = (Boolean) argumentMap.get(RareTopN.Option.useNull.name()).getValue();
+    boolean toAddHintsOnAggregate = false;
+    if (!bucketNullable && !groupExprList.isEmpty()) {
+      toAddHintsOnAggregate = true;
+      // add isNotNull filter before aggregation to filter out null bucket
+      List<RexNode> groupByList =
+          groupExprList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList();
+      context.relBuilder.filter(
+          PlanUtils.getSelectColumns(groupByList).stream()
+              .map(context.relBuilder::field)
+              .map(context.relBuilder::isNotNull)
+              .toList());
+    }
     aggregateWithTrimming(groupExprList, aggExprList, context);
 
+    if (toAddHintsOnAggregate) {
+      addIgnoreNullBucketHintToAggregate(context);
+    }
+
     // 2. add a window column
     List<RexNode> partitionKeys = rexVisitor.analyze(node.getGroupExprList(), context);
     RexNode countField;
@@ -1899,26 +1902,46 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
             List.of(countField),
             WindowFrame.toCurrentRow());
     context.relBuilder.projectPlus(
-        context.relBuilder.alias(rowNumberWindowOver, ROW_NUMBER_COLUMN_NAME));
+        context.relBuilder.alias(rowNumberWindowOver, ROW_NUMBER_COLUMN_NAME_TOP_RARE));
 
     // 3. filter row_number() <= k in each partition
-    Integer N = (Integer) arguments.get("noOfResults").getValue();
+    int k = node.getNoOfResults();
     context.relBuilder.filter(
         context.relBuilder.lessThanOrEqual(
-            context.relBuilder.field(ROW_NUMBER_COLUMN_NAME), context.relBuilder.literal(N)));
+            context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE),
+            context.relBuilder.literal(k)));
 
     // 4. project final output. the default output is group by list + field list
-    Boolean showCount = (Boolean) arguments.get("showCount").getValue();
+    Boolean showCount = (Boolean) argumentMap.get(RareTopN.Option.showCount.name()).getValue();
     if (showCount) {
-      context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_NAME));
+      context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE));
     } else {
       context.relBuilder.projectExcept(
-          context.relBuilder.field(ROW_NUMBER_COLUMN_NAME),
+          context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE),
           context.relBuilder.field(countFieldName));
     }
     return context.relBuilder.peek();
   }
 
+  private static void addIgnoreNullBucketHintToAggregate(CalcitePlanContext context) {
+    final RelHint statHits =
+        RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build();
+    assert context.relBuilder.peek() instanceof LogicalAggregate
+        : "Stats hits should be added to LogicalAggregate";
+    context.relBuilder.hints(statHits);
+    context
+        .relBuilder
+        .getCluster()
+        .setHintStrategies(
+            HintStrategyTable.builder()
+                .hintStrategy(
+                    "stats_args",
+                    (hint, rel) -> {
+                      return rel instanceof LogicalAggregate;
+                    })
+                .build());
+  }
+
   @Override
   public RelNode visitTableFunction(TableFunction node, CalcitePlanContext context) {
     throw new CalciteUnsupportedException("Table function is unsupported in Calcite");

@@ -62,7 +62,7 @@ public interface PlanUtils {
   /** this is only for dedup command, do not reuse it in other command */
   String ROW_NUMBER_COLUMN_FOR_DEDUP = "_row_number_dedup_";
 
-  String ROW_NUMBER_COLUMN_NAME = "_row_number_";
+  String ROW_NUMBER_COLUMN_NAME_TOP_RARE = "_row_number_top_rare_";
   String ROW_NUMBER_COLUMN_NAME_MAIN = "_row_number_main_";
   String ROW_NUMBER_COLUMN_NAME_SUBSEARCH = "_row_number_subsearch_";
 

@@ -211,6 +211,7 @@ The behaviours it controlled includes:
 
 - The default value of argument ``bucket_nullable`` in ``stats`` command. Check `stats command <../cmd/stats.rst>`_ for details.
 - The return value of ``divide`` and ``/`` operator. Check `expressions <../functions/expressions.rst>`_ for details.
+- The default value of argument ``usenull`` in ``top`` and ``rare`` commands. Check `top command <../cmd/top.rst>`_  and `rare command <../cmd/rare.rst>`_ for details.
 
 Example 1
 -------

@@ -1,6 +1,6 @@
-=============
+====
 rare
-=============
+====
 
 .. rubric:: Table of contents
 
@@ -10,13 +10,13 @@ rare
 
 
 Description
-============
+===========
 | Using ``rare`` command to find the least common tuple of values of all fields in the field list.
 
 **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields.
 
 Syntax
-============
+======
 rare <field-list> [by-clause]
 
 rare [rare-options] <field-list> [by-clause] ``(available from 3.1.0+)``
@@ -26,10 +26,13 @@ rare [rare-options] <field-list> [by-clause] ``(available from 3.1.0+)``
 * rare-options: optional. options for the rare command. Supported syntax is [countfield=<string>] [showcount=<bool>].
 * showcount=<bool>: optional. whether to create a field in output that represent a count of the tuple of values. Default value is ``true``.
 * countfield=<string>: optional. the name of the field that contains count. Default value is ``'count'``.
+* usenull=<bool>: optional (since 3.4.0). whether to output the null value. The default value of ``usenull`` is determined by ``plugins.ppl.syntax.legacy.preferred``:
 
+ * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true``
+ * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false``
 
 Example 1: Find the least common values in a field
-===========================================
+==================================================
 
 The example finds least common gender of all the accounts.
 
@@ -46,7 +49,7 @@ PPL query::
 
 
 Example 2: Find the least common values organized by gender
-====================================================
+===========================================================
 
 The example finds least common age of all the accounts group by gender.
 
@@ -66,12 +69,10 @@ PPL query::
 Example 3: Rare command with Calcite enabled
 ============================================
 
-The example finds least common gender of all the accounts when ``plugins.calcite.enabled`` is true.
-
 PPL query::
 
-    PPL> source=accounts | rare gender;
-    fetched row
+    os> source=accounts | rare gender;
+    fetched rows / total rows = 2/2
     +--------+-------+
     | gender | count |
     |--------+-------|
@@ -83,19 +84,47 @@ PPL query::
 Example 4: Specify the count field option
 =========================================
 
-The example specifies the count field when ``plugins.calcite.enabled`` is true.
-
 PPL query::
 
-    PPL> source=accounts | rare countfield='cnt' gender;
-    fetched row
+    os> source=accounts | rare countfield='cnt' gender;
+    fetched rows / total rows = 2/2
     +--------+-----+
     | gender | cnt |
     |--------+-----|
     | F      | 1   |
     | M      | 3   |
     +--------+-----+
 
+
+Example 5: Specify the usenull field option
+===========================================
+
+PPL query::
+
+    os> source=accounts | rare usenull=false email;
+    fetched rows / total rows = 3/3
+    +-----------------------+-------+
+    | email                 | count |
+    |-----------------------+-------|
+    | [email protected]  | 1     |
+    | [email protected]   | 1     |
+    | [email protected] | 1     |
+    +-----------------------+-------+
+
+PPL query::
+
+    os> source=accounts | rare usenull=true email;
+    fetched rows / total rows = 4/4
+    +-----------------------+-------+
+    | email                 | count |
+    |-----------------------+-------|
+    | null                  | 1     |
+    | [email protected]  | 1     |
+    | [email protected]   | 1     |
+    | [email protected] | 1     |
+    +-----------------------+-------+
+
+
 Limitations
 ===========
 The ``rare`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node.