Skip to content

Commit d6a9719

Browse files
authored
Support format=yaml in Explain API (#4446)
--------- Signed-off-by: Peng Huo <[email protected]>
1 parent ab6ab0a commit d6a9719

File tree

17 files changed

+401
-184
lines changed

17 files changed

+401
-184
lines changed

core/src/main/java/org/opensearch/sql/ast/statement/Explain.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ public enum ExplainFormat {
3939
SIMPLE,
4040
STANDARD,
4141
EXTENDED,
42-
COST
42+
COST,
43+
/** Formats explain output in yaml format. */
44+
YAML
4345
}
4446

4547
public static ExplainFormat format(String format) {

core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,22 @@ public boolean equals(Object o) {
109109
public int hashCode() {
110110
return Objects.hash(root, calcite);
111111
}
112+
113+
public static ExplainResponse normalizeLf(ExplainResponse response) {
114+
ExecutionEngine.ExplainResponseNodeV2 calcite = response.getCalcite();
115+
if (calcite != null) {
116+
return new ExplainResponse(
117+
new ExecutionEngine.ExplainResponseNodeV2(
118+
normalizeLf(calcite.getLogical()),
119+
normalizeLf(calcite.getPhysical()),
120+
normalizeLf(calcite.getExtended())));
121+
}
122+
return response;
123+
}
124+
125+
private static String normalizeLf(String value) {
126+
return value == null ? null : value.replace("\r\n", "\n");
127+
}
112128
}
113129

114130
@AllArgsConstructor

core/src/main/java/org/opensearch/sql/executor/QueryService.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,8 @@ public void explainWithLegacy(
201201
Explain.ExplainFormat format,
202202
Optional<Throwable> calciteFailure) {
203203
try {
204-
if (format != null && format != Explain.ExplainFormat.STANDARD) {
204+
if (format != null
205+
&& (format != Explain.ExplainFormat.STANDARD && format != Explain.ExplainFormat.YAML)) {
205206
throw new UnsupportedOperationException(
206207
"Explain mode " + format.name() + " is not supported in v2 engine");
207208
}

core/src/main/java/org/opensearch/sql/utils/YamlFormatter.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55

66
package org.opensearch.sql.utils;
77

8+
import com.fasterxml.jackson.annotation.JsonInclude;
89
import com.fasterxml.jackson.core.JsonProcessingException;
910
import com.fasterxml.jackson.databind.ObjectMapper;
11+
import com.fasterxml.jackson.databind.SerializationFeature;
1012
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
1113
import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator;
1214

@@ -21,11 +23,16 @@ public class YamlFormatter {
2123
static {
2224
YAMLFactory yamlFactory = new YAMLFactory();
2325
yamlFactory.disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER);
26+
yamlFactory.enable(YAMLGenerator.Feature.USE_PLATFORM_LINE_BREAKS);
27+
yamlFactory.enable(YAMLGenerator.Feature.LITERAL_BLOCK_STYLE);
2428
yamlFactory.enable(YAMLGenerator.Feature.MINIMIZE_QUOTES); // Enable smart quoting
2529
yamlFactory.enable(
2630
YAMLGenerator.Feature.ALWAYS_QUOTE_NUMBERS_AS_STRINGS); // Quote numeric strings
2731
yamlFactory.enable(YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR);
2832
YAML_MAPPER = new ObjectMapper(yamlFactory);
33+
34+
YAML_MAPPER.setSerializationInclusion(JsonInclude.Include.NON_NULL);
35+
YAML_MAPPER.configure(SerializationFeature.WRITE_NULL_MAP_VALUES, false);
2936
}
3037

3138
/**

docs/category.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
{
22
"bash": [
3-
"user/ppl/interfaces/endpoint.rst",
4-
"user/ppl/interfaces/protocol.rst",
53
"user/optimization/optimization.rst",
64
"user/admin/settings.rst"
75
],
8-
"ppl_cli": [
6+
"bash_calcite": [
7+
"user/ppl/interfaces/endpoint.rst",
8+
"user/ppl/interfaces/protocol.rst"
99
],
1010
"sql_cli": [
1111
"user/dql/expressions.rst",

docs/user/ppl/interfaces/endpoint.rst

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -73,28 +73,78 @@ Description
7373

7474
You can send an HTTP explain request to the endpoint **/_plugins/_ppl/_explain** with your query in the request body to understand the execution plan for the PPL query. The explain endpoint is useful when you want insight into how the query is executed in the engine.
7575

76-
Example
77-
-------
76+
Description
77+
-----------
78+
79+
To translate your query, send it to the explain endpoint. The explain output is OpenSearch domain-specific language (DSL) in JSON format. You can copy and paste it into your console to run it against OpenSearch directly.
80+
81+
Explain output can be set to different formats: ``standard`` (the default format), ``simple``, ``extended``, ``dsl``.
82+
83+
84+
Example 1 default (standard) format
85+
-----------------------------------
7886

79-
The following PPL query demonstrated that where and stats command were pushed down to OpenSearch DSL aggregation query::
87+
Explain query::
8088

8189
sh$ curl -sS -H 'Content-Type: application/json' \
8290
... -X POST localhost:9200/_plugins/_ppl/_explain \
83-
... -d '{"query" : "source=accounts | where age > 10 | stats avg(age)"}'
91+
... -d '{"query" : "source=state_country | where age>30"}'
8492
{
85-
"root": {
86-
"name": "ProjectOperator",
87-
"description": {
88-
"fields": "[avg(age)]"
89-
},
90-
"children": [
91-
{
92-
"name": "OpenSearchIndexScan",
93-
"description": {
94-
"request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"avg(age)\":{\"avg\":{\"field\":\"age\"}}}}, searchDone=false)"
95-
},
96-
"children": []
97-
}
98-
]
93+
"calcite": {
94+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n",
95+
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
96+
}
97+
}
98+
99+
Example 2 simple format
100+
-----------------------
101+
102+
Explain query::
103+
104+
sh$ curl -sS -H 'Content-Type: application/json' \
105+
... -X POST localhost:9200/_plugins/_ppl/_explain?format=simple \
106+
... -d '{"query" : "source=state_country | where age>30"}'
107+
{
108+
"calcite": {
109+
"logical": "LogicalSystemLimit\n LogicalProject\n LogicalFilter\n CalciteLogicalIndexScan\n"
99110
}
100111
}
112+
113+
Example 3 extended format
114+
-------------------------
115+
116+
Explain query::
117+
118+
sh$ curl -sS -H 'Content-Type: application/json' \
119+
... -X POST localhost:9200/_plugins/_ppl/_explain?format=extended \
120+
... -d '{"query" : "source=state_country | where age>30 | dedup age"}'
121+
{
122+
"calcite": {
123+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[<=($12, 1)])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5], _id=[$6], _index=[$7], _score=[$8], _maxscore=[$9], _sort=[$10], _routing=[$11], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $5 ORDER BY $5)])\n LogicalFilter(condition=[IS NOT NULL($5)])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n",
124+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..6=[{inputs}], expr#7=[1], expr#8=[<=($t6, $t7)], proj#0..5=[{exprs}], $condition=[$t8])\n EnumerableWindow(window#0=[window(partition {5} order by [5] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30)], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n",
125+
"extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n int prevStart;\n int prevEnd;\n final java.util.Comparator comparator = new java.util.Comparator(){\n public int compare(Object[] v0, Object[] v1) {\n final int c;\n c = org.apache.calcite.runtime.Utilities.compareNullsLast((Long) v0[5], (Long) v1[5]);\n if (c != 0) {\n return c;\n }\n return 0;\n }\n\n public int compare(Object o0, Object o1) {\n return this.compare((Object[]) o0, (Object[]) o1);\n }\n\n };\n final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap();\n v1stashed.scan().foreach(new org.apache.calcite.linq4j.function.Function1() {\n public Object apply(Object[] v) {\n Long key = (Long) v[5];\n multiMap.putMulti(key, v);\n return null;\n }\n public Object apply(Object v) {\n return apply(\n (Object[]) v);\n }\n }\n );\n final java.util.Iterator iterator = multiMap.arrays(comparator);\n final java.util.ArrayList _list = new java.util.ArrayList(\n multiMap.size());\n Long a0w0 = (Long) null;\n while (iterator.hasNext()) {\n final Object[] _rows = (Object[]) iterator.next();\n prevStart = -1;\n prevEnd = 2147483647;\n for (int i = 0; i < _rows.length; ++i) {\n final Object[] row = (Object[]) _rows[i];\n if (i != prevEnd) {\n int actualStart = i < prevEnd ? 
0 : prevEnd + 1;\n prevEnd = i;\n a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue());\n }\n _list.add(new Object[] {\n row[0],\n row[1],\n row[2],\n row[3],\n row[4],\n row[5],\n a0w0});\n }\n }\n multiMap.clear();\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable = org.apache.calcite.linq4j.Linq4j.asEnumerable(_list);\n final org.apache.calcite.linq4j.AbstractEnumerable child = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[6]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Object input_value = current[0];\n final Object input_value0 = current[1];\n final Object input_value1 = current[2];\n final Object input_value2 = current[3];\n final Object input_value3 = current[4];\n final Object input_value4 = current[5];\n return new Object[] {\n input_value,\n input_value0,\n input_value1,\n input_value2,\n input_value3,\n input_value4};\n }\n\n static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue();\n };\n }\n\n };\n return child.take(10000);\n}\n\n\npublic Class getElementType() {\n return java.lang.Object[].class;\n}\n\n\n"
126+
}
127+
}
128+
129+
Example 4 YAML format (experimental)
130+
------------------------------------
131+
132+
.. note::
133+
YAML explain output is an experimental feature and not intended for
134+
production use. The interface and output may change without notice.
135+
136+
Return the explain response in ``yaml`` format.
137+
138+
Explain query::
139+
140+
sh$ curl -sS -H 'Content-Type: application/json' \
141+
... -X POST localhost:9200/_plugins/_ppl/_explain?format=yaml \
142+
... -d '{"query" : "source=state_country | where age>30"}'
143+
calcite:
144+
logical: |
145+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
146+
LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])
147+
LogicalFilter(condition=[>($5, 30)])
148+
CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])
149+
physical: |
150+
CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["name","country","state","month","year","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

0 commit comments

Comments
 (0)