From 112d5ce30a383ea1e5e5f1c5f4e4c77b95230f14 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 3 Jun 2025 18:18:58 +0800 Subject: [PATCH 01/20] Create scalffolds for implementing expand command Signed-off-by: Yuanchun Shen --- .../sql/ast/AbstractNodeVisitor.java | 33 ++----------- .../org/opensearch/sql/ast/tree/Expand.java | 48 +++++++++++++++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 31 +++--------- .../sql/planner/logical/LogicalExpand.java | 36 ++++++++++++++ .../logical/LogicalPlanNodeVisitor.java | 4 ++ .../sql/calcite/remote/CalciteExpandIT.java | 19 ++++++++ .../opensearch/sql/ppl/ExpandCommandIT.java | 47 ++++++++++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 +++ .../opensearch/sql/ppl/parser/AstBuilder.java | 32 ++++--------- 10 files changed, 180 insertions(+), 77 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Expand.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 39401db606f..7781236ff8b 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -43,34 +43,7 @@ import org.opensearch.sql.ast.statement.Explain; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; -import org.opensearch.sql.ast.tree.AD; -import org.opensearch.sql.ast.tree.Aggregation; -import org.opensearch.sql.ast.tree.CloseCursor; -import org.opensearch.sql.ast.tree.Dedupe; -import org.opensearch.sql.ast.tree.Eval; -import org.opensearch.sql.ast.tree.FetchCursor; -import org.opensearch.sql.ast.tree.FillNull; -import org.opensearch.sql.ast.tree.Filter; -import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Join; -import org.opensearch.sql.ast.tree.Kmeans; -import org.opensearch.sql.ast.tree.Limit; -import org.opensearch.sql.ast.tree.Lookup; -import org.opensearch.sql.ast.tree.ML; -import org.opensearch.sql.ast.tree.Paginate; -import org.opensearch.sql.ast.tree.Parse; -import org.opensearch.sql.ast.tree.Patterns; -import org.opensearch.sql.ast.tree.Project; -import org.opensearch.sql.ast.tree.RareTopN; -import org.opensearch.sql.ast.tree.Relation; -import org.opensearch.sql.ast.tree.RelationSubquery; -import org.opensearch.sql.ast.tree.Rename; -import org.opensearch.sql.ast.tree.Sort; -import org.opensearch.sql.ast.tree.SubqueryAlias; -import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Trendline; -import org.opensearch.sql.ast.tree.Values; -import org.opensearch.sql.ast.tree.Window; +import org.opensearch.sql.ast.tree.*; /** AST nodes visitor Defines the traverse path. */ public abstract class AbstractNodeVisitor { @@ -344,6 +317,10 @@ public T visitFillNull(FillNull fillNull, C context) { return visitChildren(fillNull, context); } + public T visitExpand(Expand expand, C context) { + return visitChildren(expand, context); + } + public T visitPatterns(Patterns patterns, C context) { return visitChildren(patterns, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java new file mode 100644 index 00000000000..bc03da8f5d7 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java @@ -0,0 +1,48 @@ +/* + * + * * Copyright OpenSearch Contributors + * * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.opensearch.sql.ast.tree; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +import java.util.List; +import java.util.Optional; + +/** AST node represent Expand operation. */ +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +@ToString +public class Expand extends UnresolvedPlan{ + private UnresolvedPlan child; + @Getter + private final Field field; + @Getter + private final Optional alias; + + @Override + public Expand attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return child == null ? List.of() : List.of(child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitExpand(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index d941477d523..e0134733cf9 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -56,33 +56,9 @@ import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.WindowFrame; import org.opensearch.sql.ast.expression.subquery.SubqueryExpression; -import org.opensearch.sql.ast.tree.AD; -import org.opensearch.sql.ast.tree.Aggregation; -import org.opensearch.sql.ast.tree.CloseCursor; -import org.opensearch.sql.ast.tree.Dedupe; -import org.opensearch.sql.ast.tree.Eval; -import org.opensearch.sql.ast.tree.FetchCursor; -import org.opensearch.sql.ast.tree.FillNull; -import org.opensearch.sql.ast.tree.Filter; -import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Join; -import org.opensearch.sql.ast.tree.Kmeans; -import org.opensearch.sql.ast.tree.Lookup; +import org.opensearch.sql.ast.tree.*; import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; -import org.opensearch.sql.ast.tree.ML; -import org.opensearch.sql.ast.tree.Paginate; -import org.opensearch.sql.ast.tree.Parse; -import org.opensearch.sql.ast.tree.Project; -import org.opensearch.sql.ast.tree.RareTopN; -import org.opensearch.sql.ast.tree.Relation; -import org.opensearch.sql.ast.tree.Rename; -import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; -import org.opensearch.sql.ast.tree.SubqueryAlias; -import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Trendline; -import org.opensearch.sql.ast.tree.UnresolvedPlan; -import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.calcite.utils.JoinAndLookupUtils; import org.opensearch.sql.calcite.utils.PlanUtils; @@ -847,4 +823,9 @@ public RelNode visitTableFunction(TableFunction node, CalcitePlanContext context public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { throw new CalciteUnsupportedException("Trendline command is unsupported in Calcite"); } + + @Override + public RelNode visitExpand(Expand expand, CalcitePlanContext context) { + throw new CalciteUnsupportedException("Expand command is unsupported in Calcite"); + } } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java new file mode 100644 index 00000000000..7a77f4879b3 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java @@ -0,0 +1,36 @@ +/* + * + * * Copyright OpenSearch Contributors + * * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.opensearch.sql.planner.logical; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.expression.Expression; + +import java.util.Collections; + +@ToString +@EqualsAndHashCode(callSuper = true) +public class LogicalExpand extends LogicalPlan{ + + @Getter + private final Expression field; + @Getter + private final Expression alias; + + public LogicalExpand(LogicalPlan child, Expression field, Expression alias) { + super(Collections.singletonList(child)); + this.field = field; + this.alias = alias; + } + + @Override + public R accept(LogicalPlanNodeVisitor visitor, C context) { + return visitor.visitExpand(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index c9eedd8efc8..4ba2459fad4 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -104,6 +104,10 @@ public R visitAD(LogicalAD plan, C context) { return visitNode(plan, context); } + public R visitExpand(LogicalExpand plan, C context) { + return visitNode(plan, context); + } + public R visitTrendline(LogicalTrendline plan, C context) { return visitNode(plan, context); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java new file mode 100644 index 00000000000..51a0f99d50a --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java @@ -0,0 +1,19 @@ +/* + * + * * Copyright OpenSearch Contributors + * * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.opensearch.sql.calcite.remote; + +import org.opensearch.sql.ppl.ExpandCommandIT; + +public class CalciteExpandIT extends ExpandCommandIT { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + disallowCalciteFallback(); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java new file mode 100644 index 00000000000..b4e7a9adc75 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java @@ -0,0 +1,47 @@ +/* + * + * * Copyright OpenSearch Contributors + * * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class ExpandCommandIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + loadIndex(Index.NESTED_SIMPLE); + } + + @Test + public void testExpand() throws Exception { + JSONObject response = + executeQuery(String.format("source=%s | expand address", TEST_INDEX_NESTED_SIMPLE)); + verifySchema( + response, schema("name", "string"), schema("age", "integer"), schema("id", "integer"), schema("address", "object")); + verifyNumOfRows(response, 11); + } + + @Test + public void testExpandWithAlias() throws Exception { + JSONObject response = + executeQuery(String.format("source=%s | expand address as addr", TEST_INDEX_NESTED_SIMPLE)); + verifySchema( + response, + schema("name", "string"), + schema("age", "integer"), + schema("id", "integer"), + schema("address", "array"), + schema("addr", "object")); + verifyNumOfRows(response, 11); + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 521e7d09a13..a9331e70ed9 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -37,6 +37,7 @@ KMEANS: 'KMEANS'; AD: 'AD'; ML: 'ML'; FILLNULL: 'FILLNULL'; +EXPAND: 'EXPAND'; TRENDLINE: 'TRENDLINE'; SIMPLE_PATTERN: 'SIMPLE_PATTERN'; BRAIN: 'BRAIN'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 08c9e78160b..70873b3afae 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -69,6 +69,7 @@ commands | adCommand | mlCommand | fillnullCommand + | expandCommand | trendlineCommand ; @@ -96,6 +97,7 @@ commandName | AD | ML | FILLNULL + | EXPAND | TRENDLINE | EXPLAIN ; @@ -221,6 +223,10 @@ fillNullUsing : USING replacementPair (COMMA replacementPair)* ; +expandCommand + : EXPAND fieldExpression (AS alias = qualifiedName)? + ; + replacementPair : fieldExpression EQUAL replacement = valueExpression ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 295c8adbc71..88031c4e10e 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -55,31 +55,8 @@ import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.WindowFunction; -import org.opensearch.sql.ast.tree.AD; -import org.opensearch.sql.ast.tree.Aggregation; -import org.opensearch.sql.ast.tree.Dedupe; -import org.opensearch.sql.ast.tree.DescribeRelation; -import org.opensearch.sql.ast.tree.Eval; -import org.opensearch.sql.ast.tree.FillNull; -import org.opensearch.sql.ast.tree.Filter; -import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Join; -import org.opensearch.sql.ast.tree.Kmeans; -import org.opensearch.sql.ast.tree.Lookup; -import org.opensearch.sql.ast.tree.ML; -import org.opensearch.sql.ast.tree.Parse; -import org.opensearch.sql.ast.tree.Patterns; -import org.opensearch.sql.ast.tree.Project; -import org.opensearch.sql.ast.tree.RareTopN; +import org.opensearch.sql.ast.tree.*; import org.opensearch.sql.ast.tree.RareTopN.CommandType; -import org.opensearch.sql.ast.tree.Relation; -import org.opensearch.sql.ast.tree.Rename; -import org.opensearch.sql.ast.tree.Sort; -import org.opensearch.sql.ast.tree.SubqueryAlias; -import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Trendline; -import org.opensearch.sql.ast.tree.UnresolvedPlan; -import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.common.utils.StringUtils; @@ -651,6 +628,13 @@ public UnresolvedPlan visitFillNullUsing(OpenSearchPPLParser.FillNullUsingContex return FillNull.ofVariousValue(replacementsBuilder.build()); } + /** expand command. */ + @Override + public UnresolvedPlan visitExpandCommand(OpenSearchPPLParser.ExpandCommandContext ctx) { + return new Expand((Field) internalVisitExpression(ctx.fieldExpression()), + ctx.alias!=null ? Optional.of(internalVisitExpression(ctx.alias)) : Optional.empty()); + } + /** trendline command. */ @Override public UnresolvedPlan visitTrendlineCommand(OpenSearchPPLParser.TrendlineCommandContext ctx) { From 7220ed07b41451f8f2d511a1eb59a63c9b178690 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 4 Jun 2025 18:39:42 +0800 Subject: [PATCH 02/20] WIP: Implementing expand command Signed-off-by: Yuanchun Shen --- .../sql/ast/AbstractNodeVisitor.java | 8 +-- .../org/opensearch/sql/ast/dsl/AstDSL.java | 26 ++------- .../org/opensearch/sql/ast/tree/Expand.java | 56 ++++++++----------- .../sql/calcite/CalciteRelNodeVisitor.java | 49 +++++++++++++++- .../sql/planner/logical/LogicalExpand.java | 27 ++++----- .../sql/calcite/remote/CalciteExpandIT.java | 12 ++-- .../opensearch/sql/ppl/ExpandCommandIT.java | 9 ++- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 2 +- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 10 ++-- .../opensearch/sql/ppl/parser/AstBuilder.java | 14 ++--- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 29 +++------- 11 files changed, 127 insertions(+), 115 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 7781236ff8b..c4ed19c829c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -85,6 +85,10 @@ public T visitRelationSubquery(RelationSubquery node, C context) { return visitChildren(node, context); } + public T visitExpand(Expand expand, C context) { + return visitChildren(expand, context); + } + public T visitTableFunction(TableFunction node, C context) { return visitChildren(node, context); } @@ -317,10 +321,6 @@ public T visitFillNull(FillNull fillNull, C context) { return visitChildren(fillNull, context); } - public T visitExpand(Expand expand, C context) { - return visitChildren(expand, context); - } - public T visitPatterns(Patterns patterns, C context) { return visitChildren(patterns, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 57536d62d6b..3f088ed7be0 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -48,29 +48,9 @@ import org.opensearch.sql.ast.expression.When; import org.opensearch.sql.ast.expression.WindowFunction; import org.opensearch.sql.ast.expression.Xor; -import org.opensearch.sql.ast.tree.Aggregation; -import org.opensearch.sql.ast.tree.Dedupe; -import org.opensearch.sql.ast.tree.DescribeRelation; -import org.opensearch.sql.ast.tree.Eval; -import org.opensearch.sql.ast.tree.FillNull; -import org.opensearch.sql.ast.tree.Filter; -import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Limit; -import org.opensearch.sql.ast.tree.Parse; -import org.opensearch.sql.ast.tree.Patterns; -import org.opensearch.sql.ast.tree.Project; -import org.opensearch.sql.ast.tree.RareTopN; +import org.opensearch.sql.ast.tree.*; import org.opensearch.sql.ast.tree.RareTopN.CommandType; -import org.opensearch.sql.ast.tree.Relation; -import org.opensearch.sql.ast.tree.RelationSubquery; -import org.opensearch.sql.ast.tree.Rename; -import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; -import org.opensearch.sql.ast.tree.SubqueryAlias; -import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Trendline; -import org.opensearch.sql.ast.tree.UnresolvedPlan; -import org.opensearch.sql.ast.tree.Values; /** Class of static methods to create specific node instances. */ @UtilityClass @@ -117,6 +97,10 @@ public static Eval eval(UnresolvedPlan input, Let... projectList) { return new Eval(Arrays.asList(projectList)).attach(input); } + public Expand expand(UnresolvedPlan input, Field field) { + return new Expand(field).attach(input); + } + public static UnresolvedPlan projectWithArg( UnresolvedPlan input, List argList, UnresolvedExpression... projectList) { return new Project(Arrays.asList(projectList), argList).attach(input); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java index bc03da8f5d7..77aaac885de 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java @@ -1,48 +1,40 @@ /* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 */ package org.opensearch.sql.ast.tree; -import lombok.EqualsAndHashCode; +import com.google.common.collect.ImmutableList; +import java.util.List; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; -import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.expression.Field; -import org.opensearch.sql.ast.expression.UnresolvedExpression; - -import java.util.List; -import java.util.Optional; -/** AST node represent Expand operation. */ -@RequiredArgsConstructor -@EqualsAndHashCode(callSuper = false) +/** AST node representing an {@code expand } operation. */ +@Getter @ToString -public class Expand extends UnresolvedPlan{ - private UnresolvedPlan child; - @Getter - private final Field field; - @Getter - private final Optional alias; +@RequiredArgsConstructor +public class Expand extends UnresolvedPlan { + + private UnresolvedPlan child; + @Getter private final Field field; - @Override - public Expand attach(UnresolvedPlan child) { - this.child = child; - return this; - } + @Override + public Expand attach(UnresolvedPlan child) { + this.child = child; + return this; + } - @Override - public List getChild() { - return child == null ? List.of() : List.of(child); - } + @Override + public List getChild() { + return ImmutableList.of(child); + } - @Override - public T accept(AbstractNodeVisitor nodeVisitor, C context) { - return nodeVisitor.visitExpand(this, context); - } + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitExpand(this, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index e0134733cf9..1c1ce94676f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -28,6 +28,7 @@ import org.apache.calcite.plan.ViewExpanders; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; @@ -164,7 +165,7 @@ public RelNode visitProject(Project node, CalcitePlanContext context) { } /** See logic in {@link org.opensearch.sql.analysis.symbol.SymbolTable#lookupAllFields} */ - private void tryToRemoveNestedFields(CalcitePlanContext context) { + private static void tryToRemoveNestedFields(CalcitePlanContext context) { Set allFields = new HashSet<>(context.relBuilder.peek().getRowType().getFieldNames()); List duplicatedNestedFields = allFields.stream() @@ -824,8 +825,52 @@ public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { throw new CalciteUnsupportedException("Trendline command is unsupported in Calcite"); } + /** + * Expand command visitor to handle array field expansion. 1. Unnest 2. Join with the original + * table to get all fields + * + *

S = π_{field, other_fields}(R ⨝ UNNEST_field(R)) + * + * @param expand Expand command to be visited + * @param context CalcitePlanContext containing the RelBuilder and other context + * @return RelNode representing records with the expanded array field + */ @Override public RelNode visitExpand(Expand expand, CalcitePlanContext context) { - throw new CalciteUnsupportedException("Expand command is unsupported in Calcite"); + // 1. Visit Children + visitChildren(expand, context); + + var relBuilder = context.relBuilder; + + // 3. Get the field to expand + Field arrayField = expand.getField(); + + // 5. Unnest the array field + // Analyze the array field to get its RexNode + RexNode arrayFieldRex = rexVisitor.analyze(arrayField, context); + + // Push the original table to the RelBuilder stack + RelNode originalTable = relBuilder.peek(); + // No alias is provided in the expand command, so we remove the original array field, + // then replace it with the unnest result. + relBuilder.projectExcept(arrayFieldRex); + relBuilder.push(originalTable); + + // Join on ROW_NUMBER_COLUMN_NAME + Holder correlVariable = Holder.empty(); + relBuilder.variable(correlVariable::set); + + relBuilder.project(List.of(arrayFieldRex), List.of(), false, List.of(correlVariable.get().id)); + // Alias is not supported in expand yet, we pass in an empty list + relBuilder.uncollect(List.of(), false); + + List allFields = + relBuilder.peek().getRowType().getFieldList().stream() + .map(f -> (RexNode) relBuilder.field(f.getName())) + .toList(); + + relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, relBuilder.fields()); + + return relBuilder.peek(); } } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java index 7a77f4879b3..690881bf733 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java @@ -7,30 +7,25 @@ package org.opensearch.sql.planner.logical; +import java.util.Collections; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; import org.opensearch.sql.expression.Expression; -import java.util.Collections; - @ToString @EqualsAndHashCode(callSuper = true) -public class LogicalExpand extends LogicalPlan{ +public class LogicalExpand extends LogicalPlan { - @Getter - private final Expression field; - @Getter - private final Expression alias; + @Getter private final Expression field; - public LogicalExpand(LogicalPlan child, Expression field, Expression alias) { - super(Collections.singletonList(child)); - this.field = field; - this.alias = alias; - } + public LogicalExpand(LogicalPlan child, Expression field) { + super(Collections.singletonList(child)); + this.field = field; + } - @Override - public R accept(LogicalPlanNodeVisitor visitor, C context) { - return visitor.visitExpand(this, context); - } + @Override + public R accept(LogicalPlanNodeVisitor visitor, C context) { + return visitor.visitExpand(this, context); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java index 51a0f99d50a..8365cfd915e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java @@ -10,10 +10,10 @@ import org.opensearch.sql.ppl.ExpandCommandIT; public class CalciteExpandIT extends ExpandCommandIT { - @Override - public void init() throws Exception { - super.init(); - enableCalcite(); - disallowCalciteFallback(); - } + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + disallowCalciteFallback(); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java index b4e7a9adc75..08874daf549 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java @@ -13,6 +13,7 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import org.json.JSONObject; +import org.junit.Ignore; import org.junit.jupiter.api.Test; public class ExpandCommandIT extends PPLIntegTestCase { @@ -27,10 +28,16 @@ public void testExpand() throws Exception { JSONObject response = executeQuery(String.format("source=%s | expand address", TEST_INDEX_NESTED_SIMPLE)); verifySchema( - response, schema("name", "string"), schema("age", "integer"), schema("id", "integer"), schema("address", "object")); + response, + schema("name", "string"), + schema("age", "integer"), + schema("id", "integer"), + schema("address", "object")); verifyNumOfRows(response, 11); } + // TODO: double check if expand with alias is supported + @Ignore @Test public void testExpandWithAlias() throws Exception { JSONObject response = diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index a9331e70ed9..6392a01758f 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -37,8 +37,8 @@ KMEANS: 'KMEANS'; AD: 'AD'; ML: 'ML'; FILLNULL: 'FILLNULL'; -EXPAND: 'EXPAND'; TRENDLINE: 'TRENDLINE'; +EXPAND: 'EXPAND'; SIMPLE_PATTERN: 'SIMPLE_PATTERN'; BRAIN: 'BRAIN'; VARIABLE_COUNT_THRESHOLD: 'VARIABLE_COUNT_THRESHOLD'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 70873b3afae..5f85f647738 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -69,8 +69,8 @@ commands | adCommand | mlCommand | fillnullCommand - | expandCommand | trendlineCommand + | expandCommand ; commandName @@ -223,10 +223,6 @@ fillNullUsing : USING replacementPair (COMMA replacementPair)* ; -expandCommand - : EXPAND fieldExpression (AS alias = qualifiedName)? - ; - replacementPair : fieldExpression EQUAL replacement = valueExpression ; @@ -243,6 +239,10 @@ trendlineType : SMA ; +expandCommand + : EXPAND fieldExpression + ; + kmeansCommand : KMEANS (kmeansParameter)* ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 88031c4e10e..3cb352c6e55 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -415,6 +415,13 @@ public UnresolvedPlan visitTopCommand(OpenSearchPPLParser.TopCommandContext ctx) groupList); } + /** expand command. */ + @Override + public UnresolvedPlan visitExpandCommand(OpenSearchPPLParser.ExpandCommandContext ctx) { + Field fieldExpression = (Field) internalVisitExpression(ctx.fieldExpression()); + return new Expand(fieldExpression); + } + @Override public UnresolvedPlan visitGrokCommand(OpenSearchPPLParser.GrokCommandContext ctx) { UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field); @@ -628,13 +635,6 @@ public UnresolvedPlan visitFillNullUsing(OpenSearchPPLParser.FillNullUsingContex return FillNull.ofVariousValue(replacementsBuilder.build()); } - /** expand command. */ - @Override - public UnresolvedPlan visitExpandCommand(OpenSearchPPLParser.ExpandCommandContext ctx) { - return new Expand((Field) internalVisitExpression(ctx.fieldExpression()), - ctx.alias!=null ? Optional.of(internalVisitExpression(ctx.alias)) : Optional.empty()); - } - /** trendline command. */ @Override public UnresolvedPlan visitTrendlineCommand(OpenSearchPPLParser.TrendlineCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 3fc40b61207..af14a1a07d3 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -49,26 +49,7 @@ import org.opensearch.sql.ast.statement.Explain; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; -import org.opensearch.sql.ast.tree.Aggregation; -import org.opensearch.sql.ast.tree.Dedupe; -import org.opensearch.sql.ast.tree.DescribeRelation; -import org.opensearch.sql.ast.tree.Eval; -import org.opensearch.sql.ast.tree.FillNull; -import org.opensearch.sql.ast.tree.Filter; -import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Join; -import org.opensearch.sql.ast.tree.Lookup; -import org.opensearch.sql.ast.tree.Parse; -import org.opensearch.sql.ast.tree.Project; -import org.opensearch.sql.ast.tree.RareTopN; -import org.opensearch.sql.ast.tree.Relation; -import org.opensearch.sql.ast.tree.Rename; -import org.opensearch.sql.ast.tree.Sort; -import org.opensearch.sql.ast.tree.SubqueryAlias; -import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Trendline; -import org.opensearch.sql.ast.tree.UnresolvedPlan; -import org.opensearch.sql.ast.tree.Window; +import org.opensearch.sql.ast.tree.*; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.planner.logical.LogicalAggregation; @@ -305,6 +286,14 @@ public String visitEval(Eval node, String context) { return StringUtils.format("%s | eval %s", child, expressions); } + @Override + public String visitExpand(Expand node, String context) { + String child = node.getChild().getFirst().accept(this, context); + String field = visitExpression(node.getField()); + + return StringUtils.format("%s | expand %s", child, field); + } + /** Build {@link LogicalSort}. */ @Override public String visitSort(Sort node, String context) { From c537b92561e2af6a40f272e3a74a87a664c213e9 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 5 Jun 2025 23:11:35 +0800 Subject: [PATCH 03/20] Add array json and index mapping Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 34 +++++++++++-------- ...andIT.java => CalciteExpandCommandIT.java} | 0 .../sql/legacy/SQLIntegTestCase.java | 8 ++++- .../org/opensearch/sql/legacy/TestUtils.java | 5 +++ .../opensearch/sql/legacy/TestsConstants.java | 1 + integ-test/src/test/resources/array.json | 4 +++ .../indexDefinitions/array_index_mapping.json | 18 ++++++++++ 7 files changed, 55 insertions(+), 15 deletions(-) rename integ-test/src/test/java/org/opensearch/sql/calcite/remote/{CalciteExpandIT.java => CalciteExpandCommandIT.java} (100%) create mode 100644 integ-test/src/test/resources/array.json create mode 100644 integ-test/src/test/resources/indexDefinitions/array_index_mapping.json diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 1c1ce94676f..fbbc6398897 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -842,34 +842,40 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { var relBuilder = context.relBuilder; - // 3. Get the field to expand + // 2. Get the field to expand Field arrayField = expand.getField(); - // 5. Unnest the array field + // 3. Unnest the array field // Analyze the array field to get its RexNode - RexNode arrayFieldRex = rexVisitor.analyze(arrayField, context); - + RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); // Push the original table to the RelBuilder stack RelNode originalTable = relBuilder.peek(); // No alias is provided in the expand command, so we remove the original array field, // then replace it with the unnest result. - relBuilder.projectExcept(arrayFieldRex); - relBuilder.push(originalTable); + // relBuilder.projectExcept(arrayFieldRex); - // Join on ROW_NUMBER_COLUMN_NAME + // Capture the outer row in a CorrelationId Holder correlVariable = Holder.empty(); relBuilder.variable(correlVariable::set); - relBuilder.project(List.of(arrayFieldRex), List.of(), false, List.of(correlVariable.get().id)); + relBuilder.push(originalTable); + + RexNode correlArrayField = + relBuilder.field( + context.rexBuilder.makeCorrel(originalTable.getRowType(), correlVariable.get().id), + arrayFieldRex.getIndex()); + + relBuilder.project( + List.of(correlArrayField), + List.of(arrayField.getField().toString()), + false, + List.of(correlVariable.get().id)); + // Alias is not supported in expand yet, we pass in an empty list relBuilder.uncollect(List.of(), false); - List allFields = - relBuilder.peek().getRowType().getFieldList().stream() - .map(f -> (RexNode) relBuilder.field(f.getName())) - .toList(); - - relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, relBuilder.fields()); + // ImmutableBitSet requiredFields = originalTable.getRowType().getFieldList() + relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, arrayFieldRex); return relBuilder.peek(); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java similarity index 100% rename from integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandIT.java rename to integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 7c82dbe8995..737187c579d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.legacy.TestUtils.createIndexByRestClient; import static org.opensearch.sql.legacy.TestUtils.getAccountIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getAliasIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getArrayIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getBankIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getBankWithNullValuesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getDataTypeNonnumericIndexMapping; @@ -888,7 +889,12 @@ public enum Index { "customer", "tpch", getTpchMappingFile("customer_index_mapping.json"), - "src/test/resources/tpch/data/customer.json"); + "src/test/resources/tpch/data/customer.json"), + ARRAY( + TestsConstants.TEST_INDEX_ARRAY, + "array", + getArrayIndexMapping(), + "src/test/resources/array.json"); private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index 5adca42e46c..b1f825726ae 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -295,6 +295,11 @@ public static String getDuplicationNullableIndexMapping() { return getMappingFile(mappingFile); } + public static String getArrayIndexMapping() { + String mappingFile = "array_index_mapping.json"; + return getMappingFile(mappingFile); + } + public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index e2057ee1637..a57ab04f8a7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -74,6 +74,7 @@ public class TestsConstants { public static final String TEST_INDEX_WORKER = TEST_INDEX + "_worker"; public static final String TEST_INDEX_WORK_INFORMATION = TEST_INDEX + "_work_information"; public static final String TEST_INDEX_DUPLICATION_NULLABLE = TEST_INDEX + "_duplication_nullable"; + public static final String TEST_INDEX_ARRAY = TEST_INDEX + "_array"; public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public static final String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; diff --git a/integ-test/src/test/resources/array.json b/integ-test/src/test/resources/array.json new file mode 100644 index 00000000000..70af5474786 --- /dev/null +++ b/integ-test/src/test/resources/array.json @@ -0,0 +1,4 @@ +{"index":{"_id":"1"}} +{"numbers":[1, 2],"strings": ["a", "b"]} +{"index":{"_id":"2"}} +{"numbers":[3, 4],"strings": ["c", "d", "e"]} diff --git a/integ-test/src/test/resources/indexDefinitions/array_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/array_index_mapping.json new file mode 100644 index 00000000000..a6c93f29f21 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/array_index_mapping.json @@ -0,0 +1,18 @@ +{ + "mappings": { + "properties": { + "numbers": { + "type": "long" + }, + "strings": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } +} From ae55697b71035b977a2b16f3dde7f6fb6896f605 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 6 Jun 2025 10:15:30 +0800 Subject: [PATCH 04/20] WIP: Implementing expand command Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 25 ++++++++++--------- .../CalciteUnsupportedException.java | 4 +++ .../opensearch/sql/executor/QueryService.java | 2 +- .../remote/CalciteExpandCommandIT.java | 2 +- .../opensearch/sql/ppl/ExpandCommandIT.java | 17 ++++++++++--- .../executor/OpenSearchExecutionEngine.java | 15 ++++++++++- 6 files changed, 47 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index fbbc6398897..f6a01f6fb4f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -844,12 +844,10 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { // 2. Get the field to expand Field arrayField = expand.getField(); - // 3. Unnest the array field // Analyze the array field to get its RexNode RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); // Push the original table to the RelBuilder stack - RelNode originalTable = relBuilder.peek(); // No alias is provided in the expand command, so we remove the original array field, // then replace it with the unnest result. // relBuilder.projectExcept(arrayFieldRex); @@ -858,24 +856,27 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { Holder correlVariable = Holder.empty(); relBuilder.variable(correlVariable::set); - relBuilder.push(originalTable); - + // Push a copy of the original table to the RelBuilder stack as right + // side of the join. + relBuilder.push(relBuilder.peek()); RexNode correlArrayField = relBuilder.field( - context.rexBuilder.makeCorrel(originalTable.getRowType(), correlVariable.get().id), + context.rexBuilder.makeCorrel(relBuilder.peek().getRowType(), correlVariable.get().id), arrayFieldRex.getIndex()); - relBuilder.project( - List.of(correlArrayField), - List.of(arrayField.getField().toString()), - false, - List.of(correlVariable.get().id)); + // Filter rows where the array field is the same as the left side + // TODO: This is not a standard way to use correlate and uncollect together. + // Correct it in the future. + RexNode filterCondition = relBuilder.equals(correlArrayField, arrayFieldRex); + relBuilder.filter(filterCondition); + relBuilder.project(List.of(correlArrayField), List.of(arrayField.getField().toString())); // Alias is not supported in expand yet, we pass in an empty list relBuilder.uncollect(List.of(), false); - // ImmutableBitSet requiredFields = originalTable.getRowType().getFieldList() - relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, arrayFieldRex); + // The last parameter has to refer to the array to be expanded on the left side. It will + // be used by the right side to correlate with the left side. + relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)); return relBuilder.peek(); } diff --git a/core/src/main/java/org/opensearch/sql/exception/CalciteUnsupportedException.java b/core/src/main/java/org/opensearch/sql/exception/CalciteUnsupportedException.java index 139e7590489..ad68c28e7fe 100644 --- a/core/src/main/java/org/opensearch/sql/exception/CalciteUnsupportedException.java +++ b/core/src/main/java/org/opensearch/sql/exception/CalciteUnsupportedException.java @@ -10,4 +10,8 @@ public class CalciteUnsupportedException extends QueryEngineException { public CalciteUnsupportedException(String message) { super(message); } + + public CalciteUnsupportedException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index a6a6f9c9520..8a9bafa9bf6 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -113,7 +113,7 @@ public void executeWithCalcite( } else { if (t instanceof Error) { // Calcite may throw AssertError during query execution. - listener.onFailure(new CalciteUnsupportedException(t.getMessage())); + listener.onFailure(new CalciteUnsupportedException(t.getMessage(), t)); } else { listener.onFailure((Exception) t); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index 8365cfd915e..11c3e31de8c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -9,7 +9,7 @@ import org.opensearch.sql.ppl.ExpandCommandIT; -public class CalciteExpandIT extends ExpandCommandIT { +public class CalciteExpandCommandIT extends ExpandCommandIT { @Override public void init() throws Exception { super.init(); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java index 08874daf549..dfea3ff893d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java @@ -7,6 +7,7 @@ package org.opensearch.sql.ppl; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ARRAY; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; @@ -21,10 +22,11 @@ public class ExpandCommandIT extends PPLIntegTestCase { public void init() throws Exception { super.init(); loadIndex(Index.NESTED_SIMPLE); + loadIndex(Index.ARRAY); } @Test - public void testExpand() throws Exception { + public void testExpandOnNested() throws Exception { JSONObject response = executeQuery(String.format("source=%s | expand address", TEST_INDEX_NESTED_SIMPLE)); verifySchema( @@ -32,10 +34,19 @@ public void testExpand() throws Exception { schema("name", "string"), schema("age", "integer"), schema("id", "integer"), - schema("address", "object")); + schema("address", "struct")); verifyNumOfRows(response, 11); } + @Ignore + @Test + public void testExpandOnArray() throws Exception { + JSONObject response = + executeQuery(String.format("source=%s | expand strings", TEST_INDEX_ARRAY)); + verifySchema(response, schema("numbers", "array"), schema("strings", "string")); + verifyNumOfRows(response, 5); + } + // TODO: double check if expand with alias is supported @Ignore @Test @@ -48,7 +59,7 @@ public void testExpandWithAlias() throws Exception { schema("age", "integer"), schema("id", "integer"), schema("address", "array"), - schema("addr", "object")); + schema("addr", "struct")); verifyNumOfRows(response, 11); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 6183aa109bf..4b51d83cf73 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -25,12 +25,14 @@ import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.runtime.Hook; import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql.type.SqlTypeName; import org.opensearch.sql.ast.statement.Explain.ExplainFormat; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.CalciteToolsHelper.OpenSearchRelRunners; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.executor.ExecutionContext; import org.opensearch.sql.executor.ExecutionEngine; @@ -232,7 +234,18 @@ private void buildResultSet( for (int i = 1; i <= columnCount; ++i) { String columnName = metaData.getColumnName(i); RelDataType fieldType = fieldTypes.get(i - 1); - ExprType exprType = convertRelDataTypeToExprType(fieldType); + // The element type of struct and array is currently set to ANY. + // We set them using the runtime type as a workaround. + ExprType exprType; + if (fieldType.getSqlTypeName() == SqlTypeName.ANY) { + if (!values.isEmpty()) { + exprType = values.getFirst().tupleValue().get(columnName).type(); + } else { + exprType = ExprCoreType.UNDEFINED; + } + } else { + exprType = convertRelDataTypeToExprType(fieldType); + } columns.add(new Column(columnName, null, exprType)); } Schema schema = new Schema(columns); From 7003abb0ef1227532f1444827e75f01bcefa4b11 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 6 Jun 2025 11:31:52 +0800 Subject: [PATCH 05/20] Implement a minimal viable version of expand Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/analysis/Analyzer.java | 7 ++ .../org/opensearch/sql/ast/tree/Expand.java | 2 +- .../sql/calcite/CalciteRelNodeVisitor.java | 27 ++++---- .../remote/CalciteExpandCommandIT.java | 56 +++++++++++++++- .../opensearch/sql/ppl/ExpandCommandIT.java | 65 ------------------- 5 files changed, 76 insertions(+), 81 deletions(-) delete mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 662cacf28da..fee7c961edb 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -50,6 +50,7 @@ import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.Expand; import org.opensearch.sql.ast.tree.FetchCursor; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; @@ -623,6 +624,12 @@ public LogicalPlan visitML(ML node, AnalysisContext context) { return new LogicalML(child, node.getArguments()); } + @Override + public LogicalPlan visitExpand(Expand expand, AnalysisContext context) { + throw new UnsupportedOperationException( + "Expand is supported only when " + CALCITE_ENGINE_ENABLED.getKeyValue() + "=true"); + } + /** Build {@link LogicalTrendline} for Trendline command. */ @Override public LogicalPlan visitTrendline(Trendline node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java index 77aaac885de..60e107d7151 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java @@ -30,7 +30,7 @@ public Expand attach(UnresolvedPlan child) { @Override public List getChild() { - return ImmutableList.of(child); + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index f6a01f6fb4f..640461eb30e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -840,44 +840,45 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { // 1. Visit Children visitChildren(expand, context); - var relBuilder = context.relBuilder; + RelBuilder relBuilder = context.relBuilder; // 2. Get the field to expand Field arrayField = expand.getField(); - // 3. Unnest the array field - // Analyze the array field to get its RexNode RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); - // Push the original table to the RelBuilder stack - // No alias is provided in the expand command, so we remove the original array field, - // then replace it with the unnest result. - // relBuilder.projectExcept(arrayFieldRex); - // Capture the outer row in a CorrelationId + // 3. Capture the outer row in a CorrelationId Holder correlVariable = Holder.empty(); relBuilder.variable(correlVariable::set); - // Push a copy of the original table to the RelBuilder stack as right - // side of the join. + // 4. Push a copy of the original table to the RelBuilder stack as right + // side of the correlate (join). relBuilder.push(relBuilder.peek()); RexNode correlArrayField = relBuilder.field( context.rexBuilder.makeCorrel(relBuilder.peek().getRowType(), correlVariable.get().id), arrayFieldRex.getIndex()); - // Filter rows where the array field is the same as the left side + // 5. Filter rows where the array field is the same as the left side // TODO: This is not a standard way to use correlate and uncollect together. - // Correct it in the future. + // A filter should not be necessary. Correct it in the future. RexNode filterCondition = relBuilder.equals(correlArrayField, arrayFieldRex); relBuilder.filter(filterCondition); + + // 6. Project only the array field for the uncollect operation relBuilder.project(List.of(correlArrayField), List.of(arrayField.getField().toString())); - // Alias is not supported in expand yet, we pass in an empty list + // 7. Expand the array field using uncollect relBuilder.uncollect(List.of(), false); + // 8. Perform a nested-loop join (correlate) between the original table and the expanded + // array field. // The last parameter has to refer to the array to be expanded on the left side. It will // be used by the right side to correlate with the left side. relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)); + // 8. Remove the original array field from the output. No alias is currently supported in the + // expand command, so it can be safely deleted. Its name is re-used for the expanded element. + relBuilder.projectExcept(arrayFieldRex); return relBuilder.peek(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index 11c3e31de8c..76e34e322a3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -7,13 +7,65 @@ package org.opensearch.sql.calcite.remote; -import org.opensearch.sql.ppl.ExpandCommandIT; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ARRAY; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; -public class CalciteExpandCommandIT extends ExpandCommandIT { +import org.json.JSONObject; +import org.junit.Ignore; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteExpandCommandIT extends PPLIntegTestCase { @Override public void init() throws Exception { super.init(); + loadIndex(Index.NESTED_SIMPLE); + loadIndex(Index.ARRAY); enableCalcite(); disallowCalciteFallback(); } + + @Test + public void testExpandOnNested() throws Exception { + JSONObject response = + executeQuery(String.format("source=%s | expand address", TEST_INDEX_NESTED_SIMPLE)); + verifySchema( + response, + schema("name", "string"), + schema("age", "bigint"), + schema("id", "bigint"), + schema("address", "struct")); + verifyNumOfRows(response, 11); + } + + // TODO: confirm if expand on array (instead of nested) will be supported. + // In Opensearch, a string field can store either a single string or an array of strings. + // This makes it difficult to implement expand on array. + @Ignore + @Test + public void testExpandOnArray() throws Exception { + JSONObject response = + executeQuery(String.format("source=%s | expand strings", TEST_INDEX_ARRAY)); + verifySchema(response, schema("numbers", "array"), schema("strings", "string")); + verifyNumOfRows(response, 5); + } + + // TODO: confirm if expand with alias will be supported + @Ignore + @Test + public void testExpandWithAlias() throws Exception { + JSONObject response = + executeQuery(String.format("source=%s | expand address as addr", TEST_INDEX_NESTED_SIMPLE)); + verifySchema( + response, + schema("name", "string"), + schema("age", "integer"), + schema("id", "integer"), + schema("address", "array"), + schema("addr", "struct")); + verifyNumOfRows(response, 11); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java deleted file mode 100644 index dfea3ff893d..00000000000 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExpandCommandIT.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * - */ - -package org.opensearch.sql.ppl; - -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ARRAY; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; -import static org.opensearch.sql.util.MatcherUtils.schema; -import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; -import static org.opensearch.sql.util.MatcherUtils.verifySchema; - -import org.json.JSONObject; -import org.junit.Ignore; -import org.junit.jupiter.api.Test; - -public class ExpandCommandIT extends PPLIntegTestCase { - @Override - public void init() throws Exception { - super.init(); - loadIndex(Index.NESTED_SIMPLE); - loadIndex(Index.ARRAY); - } - - @Test - public void testExpandOnNested() throws Exception { - JSONObject response = - executeQuery(String.format("source=%s | expand address", TEST_INDEX_NESTED_SIMPLE)); - verifySchema( - response, - schema("name", "string"), - schema("age", "integer"), - schema("id", "integer"), - schema("address", "struct")); - verifyNumOfRows(response, 11); - } - - @Ignore - @Test - public void testExpandOnArray() throws Exception { - JSONObject response = - executeQuery(String.format("source=%s | expand strings", TEST_INDEX_ARRAY)); - verifySchema(response, schema("numbers", "array"), schema("strings", "string")); - verifyNumOfRows(response, 5); - } - - // TODO: double check if expand with alias is supported - @Ignore - @Test - public void testExpandWithAlias() throws Exception { - JSONObject response = - executeQuery(String.format("source=%s | expand address as addr", TEST_INDEX_NESTED_SIMPLE)); - verifySchema( - response, - schema("name", "string"), - schema("age", "integer"), - schema("id", "integer"), - schema("address", "array"), - schema("addr", "struct")); - verifyNumOfRows(response, 11); - } -} From 9cb2ccbdbbfbf148e7e0e39b1dd0279db3057593 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 6 Jun 2025 15:51:29 +0800 Subject: [PATCH 06/20] Support expand with alias Signed-off-by: Yuanchun Shen --- .../java/org/opensearch/sql/ast/dsl/AstDSL.java | 4 ++-- .../org/opensearch/sql/ast/tree/Expand.java | 2 ++ .../sql/calcite/CalciteRelNodeVisitor.java | 17 +++++++++++++---- .../calcite/remote/CalciteExpandCommandIT.java | 7 ++----- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 3 ++- 6 files changed, 22 insertions(+), 13 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 3f088ed7be0..2fada818223 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -97,8 +97,8 @@ public static Eval eval(UnresolvedPlan input, Let... projectList) { return new Eval(Arrays.asList(projectList)).attach(input); } - public Expand expand(UnresolvedPlan input, Field field) { - return new Expand(field).attach(input); + public Expand expand(UnresolvedPlan input, Field field, String alias) { + return new Expand(field, alias).attach(input); } public static UnresolvedPlan projectWithArg( diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java index 60e107d7151..7841c708b0c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java @@ -7,6 +7,7 @@ import com.google.common.collect.ImmutableList; import java.util.List; +import javax.annotation.Nullable; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; @@ -21,6 +22,7 @@ public class Expand extends UnresolvedPlan { private UnresolvedPlan child; @Getter private final Field field; + @Getter @Nullable private final String alias; @Override public Expand attach(UnresolvedPlan child) { diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 640461eb30e..4a837ed78e3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -842,9 +842,10 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { RelBuilder relBuilder = context.relBuilder; - // 2. Get the field to expand + // 2. Get the field to expand and an optional alias. Field arrayField = expand.getField(); RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); + String alias = expand.getAlias(); // 3. Capture the outer row in a CorrelationId Holder correlVariable = Holder.empty(); @@ -860,7 +861,7 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { // 5. Filter rows where the array field is the same as the left side // TODO: This is not a standard way to use correlate and uncollect together. - // A filter should not be necessary. Correct it in the future. + // A filter should not be necessary. Correct it in the future. RexNode filterCondition = relBuilder.equals(correlArrayField, arrayFieldRex); relBuilder.filter(filterCondition); @@ -876,9 +877,17 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { // be used by the right side to correlate with the left side. relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)); - // 8. Remove the original array field from the output. No alias is currently supported in the - // expand command, so it can be safely deleted. Its name is re-used for the expanded element. + // 8. Remove the original array field from the output. + // TODO: RFC: should we keep the original array field when alias is present? relBuilder.projectExcept(arrayFieldRex); + if (alias != null) { + // Sub-nested fields cannot be removed after renaming the nested field. + tryToRemoveNestedFields(context); + RexInputRef expandedField = relBuilder.field(arrayField.getField().toString()); + List names = new ArrayList<>(relBuilder.peek().getRowType().getFieldNames()); + names.set(expandedField.getIndex(), alias); + relBuilder.rename(names); + } return relBuilder.peek(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index 76e34e322a3..5ab077759a5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -53,8 +53,6 @@ public void testExpandOnArray() throws Exception { verifyNumOfRows(response, 5); } - // TODO: confirm if expand with alias will be supported - @Ignore @Test public void testExpandWithAlias() throws Exception { JSONObject response = @@ -62,9 +60,8 @@ public void testExpandWithAlias() throws Exception { verifySchema( response, schema("name", "string"), - schema("age", "integer"), - schema("id", "integer"), - schema("address", "array"), + schema("age", "bigint"), + schema("id", "bigint"), schema("addr", "struct")); verifyNumOfRows(response, 11); } diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 5f85f647738..7fa19fea237 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -240,7 +240,7 @@ trendlineType ; expandCommand - : EXPAND fieldExpression + : EXPAND fieldExpression (AS alias = qualifiedName)? ; kmeansCommand diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 3cb352c6e55..37ee5cf31a3 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -419,7 +419,8 @@ public UnresolvedPlan visitTopCommand(OpenSearchPPLParser.TopCommandContext ctx) @Override public UnresolvedPlan visitExpandCommand(OpenSearchPPLParser.ExpandCommandContext ctx) { Field fieldExpression = (Field) internalVisitExpression(ctx.fieldExpression()); - return new Expand(fieldExpression); + String alias = ctx.alias != null ? internalVisitExpression(ctx.alias).toString() : null; + return new Expand(fieldExpression, alias); } @Override From 08b5c430e23cecef9de876c47dca94d39740bd09 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 6 Jun 2025 16:15:30 +0800 Subject: [PATCH 07/20] Add doc for expand Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/expand.rst | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 docs/user/ppl/cmd/expand.rst diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst new file mode 100644 index 00000000000..af89b847c7e --- /dev/null +++ b/docs/user/ppl/cmd/expand.rst @@ -0,0 +1,46 @@ +PPL ``expand`` command +======================= + +Description +----------- + +Using ``expand`` command to flatten an array of nested type. + +Syntax +------ + +``expand [As alias]`` + +- *field*: The field to be expanded (exploded). Currently only nested arrays are supported. +- *alias*: (Optional) The name to use instead of the original field name. + +Usage Guidelines +---------------- + +The expand command produces a row for each element in the specified array or map field, where: + +- Array elements become individual rows. +- When an alias is provided, the exploded values are represented under the alias instead of the original field name. +- This can be used in combination with other commands, such as ``stats``, ``eval``, and ``parse`` to manipulate or extract data post-expansion. + +Examples +-------- + +Given a dataset ``move`` with the following data: +``` +{"name":"abbas","age":24,"address":[{"city":"New york city","state":"NY","moveInDate":{"dateAndTime":"19840412T090742.000Z"}}]} +{"name":"chen","age":32,"address":[{"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}},{"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}}]} +``` +The following query expand the address field and rename it to addr: + +PPL query:: + + PPL> source=move | expand address as addr; + fetched rows / total rows = 3/3 + +-------+-----+-------------------------------------------------------------------------------------------+ + | name | age | addr | + |-------+-----+-------------------------------------------------------------------------------------------| + | abbas | 24 | {"city":"New york city","state":"NY","moveInDate":{"dateAndTime":"19840412T090742.000Z"}} | + | chen | 32 | {"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}} | + | chen | 32 | {"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}} | + +-------+-----+-------------------------------------------------------------------------------------------+ From 10f81b006a4a94318acfacb9b46b4fb34b6cd8e9 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 6 Jun 2025 16:58:44 +0800 Subject: [PATCH 08/20] WIP: add unit tests for expand command Signed-off-by: Yuanchun Shen --- .../sql/ppl/calcite/CalcitePPLExpandTest.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExpandTest.java diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExpandTest.java new file mode 100644 index 00000000000..7093e8656dc --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExpandTest.java @@ -0,0 +1,22 @@ +/* + * + * * Copyright OpenSearch Contributors + * * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLExpandTest extends CalcitePPLAbstractTest { + public CalcitePPLExpandTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testExpand() { + String ppl = "source=EMP | expand JOB"; + } +} From 4587b5da601764bc8b4fd2ebcd953b4749f8cac8 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sat, 7 Jun 2025 23:51:43 +0800 Subject: [PATCH 09/20] Remove unused logical expand & add test cases Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/ast/dsl/AstDSL.java | 23 +- .../org/opensearch/sql/ast/tree/Expand.java | 3 +- .../sql/calcite/CalciteRelNodeVisitor.java | 2 +- .../sql/planner/logical/LogicalExpand.java | 31 --- .../logical/LogicalPlanNodeVisitor.java | 4 - .../remote/CalciteExpandCommandIT.java | 215 +++++++++++++++++- .../executor/OpenSearchExecutionEngine.java | 1 + 7 files changed, 239 insertions(+), 40 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 8c038f91e99..4c7736b7f24 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -49,9 +49,30 @@ import org.opensearch.sql.ast.expression.When; import org.opensearch.sql.ast.expression.WindowFunction; import org.opensearch.sql.ast.expression.Xor; -import org.opensearch.sql.ast.tree.*; +import org.opensearch.sql.ast.tree.Aggregation; +import org.opensearch.sql.ast.tree.Dedupe; +import org.opensearch.sql.ast.tree.DescribeRelation; +import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.Expand; +import org.opensearch.sql.ast.tree.FillNull; +import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Limit; +import org.opensearch.sql.ast.tree.Parse; +import org.opensearch.sql.ast.tree.Patterns; +import org.opensearch.sql.ast.tree.Project; +import org.opensearch.sql.ast.tree.RareTopN; import org.opensearch.sql.ast.tree.RareTopN.CommandType; +import org.opensearch.sql.ast.tree.Relation; +import org.opensearch.sql.ast.tree.RelationSubquery; +import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.ast.tree.SubqueryAlias; +import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.ast.tree.Values; /** Class of static methods to create specific node instances. */ @UtilityClass diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java index 7841c708b0c..02e8402aaf2 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Expand.java @@ -8,6 +8,7 @@ import com.google.common.collect.ImmutableList; import java.util.List; import javax.annotation.Nullable; +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; @@ -15,9 +16,9 @@ import org.opensearch.sql.ast.expression.Field; /** AST node representing an {@code expand } operation. */ -@Getter @ToString @RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) public class Expand extends UnresolvedPlan { private UnresolvedPlan child; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 37499866896..79534f4220b 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1032,7 +1032,7 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { // be used by the right side to correlate with the left side. relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)); - // 8. Remove the original array field from the output. + // 9. Remove the original array field from the output. // TODO: RFC: should we keep the original array field when alias is present? relBuilder.projectExcept(arrayFieldRex); if (alias != null) { diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java deleted file mode 100644 index 690881bf733..00000000000 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalExpand.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * - */ - -package org.opensearch.sql.planner.logical; - -import java.util.Collections; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.ToString; -import org.opensearch.sql.expression.Expression; - -@ToString -@EqualsAndHashCode(callSuper = true) -public class LogicalExpand extends LogicalPlan { - - @Getter private final Expression field; - - public LogicalExpand(LogicalPlan child, Expression field) { - super(Collections.singletonList(child)); - this.field = field; - } - - @Override - public R accept(LogicalPlanNodeVisitor visitor, C context) { - return visitor.visitExpand(this, context); - } -} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index 4ba2459fad4..c9eedd8efc8 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -104,10 +104,6 @@ public R visitAD(LogicalAD plan, C context) { return visitNode(plan, context); } - public R visitExpand(LogicalExpand plan, C context) { - return visitNode(plan, context); - } - public R visitTrendline(LogicalTrendline plan, C context) { return visitNode(plan, context); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index 5ab077759a5..b4aa1449d16 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -9,10 +9,13 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ARRAY; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; +import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.Ignore; import org.junit.jupiter.api.Test; @@ -39,11 +42,108 @@ public void testExpandOnNested() throws Exception { schema("id", "bigint"), schema("address", "struct")); verifyNumOfRows(response, 11); + verifyDataRows( + response, + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "New york city") + .put("state", "NY") + .put("moveInDate", new JSONObject().put("dateAndTime", "1984-04-12 09:07:42"))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "bellevue") + .put("state", "WA") + .put( + "moveInDate", + new JSONArray() + .put(new JSONObject().put("dateAndTime", "2023-05-03 08:07:42")) + .put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44")))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "seattle") + .put("state", "WA") + .put("moveInDate", new JSONObject().put("dateAndTime", "1966-03-19 03:04:55"))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "chicago") + .put("state", "IL") + .put("moveInDate", new JSONObject().put("dateAndTime", "2011-06-01 01:01:42"))), + rows( + "chen", + null, + 32, + new JSONObject() + .put("city", "Miami") + .put("state", "Florida") + .put("moveInDate", new JSONObject().put("dateAndTime", "1901-08-11 04:03:33"))), + rows( + "chen", + null, + 32, + new JSONObject() + .put("city", "los angeles") + .put("state", "CA") + .put("moveInDate", new JSONObject().put("dateAndTime", "2023-05-03 08:07:42"))), + rows( + "peng", + null, + 26, + new JSONObject() + .put("city", "san diego") + .put("state", "CA") + .put("moveInDate", new JSONObject().put("dateAndTime", "2001-11-11 04:07:44"))), + rows( + "peng", + null, + 26, + new JSONObject() + .put("city", "austin") + .put("state", "TX") + .put("moveInDate", new JSONObject().put("dateAndTime", "1977-07-13 09:04:41"))), + rows( + "andy", + 4, + 19, + new JSONObject() + .put("city", "houston") + .put("state", "TX") + .put("moveInDate", new JSONObject().put("dateAndTime", "1933-12-12 05:05:45"))), + rows( + "david", + null, + 25, + new JSONObject() + .put("city", "raleigh") + .put("state", "NC") + .put("moveInDate", new JSONObject().put("dateAndTime", "1909-06-17 01:04:21"))), + rows( + "david", + null, + 25, + new JSONObject() + .put("city", "charlotte") + .put("state", "SC") + .put( + "moveInDate", + new JSONArray() + .put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44"))))); } - // TODO: confirm if expand on array (instead of nested) will be supported. + // To consider in future releases: will expand on array (instead of nested) be supported. // In Opensearch, a string field can store either a single string or an array of strings. - // This makes it difficult to implement expand on array. + // This makes it difficult to implement expand on arries. @Ignore @Test public void testExpandOnArray() throws Exception { @@ -63,6 +163,117 @@ public void testExpandWithAlias() throws Exception { schema("age", "bigint"), schema("id", "bigint"), schema("addr", "struct")); + verifyDataRows( + response, + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "New york city") + .put("state", "NY") + .put("moveInDate", new JSONObject().put("dateAndTime", "1984-04-12 09:07:42"))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "bellevue") + .put("state", "WA") + .put( + "moveInDate", + new JSONArray() + .put(new JSONObject().put("dateAndTime", "2023-05-03 08:07:42")) + .put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44")))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "seattle") + .put("state", "WA") + .put("moveInDate", new JSONObject().put("dateAndTime", "1966-03-19 03:04:55"))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "chicago") + .put("state", "IL") + .put("moveInDate", new JSONObject().put("dateAndTime", "2011-06-01 01:01:42"))), + rows( + "chen", + null, + 32, + new JSONObject() + .put("city", "Miami") + .put("state", "Florida") + .put("moveInDate", new JSONObject().put("dateAndTime", "1901-08-11 04:03:33"))), + rows( + "chen", + null, + 32, + new JSONObject() + .put("city", "los angeles") + .put("state", "CA") + .put("moveInDate", new JSONObject().put("dateAndTime", "2023-05-03 08:07:42"))), + rows( + "peng", + null, + 26, + new JSONObject() + .put("city", "san diego") + .put("state", "CA") + .put("moveInDate", new JSONObject().put("dateAndTime", "2001-11-11 04:07:44"))), + rows( + "peng", + null, + 26, + new JSONObject() + .put("city", "austin") + .put("state", "TX") + .put("moveInDate", new JSONObject().put("dateAndTime", "1977-07-13 09:04:41"))), + rows( + "andy", + 4, + 19, + new JSONObject() + .put("city", "houston") + .put("state", "TX") + .put("moveInDate", new JSONObject().put("dateAndTime", "1933-12-12 05:05:45"))), + rows( + "david", + null, + 25, + new JSONObject() + .put("city", "raleigh") + .put("state", "NC") + .put("moveInDate", new JSONObject().put("dateAndTime", "1909-06-17 01:04:21"))), + rows( + "david", + null, + 25, + new JSONObject() + .put("city", "charlotte") + .put("state", "SC") + .put( + "moveInDate", + new JSONArray() + .put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44"))))); + } + + @Test + public void testExpandWithEval() throws Exception { + JSONObject response = + executeQuery( + String.format("source=%s | eval addr=address | expand addr", TEST_INDEX_NESTED_SIMPLE)); + verifySchema( + response, + schema("name", "string"), + schema("age", "bigint"), + schema("address", "array"), + schema("id", "bigint"), + schema("addr", "struct")); verifyNumOfRows(response, 11); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 4b51d83cf73..2b7c5b530df 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -241,6 +241,7 @@ private void buildResultSet( if (!values.isEmpty()) { exprType = values.getFirst().tupleValue().get(columnName).type(); } else { + // Using UNDEFINED instead of UNKNOWN to avoid throwing exception exprType = ExprCoreType.UNDEFINED; } } else { From 37bfac3df249bd5297544ed766d1d85c4a816e42 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 8 Jun 2025 09:49:24 +0800 Subject: [PATCH 10/20] Use left join in expand to keep documents where their expanded array field is empty Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 4 +- .../remote/CalciteExpandCommandIT.java | 124 +++++++++++++++++- 2 files changed, 126 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 79534f4220b..c20580e2d29 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1030,7 +1030,9 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { // array field. // The last parameter has to refer to the array to be expanded on the left side. It will // be used by the right side to correlate with the left side. - relBuilder.correlate(JoinRelType.INNER, correlVariable.get().id, List.of(arrayFieldRex)); + // Using left join to keep the records where the array field is empty. The corresponding + // field in the result will be null. + relBuilder.correlate(JoinRelType.LEFT, correlVariable.get().id, List.of(arrayFieldRex)); // 9. Remove the original array field from the output. // TODO: RFC: should we keep the original array field when alias is present? diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index b4aa1449d16..87374967e42 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -19,6 +19,7 @@ import org.json.JSONObject; import org.junit.Ignore; import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalciteExpandCommandIT extends PPLIntegTestCase { @@ -41,7 +42,6 @@ public void testExpandOnNested() throws Exception { schema("age", "bigint"), schema("id", "bigint"), schema("address", "struct")); - verifyNumOfRows(response, 11); verifyDataRows( response, rows( @@ -276,4 +276,126 @@ public void testExpandWithEval() throws Exception { schema("addr", "struct")); verifyNumOfRows(response, 11); } + + @Test + public void testExpandEmptyArray() throws Exception { + final int docId = 6; + Request insertRequest = + new Request( + "PUT", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_NESTED_SIMPLE, docId)); + insertRequest.setJsonEntity("{\"name\":\"ben\",\"age\":47, \"id\": 437821, \"address\":[]}\n"); + client().performRequest(insertRequest); + + JSONObject response = + executeQuery(String.format("source=%s | expand address", TEST_INDEX_NESTED_SIMPLE)); + verifySchema( + response, + schema("name", "string"), + schema("age", "bigint"), + schema("id", "bigint"), + schema("address", "struct")); + verifyDataRows( + response, + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "New york city") + .put("state", "NY") + .put("moveInDate", new JSONObject().put("dateAndTime", "1984-04-12 09:07:42"))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "bellevue") + .put("state", "WA") + .put( + "moveInDate", + new JSONArray() + .put(new JSONObject().put("dateAndTime", "2023-05-03 08:07:42")) + .put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44")))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "seattle") + .put("state", "WA") + .put("moveInDate", new JSONObject().put("dateAndTime", "1966-03-19 03:04:55"))), + rows( + "abbas", + null, + 24, + new JSONObject() + .put("city", "chicago") + .put("state", "IL") + .put("moveInDate", new JSONObject().put("dateAndTime", "2011-06-01 01:01:42"))), + rows( + "chen", + null, + 32, + new JSONObject() + .put("city", "Miami") + .put("state", "Florida") + .put("moveInDate", new JSONObject().put("dateAndTime", "1901-08-11 04:03:33"))), + rows( + "chen", + null, + 32, + new JSONObject() + .put("city", "los angeles") + .put("state", "CA") + .put("moveInDate", new JSONObject().put("dateAndTime", "2023-05-03 08:07:42"))), + rows( + "peng", + null, + 26, + new JSONObject() + .put("city", "san diego") + .put("state", "CA") + .put("moveInDate", new JSONObject().put("dateAndTime", "2001-11-11 04:07:44"))), + rows( + "peng", + null, + 26, + new JSONObject() + .put("city", "austin") + .put("state", "TX") + .put("moveInDate", new JSONObject().put("dateAndTime", "1977-07-13 09:04:41"))), + rows( + "andy", + 4, + 19, + new JSONObject() + .put("city", "houston") + .put("state", "TX") + .put("moveInDate", new JSONObject().put("dateAndTime", "1933-12-12 05:05:45"))), + rows( + "david", + null, + 25, + new JSONObject() + .put("city", "raleigh") + .put("state", "NC") + .put("moveInDate", new JSONObject().put("dateAndTime", "1909-06-17 01:04:21"))), + rows( + "david", + null, + 25, + new JSONObject() + .put("city", "charlotte") + .put("state", "SC") + .put( + "moveInDate", + new JSONArray() + .put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44")))), + rows("ben", 437821, 47, null)); + + verifyNumOfRows(response, 12); + Request deleteRequest = + new Request("DELETE", String.format("/%s/_doc/%d", TEST_INDEX_NESTED_SIMPLE, docId)); + client().performRequest(deleteRequest); + } } From 776630a951c9d94e7e50bd9e7087602a5115438b Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 8 Jun 2025 11:01:21 +0800 Subject: [PATCH 11/20] Add unit test for expand command Signed-off-by: Yuanchun Shen --- .../sql/ppl/calcite/CalcitePPLExpandTest.java | 122 +++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExpandTest.java index 7093e8656dc..cd2763c7101 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExpandTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLExpandTest.java @@ -7,16 +7,136 @@ package org.opensearch.sql.ppl.calcite; +import com.google.common.collect.ImmutableList; +import java.util.List; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.schema.Table; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; import org.junit.Test; public class CalcitePPLExpandTest extends CalcitePPLAbstractTest { + public CalcitePPLExpandTest() { super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); } + // There is no existing table with arrays. We create one for test purpose. + public static class TableWithArray implements Table { + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("DEPTNO", SqlTypeName.INTEGER) + .add( + "EMPNOS", + factory.createArrayType(factory.createSqlType(SqlTypeName.INTEGER), -1)) + .build(); + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Add an empty table with name DEPT for test purpose + schema.add("DEPT", new TableWithArray()); + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + @Test public void testExpand() { - String ppl = "source=EMP | expand JOB"; + String ppl = "source=DEPT | expand EMPNOS"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$0], EMPNOS=[$2])\n" + + " LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{1}])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " Uncollect\n" + + " LogicalProject(EMPNOS=[$cor0.EMPNOS])\n" + + " LogicalFilter(condition=[=($cor0.EMPNOS, $1)])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + String expectedSparkSql = + "SELECT `$cor0`.`DEPTNO`, `t00`.`EMPNOS`\n" + + "FROM `scott`.`DEPT` `$cor0`,\n" + + "LATERAL UNNEST (SELECT `$cor0`.`EMPNOS`\n" + + "FROM `scott`.`DEPT`\n" + + "WHERE `$cor0`.`EMPNOS` = `EMPNOS`) `t0` (`EMPNOS`) `t00`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testExpandWithEval() { + String ppl = "source=DEPT | eval employee_no = EMPNOS | expand employee_no"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$0], EMPNOS=[$1], employee_no=[$3])\n" + + " LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{2}])\n" + + " LogicalProject(DEPTNO=[$0], EMPNOS=[$1], employee_no=[$1])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " Uncollect\n" + + " LogicalProject(employee_no=[$cor0.employee_no])\n" + + " LogicalFilter(condition=[=($cor0.employee_no, $2)])\n" + + " LogicalProject(DEPTNO=[$0], EMPNOS=[$1], employee_no=[$1])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + String expectedSparkSql = + "SELECT `$cor0`.`DEPTNO`, `$cor0`.`EMPNOS`, `t20`.`employee_no`\n" + + "FROM (SELECT `DEPTNO`, `EMPNOS`, `EMPNOS` `employee_no`\n" + + "FROM `scott`.`DEPT`) `$cor0`,\n" + + "LATERAL UNNEST (SELECT `$cor0`.`employee_no`\n" + + "FROM (SELECT `DEPTNO`, `EMPNOS`, `EMPNOS` `employee_no`\n" + + "FROM `scott`.`DEPT`) `t0`\n" + + "WHERE `$cor0`.`employee_no` = `employee_no`) `t2` (`employee_no`) `t20`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } } From 5f36a7536d7e8484cc7b2888b67b6f6b38963c3d Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 8 Jun 2025 12:10:56 +0800 Subject: [PATCH 12/20] Update expand doc format Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/expand.rst | 53 ++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index af89b847c7e..9a0b345d593 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -1,41 +1,52 @@ -PPL ``expand`` command -======================= +============= +expand +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + Description ------------ +============ + (From 3.1.0) Using ``expand`` command to flatten an array of nested type. +The expand command produces a row for each element in the specified array or map field, where: + +* Array elements become individual rows. +* When an alias is provided, the exploded values are represented under the alias instead of the original field name. +* In case where the expanded field is an empty array, the row will be kept the leaving the expanded field as null. + + + Syntax ------- +====== -``expand [As alias]`` +expand [As alias] -- *field*: The field to be expanded (exploded). Currently only nested arrays are supported. -- *alias*: (Optional) The name to use instead of the original field name. +* field: The field to be expanded (exploded). Currently only nested arrays are supported. +* alias: (Optional) The name to use instead of the original field name. -Usage Guidelines ----------------- -The expand command produces a row for each element in the specified array or map field, where: +Example: expand address field with an alias +=========================================== + +Given a dataset ``mirgration`` with the following data: -- Array elements become individual rows. -- When an alias is provided, the exploded values are represented under the alias instead of the original field name. -- This can be used in combination with other commands, such as ``stats``, ``eval``, and ``parse`` to manipulate or extract data post-expansion. +.. code-block:: -Examples --------- + {"name":"abbas","age":24,"address":[{"city":"New york city","state":"NY","moveInDate":{"dateAndTime":"19840412T090742.000Z"}}]} + {"name":"chen","age":32,"address":[{"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}},{"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}}]} -Given a dataset ``move`` with the following data: -``` -{"name":"abbas","age":24,"address":[{"city":"New york city","state":"NY","moveInDate":{"dateAndTime":"19840412T090742.000Z"}}]} -{"name":"chen","age":32,"address":[{"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}},{"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}}]} -``` The following query expand the address field and rename it to addr: PPL query:: - PPL> source=move | expand address as addr; + PPL> source=mirgration | expand address as addr; fetched rows / total rows = 3/3 +-------+-----+-------------------------------------------------------------------------------------------+ | name | age | addr | From 37676ff26ee53b3e35302ac855e8e86717f42b1a Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 8 Jun 2025 12:26:51 +0800 Subject: [PATCH 13/20] Fix: delete test doc with refresh Signed-off-by: Yuanchun Shen --- .../opensearch/sql/calcite/remote/CalciteExpandCommandIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index 87374967e42..5bd4e50024a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -395,7 +395,8 @@ public void testExpandEmptyArray() throws Exception { verifyNumOfRows(response, 12); Request deleteRequest = - new Request("DELETE", String.format("/%s/_doc/%d", TEST_INDEX_NESTED_SIMPLE, docId)); + new Request( + "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_NESTED_SIMPLE, docId)); client().performRequest(deleteRequest); } } From bc7899cdbdc53abbdb9f5f4266edeee1629bfe72 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 8 Jun 2025 15:10:26 +0800 Subject: [PATCH 14/20] Improve expand documentation Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/expand.rst | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index 9a0b345d593..1c7d406efb8 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -13,20 +13,23 @@ Description ============ (From 3.1.0) -Using ``expand`` command to flatten an array of nested type. - -The expand command produces a row for each element in the specified array or map field, where: - -* Array elements become individual rows. -* When an alias is provided, the exploded values are represented under the alias instead of the original field name. -* In case where the expanded field is an empty array, the row will be kept the leaving the expanded field as null. +Use the ``expand`` command on a nested array field to transform a single +document into multiple documents—each containing one element from the array. +All other fields in the original document are duplicated across the resulting +documents. +The expand command generates one row per element in the specified array field: +* The specified array field is converted into individual rows. +* If an alias is provided, the expanded values appear under the alias instead + of the original field name. +* If the specified field is an empty array, the row is retained with the + expanded field set to null. Syntax ====== -expand [As alias] +expand [AS alias] * field: The field to be expanded (exploded). Currently only nested arrays are supported. * alias: (Optional) The name to use instead of the original field name. From 6b264d899a93d0e3f10eeefcc1725d5666fce6e0 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 9 Jun 2025 10:30:43 +0800 Subject: [PATCH 15/20] Fix typos Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/expand.rst | 6 +++--- .../sql/calcite/remote/CalciteExpandCommandIT.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index 1c7d406efb8..a063715cc47 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -29,7 +29,7 @@ The expand command generates one row per element in the specified array field: Syntax ====== -expand [AS alias] +expand [as alias] * field: The field to be expanded (exploded). Currently only nested arrays are supported. * alias: (Optional) The name to use instead of the original field name. @@ -38,7 +38,7 @@ expand [AS alias] Example: expand address field with an alias =========================================== -Given a dataset ``mirgration`` with the following data: +Given a dataset ``migration`` with the following data: .. code-block:: @@ -49,7 +49,7 @@ The following query expand the address field and rename it to addr: PPL query:: - PPL> source=mirgration | expand address as addr; + PPL> source=migration | expand address as addr; fetched rows / total rows = 3/3 +-------+-----+-------------------------------------------------------------------------------------------+ | name | age | addr | diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index 5bd4e50024a..e995240dd5c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -143,7 +143,7 @@ public void testExpandOnNested() throws Exception { // To consider in future releases: will expand on array (instead of nested) be supported. // In Opensearch, a string field can store either a single string or an array of strings. - // This makes it difficult to implement expand on arries. + // This makes it difficult to implement expand on arrays. @Ignore @Test public void testExpandOnArray() throws Exception { From fe11b61d48a9bb0bb1396f63f307441211dd92ac Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 9 Jun 2025 11:04:52 +0800 Subject: [PATCH 16/20] Add a version section to expand documentation Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/expand.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index a063715cc47..c43fc816fe2 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -26,12 +26,17 @@ The expand command generates one row per element in the specified array field: * If the specified field is an empty array, the row is retained with the expanded field set to null. +Version +======= +Since 3.1.0 + Syntax ====== expand [as alias] -* field: The field to be expanded (exploded). Currently only nested arrays are supported. +* field: The field to be expanded (exploded). Currently only nested arrays are + supported. * alias: (Optional) The name to use instead of the original field name. From 98fbb12c4801ca7b42205fc80b2fbda74d631661 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 10 Jun 2025 09:00:55 +0800 Subject: [PATCH 17/20] Make expand empty array IT more specific Signed-off-by: Yuanchun Shen --- .../remote/CalciteExpandCommandIT.java | 109 ++---------------- 1 file changed, 8 insertions(+), 101 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index e995240dd5c..059d439cec2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -287,113 +287,20 @@ public void testExpandEmptyArray() throws Exception { client().performRequest(insertRequest); JSONObject response = - executeQuery(String.format("source=%s | expand address", TEST_INDEX_NESTED_SIMPLE)); + executeQuery( + String.format( + "source=%s | where name='ben' | expand address", TEST_INDEX_NESTED_SIMPLE)); verifySchema( response, schema("name", "string"), schema("age", "bigint"), schema("id", "bigint"), - schema("address", "struct")); - verifyDataRows( - response, - rows( - "abbas", - null, - 24, - new JSONObject() - .put("city", "New york city") - .put("state", "NY") - .put("moveInDate", new JSONObject().put("dateAndTime", "1984-04-12 09:07:42"))), - rows( - "abbas", - null, - 24, - new JSONObject() - .put("city", "bellevue") - .put("state", "WA") - .put( - "moveInDate", - new JSONArray() - .put(new JSONObject().put("dateAndTime", "2023-05-03 08:07:42")) - .put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44")))), - rows( - "abbas", - null, - 24, - new JSONObject() - .put("city", "seattle") - .put("state", "WA") - .put("moveInDate", new JSONObject().put("dateAndTime", "1966-03-19 03:04:55"))), - rows( - "abbas", - null, - 24, - new JSONObject() - .put("city", "chicago") - .put("state", "IL") - .put("moveInDate", new JSONObject().put("dateAndTime", "2011-06-01 01:01:42"))), - rows( - "chen", - null, - 32, - new JSONObject() - .put("city", "Miami") - .put("state", "Florida") - .put("moveInDate", new JSONObject().put("dateAndTime", "1901-08-11 04:03:33"))), - rows( - "chen", - null, - 32, - new JSONObject() - .put("city", "los angeles") - .put("state", "CA") - .put("moveInDate", new JSONObject().put("dateAndTime", "2023-05-03 08:07:42"))), - rows( - "peng", - null, - 26, - new JSONObject() - .put("city", "san diego") - .put("state", "CA") - .put("moveInDate", new JSONObject().put("dateAndTime", "2001-11-11 04:07:44"))), - rows( - "peng", - null, - 26, - new JSONObject() - .put("city", "austin") - .put("state", "TX") - .put("moveInDate", new JSONObject().put("dateAndTime", "1977-07-13 09:04:41"))), - rows( - "andy", - 4, - 19, - new JSONObject() - .put("city", "houston") - .put("state", "TX") - .put("moveInDate", new JSONObject().put("dateAndTime", "1933-12-12 05:05:45"))), - rows( - "david", - null, - 25, - new JSONObject() - .put("city", "raleigh") - .put("state", "NC") - .put("moveInDate", new JSONObject().put("dateAndTime", "1909-06-17 01:04:21"))), - rows( - "david", - null, - 25, - new JSONObject() - .put("city", "charlotte") - .put("state", "SC") - .put( - "moveInDate", - new JSONArray() - .put(new JSONObject().put("dateAndTime", "2001-11-11 04:07:44")))), - rows("ben", 437821, 47, null)); + // The type is inferred at runtime. When the array is empty and is the + // first element of the column, it is set to "undefined". + schema("address", "undefined")); + verifyDataRows(response, rows("ben", 437821, 47, null)); - verifyNumOfRows(response, 12); + verifyNumOfRows(response, 1); Request deleteRequest = new Request( "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_NESTED_SIMPLE, docId)); From 5ea6d60d22ed8446c23b2f64c59641ccdedf78a2 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 10 Jun 2025 09:26:35 +0800 Subject: [PATCH 18/20] Add a limitation section in expand doc & link a issue tracker for a workaround Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/expand.rst | 9 +++++++++ .../opensearch/executor/OpenSearchExecutionEngine.java | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index c43fc816fe2..68023fb3d81 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -63,3 +63,12 @@ PPL query:: | chen | 32 | {"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}} | | chen | 32 | {"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}} | +-------+-----+-------------------------------------------------------------------------------------------+ + +Limitations +============ + +* The ``expand`` command currently only supports nested arrays. Primitive + fields storing arrays are not currently supported. E.g. a string field + storing an array of strings cannot be expanded with the current + implementation. +* The ``expand`` command is only available since 3.1.0. diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 2b7c5b530df..e6906e3901b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -234,8 +234,9 @@ private void buildResultSet( for (int i = 1; i <= columnCount; ++i) { String columnName = metaData.getColumnName(i); RelDataType fieldType = fieldTypes.get(i - 1); - // The element type of struct and array is currently set to ANY. - // We set them using the runtime type as a workaround. + // TODO: Correct this after fixing issue github.com/opensearch-project/sql/issues/3751 + // The element type of struct and array is currently set to ANY. + // We set them using the runtime type as a workaround. ExprType exprType; if (fieldType.getSqlTypeName() == SqlTypeName.ANY) { if (!values.isEmpty()) { From 99ad92d07d843f854c09c6c95d728c0c2bd78867 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 10 Jun 2025 10:41:44 +0800 Subject: [PATCH 19/20] Tweak expand command doc Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/expand.rst | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index 68023fb3d81..46cf6a37e18 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -68,7 +68,16 @@ Limitations ============ * The ``expand`` command currently only supports nested arrays. Primitive - fields storing arrays are not currently supported. E.g. a string field - storing an array of strings cannot be expanded with the current - implementation. -* The ``expand`` command is only available since 3.1.0. + fields storing arrays are not supported. E.g. a string field storing an array + of strings cannot be expanded with the current implementation. +* The command works only with Calcite enabled. This can be set with the + following command: + + .. code-block:: + + PUT /_cluster/settings + { + "persistent":{ + "plugins.calcite.enabled": true + } + } From 42f5aa3e9d4852446e138e1bfbdc65d869abc2d4 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 10 Jun 2025 11:42:17 +0800 Subject: [PATCH 20/20] Test expand null field Signed-off-by: Yuanchun Shen --- .../remote/CalciteExpandCommandIT.java | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java index 059d439cec2..3788f8fc852 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExpandCommandIT.java @@ -299,8 +299,37 @@ public void testExpandEmptyArray() throws Exception { // first element of the column, it is set to "undefined". schema("address", "undefined")); verifyDataRows(response, rows("ben", 437821, 47, null)); + verifyNumOfRows(response, 1); + + Request deleteRequest = + new Request( + "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_NESTED_SIMPLE, docId)); + client().performRequest(deleteRequest); + } + + @Test + public void testExpandOnNullField() throws Exception { + final int docId = 6; + Request insertRequest = + new Request( + "PUT", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_NESTED_SIMPLE, docId)); + insertRequest.setJsonEntity( + "{\"name\":\"ben\",\"age\":47, \"id\": 437821, \"address\":null}\n"); + client().performRequest(insertRequest); + JSONObject response = + executeQuery( + String.format( + "source=%s | where name='ben' | expand address", TEST_INDEX_NESTED_SIMPLE)); + verifySchema( + response, + schema("name", "string"), + schema("age", "bigint"), + schema("id", "bigint"), + schema("address", "undefined")); + verifyDataRows(response, rows("ben", 437821, 47, null)); verifyNumOfRows(response, 1); + Request deleteRequest = new Request( "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_NESTED_SIMPLE, docId));