From 06867531a0954fae0b62d6534e6c01b54d318ec7 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 13 Mar 2026 15:08:01 -0700 Subject: [PATCH 01/25] initial commit for highlight cmd Signed-off-by: Jialiang Liang --- .../org/opensearch/sql/analysis/Analyzer.java | 6 +++ .../sql/ast/AbstractNodeVisitor.java | 5 +++ .../org/opensearch/sql/ast/dsl/AstDSL.java | 5 +++ .../opensearch/sql/ast/tree/Highlight.java | 41 +++++++++++++++++++ .../sql/calcite/CalcitePlanContext.java | 3 ++ .../sql/calcite/CalciteRelNodeVisitor.java | 26 ++++++++++++ .../sql/calcite/plan/HighlightPushable.java | 26 ++++++++++++ .../sql/expression/HighlightExpression.java | 2 + .../scan/AbstractCalciteIndexScan.java | 30 ++++++++++++++ .../scan/CalciteEnumerableIndexScan.java | 1 + .../storage/scan/CalciteLogicalIndexScan.java | 22 +++++++++- .../scan/OpenSearchIndexEnumerator.java | 4 ++ .../storage/scan/context/PushDownContext.java | 3 ++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 12 ++++++ .../opensearch/sql/ppl/parser/AstBuilder.java | 20 +++++++++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 8 ++++ .../sql/ppl/parser/AstBuilderTest.java | 23 +++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 12 ++++++ 19 files changed, 248 insertions(+), 2 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java create mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushable.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index fc96f2f389c..6303a411d53 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -76,6 +76,7 @@ import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Limit; @@ -570,6 +571,11 @@ public LogicalPlan visitGraphLookup(GraphLookup node, AnalysisContext context) { throw getOnlyForCalciteException("graphlookup"); } + @Override + public LogicalPlan visitHighlight(Highlight node, AnalysisContext context) { + throw getOnlyForCalciteException("highlight"); + } + /** Build {@link ParseExpression} to context and skip to child nodes. */ @Override public LogicalPlan visitParse(Parse node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 7f02bb3ef1b..e7151fb5853 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -64,6 +64,7 @@ import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Limit; @@ -495,4 +496,8 @@ public T visitMvExpand(MvExpand node, C context) { public T visitGraphLookup(GraphLookup node, C context) { return visitChildren(node, context); } + + public T visitHighlight(Highlight node, C context) { + return visitChildren(node, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index b2731ebbd40..6d6efb1e49f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -60,6 +60,7 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.MvCombine; @@ -584,6 +585,10 @@ public static Trendline trendline( return new Trendline(sortField, Arrays.asList(computations)).attach(input); } + public static Highlight highlight(UnresolvedPlan input, List highlightArgs) { + return new Highlight(highlightArgs).attach(input); + } + public static AppendPipe appendPipe(UnresolvedPlan input, UnresolvedPlan subquery) { return new AppendPipe(subquery).attach(input); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java b/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java new file mode 100644 index 00000000000..ef445c6bae5 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java @@ -0,0 +1,41 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.Node; + +@ToString +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +public class Highlight extends UnresolvedPlan { + + private UnresolvedPlan child; + private final List highlightArgs; + + @Override + public Highlight attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor visitor, C context) { + return visitor.visitHighlight(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 6ad935e59da..022cc49e283 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -45,6 +45,9 @@ public class CalcitePlanContext { private static final ThreadLocal legacyPreferredFlag = ThreadLocal.withInitial(() -> true); + /** Highlight field patterns set by the {@code | highlight} command. */ + @Getter @Setter private List highlightArgs; + @Getter @Setter private boolean isResolvingJoinCondition = false; @Getter @Setter private boolean isResolvingSubquery = false; @Getter @Setter private boolean inCoalesceFunction = false; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index ed68dfbcb1b..8c189f8f7d1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -123,6 +123,7 @@ import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.GraphLookup.Direction; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Lookup; @@ -156,6 +157,7 @@ import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; +import org.opensearch.sql.calcite.plan.HighlightPushable; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.calcite.plan.rel.LogicalGraphLookup; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit; @@ -171,6 +173,7 @@ import org.opensearch.sql.datasource.DataSourceService; import org.opensearch.sql.exception.CalciteUnsupportedException; import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.HighlightExpression; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; import org.opensearch.sql.expression.parse.RegexCommonUtils; @@ -225,6 +228,16 @@ public RelNode visitRelation(Relation node, CalcitePlanContext context) { } context.relBuilder.scan(node.getTableQualifiedName().getParts()); RelNode scan = context.relBuilder.peek(); + + if (context.getHighlightArgs() != null && scan instanceof HighlightPushable highlightPushable) { + RelNode newScan = highlightPushable.pushDownHighlight(context.getHighlightArgs()); + context.relBuilder.build(); // pop old scan + context.relBuilder.push(newScan); + scan = newScan; + // Clear so that join's right-side scan is not affected + context.setHighlightArgs(null); + } + if (scan instanceof AliasFieldsWrappable) { return ((AliasFieldsWrappable) scan).wrapProjectForAliasFields(context.relBuilder); } @@ -419,6 +432,12 @@ public RelNode visitProject(Project node, CalcitePlanContext context) { List expandedFields = expandProjectFields(node.getProjectList(), currentFields, context); + // Include _highlight in projections when the highlight column is present in the schema + int hlIndex = currentFields.indexOf(HighlightExpression.HIGHLIGHT_FIELD); + if (hlIndex >= 0) { + expandedFields.add(context.relBuilder.field(hlIndex)); + } + if (node.isExcluded()) { validateExclusion(expandedFields, currentFields); context.relBuilder.projectExcept(expandedFields); @@ -3180,6 +3199,13 @@ static ChartConfig fromArguments(ArgumentMap argMap) { } } + @Override + public RelNode visitHighlight(Highlight node, CalcitePlanContext context) { + context.setHighlightArgs(node.getHighlightArgs()); + visitChildren(node, context); + return context.relBuilder.peek(); + } + @Override public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushable.java b/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushable.java new file mode 100644 index 00000000000..0a50f5fb609 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushable.java @@ -0,0 +1,26 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.plan; + +import java.util.List; +import org.apache.calcite.rel.RelNode; + +/** + * Interface for scan operators that support highlight push-down. Allows the visitor in the core + * module to push highlight configuration to the scan without knowing the concrete scan type. + */ +public interface HighlightPushable { + + /** + * Returns a new scan with the {@code _highlight} column added to its schema and the highlight + * arguments stored for later use during execution. + * + * @param highlightArgs the highlight arguments — {@code ["*"]} to highlight search query matches + * in all fields, or specific terms like {@code ["login", "logout"]} to highlight + * @return a new scan RelNode with highlight support + */ + RelNode pushDownHighlight(List highlightArgs); +} diff --git a/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java b/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java index 79cc07f048b..4a373b0e05f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java +++ b/core/src/main/java/org/opensearch/sql/expression/HighlightExpression.java @@ -23,6 +23,8 @@ /** Highlight Expression. */ @Getter public class HighlightExpression extends FunctionExpression { + public static final String HIGHLIGHT_FIELD = "_highlight"; + private final Expression highlightField; private final ExprType type; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 3ab40caee27..eea2f719048 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -495,4 +495,34 @@ public boolean isScriptPushed() { public boolean isProjectPushed() { return this.getPushDownContext().isProjectPushed(); } + + protected static void applyHighlightPushDown( + org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder requestBuilder, + java.util.List highlightArgs) { + if (highlightArgs == null || highlightArgs.isEmpty()) { + return; + } + org.opensearch.search.fetch.subphase.highlight.HighlightBuilder hb = + new org.opensearch.search.fetch.subphase.highlight.HighlightBuilder(); + if (highlightArgs.size() == 1 && "*".equals(highlightArgs.get(0))) { + // Wildcard: highlight search query matches in all fields + hb.field("*"); + } else { + // Splunk-like: highlight specific terms across all fields + String queryStr = + highlightArgs.stream() + .map(term -> "\"" + term + "\"") + .collect(java.util.stream.Collectors.joining(" OR ")); + org.opensearch.search.fetch.subphase.highlight.HighlightBuilder.Field field = + new org.opensearch.search.fetch.subphase.highlight.HighlightBuilder.Field("*") + .highlightQuery( + org.opensearch.index.query.QueryBuilders.queryStringQuery(queryStr) + .defaultField("*")); + hb.field(field); + } + hb.fragmentSize(Integer.MAX_VALUE); + hb.preTags("@opensearch-dashboards-highlighted-field@"); + hb.postTags("@/opensearch-dashboards-highlighted-field@"); + requestBuilder.getSourceBuilder().highlighter(hb); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index 7ba75e46ba0..826959361ff 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -119,6 +119,7 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { @Override public Enumerator enumerator() { OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); + applyHighlightPushDown(requestBuilder, pushDownContext.getHighlightArgs()); return new OpenSearchIndexEnumerator( osIndex.getClient(), getRowType().getFieldNames(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index dbe8306d4b2..98ac4d0dc66 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -23,6 +23,7 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Project; @@ -35,16 +36,19 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.sql.calcite.plan.HighlightPushable; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.HighlightExpression; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.planner.rules.OpenSearchIndexRules; @@ -67,7 +71,7 @@ /** The logical relational operator representing a scan of an OpenSearchIndex type. */ @Getter -public class CalciteLogicalIndexScan extends AbstractCalciteIndexScan { +public class CalciteLogicalIndexScan extends AbstractCalciteIndexScan implements HighlightPushable { private static final Logger LOG = LogManager.getLogger(CalciteLogicalIndexScan.class); public CalciteLogicalIndexScan( @@ -82,6 +86,18 @@ public CalciteLogicalIndexScan( new PushDownContext(osIndex)); } + @Override + public RelNode pushDownHighlight(List highlightArgs) { + RelDataTypeFactory.Builder schemaBuilder = getCluster().getTypeFactory().builder(); + schemaBuilder.addAll(getRowType().getFieldList()); + schemaBuilder.add( + HighlightExpression.HIGHLIGHT_FIELD, + getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY)); + CalciteLogicalIndexScan newScan = copyWithNewSchema(schemaBuilder.build()); + newScan.getPushDownContext().setHighlightArgs(highlightArgs); + return newScan; + } + protected CalciteLogicalIndexScan( RelOptCluster cluster, RelTraitSet traitSet, @@ -279,7 +295,9 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { action = (OSRequestBuilderAction) requestBuilder -> - requestBuilder.pushDownProjectStream(newSchema.getFieldNames().stream()); + requestBuilder.pushDownProjectStream( + newSchema.getFieldNames().stream() + .filter(f -> !HighlightExpression.HIGHLIGHT_FIELD.equals(f))); } newScan.pushDownContext.add( PushDownType.PROJECT, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java index 05bd00dcf2c..3d6c91acb4a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java @@ -92,6 +92,10 @@ public Object current() { } private Object resolveForCalcite(ExprValue value, String rawPath) { + if ("_highlight".equals(rawPath)) { + ExprValue hl = ExprValueUtils.getTupleValue(value).get("_highlight"); + return (hl != null && !hl.isMissing()) ? hl : null; + } return ExprValueUtils.resolveRefPaths(value, List.of(rawPath.split("\\."))).valueForCalcite(); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 2d236207c10..1d0973d7c1f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -42,6 +42,8 @@ public class PushDownContext extends AbstractCollection { private boolean isRareTopPushed = false; private boolean isScriptPushed = false; + @lombok.Setter private List highlightArgs; + public PushDownContext(OpenSearchIndex osIndex) { this.osIndex = osIndex; } @@ -50,6 +52,7 @@ public PushDownContext(OpenSearchIndex osIndex) { public PushDownContext clone() { PushDownContext newContext = new PushDownContext(osIndex); newContext.addAll(this); + newContext.highlightArgs = this.highlightArgs; return newContext; } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 3ada5c96b72..1631ab010b1 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -48,6 +48,7 @@ FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; CONVERT: 'CONVERT'; TRENDLINE: 'TRENDLINE'; +HIGHLIGHT: 'HIGHLIGHT'; TRANSPOSE: 'TRANSPOSE'; CHART: 'CHART'; TIMECHART: 'TIMECHART'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 72d5d0fd76d..476daffdbdd 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -95,6 +95,7 @@ commands | fieldformatCommand | nomvCommand | graphLookupCommand + | highlightCommand ; commandName @@ -145,6 +146,7 @@ commandName | NOMV | TRANSPOSE | GRAPHLOOKUP + | HIGHLIGHT ; searchCommand @@ -563,6 +565,16 @@ trendlineType | WMA ; +highlightCommand + : HIGHLIGHT highlightArg (COMMA highlightArg)* + ; + +highlightArg + : STAR + | stringLiteral + | fieldExpression + ; + expandCommand : EXPAND fieldExpression (AS alias = qualifiedName)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 9a92126d2e6..7fb90b51de4 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -89,6 +89,7 @@ import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.GraphLookup.Direction; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Lookup; @@ -1284,6 +1285,25 @@ public UnresolvedPlan visitTrendlineCommand(OpenSearchPPLParser.TrendlineCommand .orElse(new Trendline(Optional.empty(), trendlineComputations)); } + /** highlight command. */ + @Override + public UnresolvedPlan visitHighlightCommand(OpenSearchPPLParser.HighlightCommandContext ctx) { + List highlightArgs = + ctx.highlightArg().stream() + .map( + arg -> { + if (arg.STAR() != null) { + return "*"; + } else if (arg.stringLiteral() != null) { + return StringUtils.unquoteText(arg.stringLiteral().getText()); + } else { + return arg.fieldExpression().getText(); + } + }) + .collect(Collectors.toList()); + return new Highlight(highlightArgs); + } + @Override public UnresolvedPlan visitAppendcolCommand(OpenSearchPPLParser.AppendcolCommandContext ctx) { final Optional subsearch = diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 96c0787d5e3..377ded06e22 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -80,6 +80,7 @@ import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.MinSpanBin; @@ -730,6 +731,13 @@ public String visitFlatten(Flatten node, String context) { return StringUtils.format("%s | flatten %s", child, field); } + @Override + public String visitHighlight(Highlight node, String context) { + String child = node.getChild().get(0).accept(this, context); + String fields = String.join(", ", node.getHighlightArgs()); + return StringUtils.format("%s | highlight %s", child, fields); + } + @Override public String visitTrendline(Trendline node, String context) { String child = node.getChild().get(0).accept(this, context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index a53e4a5d8dd..e076ed6b3dd 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -32,6 +32,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.filter; import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.head; +import static org.opensearch.sql.ast.dsl.AstDSL.highlight; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.let; import static org.opensearch.sql.ast.dsl.AstDSL.map; @@ -1106,6 +1107,28 @@ public void testTrendlineTooFewSamples() { assertThrows(SyntaxCheckException.class, () -> plan("source=t | trendline sma(0, test_field)")); } + @Test + public void testHighlightStar() { + assertEqual("source=t | highlight *", highlight(relation("t"), List.of("*"))); + } + + @Test + public void testHighlightStringLiteral() { + assertEqual("source=t | highlight \"error\"", highlight(relation("t"), List.of("error"))); + } + + @Test + public void testHighlightMultipleStringLiterals() { + assertEqual( + "source=t | highlight \"error\", \"login\"", + highlight(relation("t"), List.of("error", "login"))); + } + + @Test + public void testHighlightFieldExpression() { + assertEqual("source=t | highlight fieldname", highlight(relation("t"), List.of("fieldname"))); + } + @Test public void testDescribeCommand() { assertEqual("describe t", describe(mappingTable("t"))); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index bb720bd4207..1987246958f 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -262,6 +262,18 @@ public void testTrendlineCommand() { anonymize("source=t | trendline sma(2, date) as date_alias sma(3, time) as time_alias")); } + @Test + public void testHighlightCommand() { + assertEquals("source=table | highlight *", anonymize("source=t | highlight *")); + } + + @Test + public void testHighlightCommandMultipleFields() { + assertEquals( + "source=table | highlight error, login", + anonymize("source=t | highlight \"error\", \"login\"")); + } + @Test public void testHeadCommandWithNumber() { assertEquals("source=table | head 3", anonymize("source=t | head 3")); From f8f7208e81927d6d9abc2e0bb017e1b517897b20 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 13 Mar 2026 16:06:13 -0700 Subject: [PATCH 02/25] fix the response format Signed-off-by: Jialiang Liang --- .../sql/protocol/response/QueryResult.java | 45 ++++++++++++++++--- .../format/SimpleJsonResponseFormatter.java | 9 ++++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java index 53badf3950d..5581430170a 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java @@ -5,11 +5,15 @@ package org.opensearch.sql.protocol.response; +import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; + import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.stream.Collectors; import lombok.Getter; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -82,19 +86,46 @@ public Map columnNameTypes() { @Override public Iterator iterator() { - // Any chance to avoid copy for json response generation? return exprValues.stream() .map(ExprValueUtils::getTupleValue) - .map(Map::values) - .map(this::convertExprValuesToValues) + .map( + tuple -> + tuple.entrySet().stream() + .filter(e -> !HIGHLIGHT_FIELD.equals(e.getKey())) + .map(e -> e.getValue().value()) + .toArray(Object[]::new)) .iterator(); } - private String getColumnName(Column column) { - return (column.getAlias() != null) ? column.getAlias() : column.getName(); + /** + * Extract highlight data from each result row. Each row may contain a {@code _highlight} field + * added by {@code OpenSearchResponse.addHighlightsToBuilder()} and preserved through projection. + * Returns a list parallel to datarows where each entry is either a map of field name to highlight + * fragments, or null if no highlight data exists for that row. + */ + public List> highlights() { + return exprValues.stream() + .map(ExprValueUtils::getTupleValue) + .map( + tuple -> { + ExprValue hl = tuple.get(HIGHLIGHT_FIELD); + if (hl == null || hl.isMissing() || hl.isNull()) { + return null; + } + Map hlMap = new LinkedHashMap<>(); + for (Map.Entry entry : hl.tupleValue().entrySet()) { + hlMap.put( + entry.getKey(), + entry.getValue().collectionValue().stream() + .map(ExprValue::stringValue) + .collect(Collectors.toList())); + } + return (Map) hlMap; + }) + .collect(Collectors.toList()); } - private Object[] convertExprValuesToValues(Collection exprValues) { - return exprValues.stream().map(ExprValue::value).toArray(Object[]::new); + private String getColumnName(Column column) { + return (column.getAlias() != null) ? column.getAlias() : column.getName(); } } diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java index ff59ce4cddc..abade18e9f9 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java @@ -6,6 +6,8 @@ package org.opensearch.sql.protocol.response.format; import java.util.List; +import java.util.Map; +import java.util.Objects; import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -54,6 +56,12 @@ public Object buildJsonObject(QueryResult response) { response.columnNameTypes().forEach((name, type) -> json.column(new Column(name, type))); json.datarows(fetchDataRows(response)); + + List> highlights = response.highlights(); + if (highlights.stream().anyMatch(Objects::nonNull)) { + json.highlights(highlights); + } + formatMetric.set(System.nanoTime() - formatTime); json.profile(QueryProfiling.current().finish()); @@ -79,6 +87,7 @@ public static class JsonResponse { private final List schema; private final Object[][] datarows; + private final List> highlights; private long total; private long size; From 4c649b53e8e51234168a4f8677b8cc7c00c8b50a Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 13 Mar 2026 17:39:58 -0700 Subject: [PATCH 03/25] pushdown fix (similar to v2) Signed-off-by: Jialiang Liang --- .../sql/calcite/CalcitePlanContext.java | 3 - .../sql/calcite/CalciteRelNodeVisitor.java | 17 ++--- .../calcite/plan/rel/LogicalHighlight.java | 76 +++++++++++++++++++ .../planner/rules/HighlightIndexScanRule.java | 55 ++++++++++++++ .../planner/rules/OpenSearchIndexRules.java | 5 +- .../scan/AbstractCalciteIndexScan.java | 4 +- 6 files changed, 141 insertions(+), 19 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 022cc49e283..6ad935e59da 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -45,9 +45,6 @@ public class CalcitePlanContext { private static final ThreadLocal legacyPreferredFlag = ThreadLocal.withInitial(() -> true); - /** Highlight field patterns set by the {@code | highlight} command. */ - @Getter @Setter private List highlightArgs; - @Getter @Setter private boolean isResolvingJoinCondition = false; @Getter @Setter private boolean isResolvingSubquery = false; @Getter @Setter private boolean inCoalesceFunction = false; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 8c189f8f7d1..aec4bdb62f8 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -157,9 +157,9 @@ import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; -import org.opensearch.sql.calcite.plan.HighlightPushable; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.calcite.plan.rel.LogicalGraphLookup; +import org.opensearch.sql.calcite.plan.rel.LogicalHighlight; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit.SystemLimitType; import org.opensearch.sql.calcite.utils.BinUtils; @@ -229,15 +229,6 @@ public RelNode visitRelation(Relation node, CalcitePlanContext context) { context.relBuilder.scan(node.getTableQualifiedName().getParts()); RelNode scan = context.relBuilder.peek(); - if (context.getHighlightArgs() != null && scan instanceof HighlightPushable highlightPushable) { - RelNode newScan = highlightPushable.pushDownHighlight(context.getHighlightArgs()); - context.relBuilder.build(); // pop old scan - context.relBuilder.push(newScan); - scan = newScan; - // Clear so that join's right-side scan is not affected - context.setHighlightArgs(null); - } - if (scan instanceof AliasFieldsWrappable) { return ((AliasFieldsWrappable) scan).wrapProjectForAliasFields(context.relBuilder); } @@ -3201,9 +3192,11 @@ static ChartConfig fromArguments(ArgumentMap argMap) { @Override public RelNode visitHighlight(Highlight node, CalcitePlanContext context) { - context.setHighlightArgs(node.getHighlightArgs()); visitChildren(node, context); - return context.relBuilder.peek(); + RelNode input = context.relBuilder.build(); + LogicalHighlight highlight = LogicalHighlight.create(input, node.getHighlightArgs()); + context.relBuilder.push(highlight); + return highlight; } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java new file mode 100644 index 00000000000..7842bb3e728 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java @@ -0,0 +1,76 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.plan.rel; + +import java.util.List; +import lombok.Getter; +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelWriter; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.expression.HighlightExpression; + +/** + * Logical relational node representing a PPL {@code | highlight} command. Stores the highlight + * arguments (terms to highlight or {@code *} for wildcard) and adds a {@code _highlight} column to + * the output row type. An optimizer rule ({@code HighlightIndexScanRule}) pushes this node down + * into the OpenSearch index scan. + */ +public class LogicalHighlight extends SingleRel { + + @Getter private final List highlightArgs; + private final RelDataType highlightRowType; + + protected LogicalHighlight( + RelOptCluster cluster, + RelTraitSet traitSet, + RelNode input, + List highlightArgs, + RelDataType highlightRowType) { + super(cluster, traitSet, input); + this.highlightArgs = highlightArgs; + this.highlightRowType = highlightRowType; + } + + public static LogicalHighlight create(RelNode input, List highlightArgs) { + final RelOptCluster cluster = input.getCluster(); + RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE); + + // Add _highlight column to the output row type so that downstream operators + // (e.g. visitProject) can reference it before the optimizer rule fires. + RelDataTypeFactory typeFactory = cluster.getTypeFactory(); + RelDataTypeFactory.Builder schemaBuilder = typeFactory.builder(); + schemaBuilder.addAll(input.getRowType().getFieldList()); + if (!input.getRowType().getFieldNames().contains(HighlightExpression.HIGHLIGHT_FIELD)) { + schemaBuilder.add( + HighlightExpression.HIGHLIGHT_FIELD, typeFactory.createSqlType(SqlTypeName.ANY)); + } + + return new LogicalHighlight(cluster, traitSet, input, highlightArgs, schemaBuilder.build()); + } + + @Override + protected RelDataType deriveRowType() { + return highlightRowType; + } + + @Override + public RelNode copy(RelTraitSet traitSet, List inputs) { + assert traitSet.containsIfApplicable(Convention.NONE); + return new LogicalHighlight( + getCluster(), traitSet, sole(inputs), highlightArgs, highlightRowType); + } + + @Override + public RelWriter explainTerms(RelWriter pw) { + return super.explainTerms(pw).item("highlightArgs", highlightArgs); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java new file mode 100644 index 00000000000..4ee12799496 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.planner.rules; + +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.rel.LogicalHighlight; +import org.opensearch.sql.calcite.plan.rule.OpenSearchRuleConfig; +import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; + +/** + * Planner rule that pushes a {@link LogicalHighlight} down to {@link CalciteLogicalIndexScan}. This + * adds the {@code _highlight} column to the scan schema and configures the OpenSearch {@code + * HighlightBuilder} for the search request. + */ +@Value.Enclosing +public class HighlightIndexScanRule extends InterruptibleRelRule { + + protected HighlightIndexScanRule(Config config) { + super(config); + } + + @Override + protected void onMatchImpl(RelOptRuleCall call) { + final LogicalHighlight highlight = call.rel(0); + final CalciteLogicalIndexScan scan = call.rel(1); + RelNode newScan = scan.pushDownHighlight(highlight.getHighlightArgs()); + if (newScan != null) { + call.transformTo(newScan); + PlanUtils.tryPruneRelNodes(call); + } + } + + /** Rule configuration. */ + @Value.Immutable + public interface Config extends OpenSearchRuleConfig { + Config DEFAULT = + ImmutableHighlightIndexScanRule.Config.builder() + .build() + .withOperandSupplier( + b0 -> + b0.operand(LogicalHighlight.class) + .oneInput(b1 -> b1.operand(CalciteLogicalIndexScan.class).noInputs())); + + @Override + default HighlightIndexScanRule toRule() { + return new HighlightIndexScanRule(this); + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index 0068f445ce7..c7c6d9da68f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -62,6 +62,8 @@ public class OpenSearchIndexRules { SortExprIndexScanRule.Config.DEFAULT.toRule(); private static final EnumerableTopKMergeRule ENUMERABLE_TOP_K_MERGE_RULE = EnumerableTopKMergeRule.Config.DEFAULT.toRule(); + private static final HighlightIndexScanRule HIGHLIGHT_INDEX_SCAN = + HighlightIndexScanRule.Config.DEFAULT.toRule(); /** The rules will apply only when the pushdown is enabled. */ public static final List OPEN_SEARCH_PUSHDOWN_RULES = @@ -80,7 +82,8 @@ public class OpenSearchIndexRules { RARE_TOP_PUSH_DOWN, ENUMERABLE_TOP_K_MERGE_RULE, EXPAND_COLLATION_ON_PROJECT_EXPR, - SORT_EXPR_INDEX_SCAN); + SORT_EXPR_INDEX_SCAN, + HIGHLIGHT_INDEX_SCAN); // prevent instantiation private OpenSearchIndexRules() {} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index eea2f719048..ad251db4d13 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -508,7 +508,7 @@ protected static void applyHighlightPushDown( // Wildcard: highlight search query matches in all fields hb.field("*"); } else { - // Splunk-like: highlight specific terms across all fields + // Highlight specific terms across all fields String queryStr = highlightArgs.stream() .map(term -> "\"" + term + "\"") @@ -521,8 +521,6 @@ protected static void applyHighlightPushDown( hb.field(field); } hb.fragmentSize(Integer.MAX_VALUE); - hb.preTags("@opensearch-dashboards-highlighted-field@"); - hb.postTags("@/opensearch-dashboards-highlighted-field@"); requestBuilder.getSourceBuilder().highlighter(hb); } } From a596cba33afdacbaeaaf494854ba4647e53dcbcc Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 13 Mar 2026 17:49:13 -0700 Subject: [PATCH 04/25] fix - filter out highlight col in result schema Signed-off-by: Jialiang Liang --- .../org/opensearch/sql/protocol/response/QueryResult.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java index 5581430170a..9c0593bd887 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java @@ -74,8 +74,8 @@ public int size() { */ public Map columnNameTypes() { Map colNameTypes = new LinkedHashMap<>(); - schema - .getColumns() + schema.getColumns().stream() + .filter(column -> !HIGHLIGHT_FIELD.equals(getColumnName(column))) .forEach( column -> colNameTypes.put( From 5c64e044ae1f408ff495d551fa1ad1f8b1c0e91c Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 13 Mar 2026 18:13:23 -0700 Subject: [PATCH 05/25] add docs for highlight Signed-off-by: Jialiang Liang --- docs/category.json | 1 + docs/user/ppl/cmd/highlight.md | 144 +++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 docs/user/ppl/cmd/highlight.md diff --git a/docs/category.json b/docs/category.json index 5e9b6f954a5..16f7b7b12f8 100644 --- a/docs/category.json +++ b/docs/category.json @@ -22,6 +22,7 @@ "user/ppl/cmd/fillnull.md", "user/ppl/cmd/grok.md", "user/ppl/cmd/head.md", + "user/ppl/cmd/highlight.md", "user/ppl/cmd/join.md", "user/ppl/cmd/lookup.md", "user/ppl/cmd/mvcombine.md", diff --git a/docs/user/ppl/cmd/highlight.md b/docs/user/ppl/cmd/highlight.md new file mode 100644 index 00000000000..1f11cf038ba --- /dev/null +++ b/docs/user/ppl/cmd/highlight.md @@ -0,0 +1,144 @@ + +# highlight + +The `highlight` command adds search result highlighting metadata to the query response. When used, matching terms are wrapped in highlight tags (for example, `Holmes`) and returned in a separate `highlights` array alongside the data rows. + +> **Note**: The `highlight` command is supported by the Calcite engine only. Using it with the V2 engine returns an error. + +## Syntax + +The `highlight` command has the following syntax: + +```syntax +highlight [, ]* +``` + +## Parameters + +The `highlight` command supports the following parameters. + +| Parameter | Required/Optional | Description | +| --- | --- | --- | +| `*` | Required (one of `*` or `""`) | Highlights all matches from the search query across all fields. | +| `""` | Required (one of `*` or `""`) | Highlights a specific term across all fields, regardless of the search query. Multiple terms can be specified, separated by commas. | + +## Example 1: Highlight search query matches + +The following query searches for `"Holmes"` and highlights all matches using the wildcard `*`: + +```ppl +source=accounts "Holmes" | highlight * | fields account_number, firstname, address +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++----------------+-----------+-----------------+ +| account_number | firstname | address | +|----------------+-----------+-----------------| +| 1 | Amber | 880 Holmes Lane | ++----------------+-----------+-----------------+ +``` + +The tabular output shows the data rows only. The highlight metadata is returned in the JSON response as a parallel `highlights` array: + +```json ignore +{ + "highlights": [ + { + "address": ["880 Holmes Lane"] + } + ] +} +``` + +## Example 2: Highlight a specific term + +The following query highlights the term `"Duke"` across all fields, independent of any search query: + +```ppl +source=accounts | highlight "Duke" | fields account_number, firstname, lastname | sort account_number +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++----------------+-----------+----------+ +| account_number | firstname | lastname | +|----------------+-----------+----------| +| 1 | Amber | Duke | +| 6 | Hattie | Bond | +| 13 | Nanette | Bates | +| 18 | Dale | Adams | ++----------------+-----------+----------+ +``` + +All rows are returned because `highlight` does not filter results. Only the row containing `"Duke"` will have highlight metadata in the response. + +## Example 3: Highlight multiple terms with a filter + +The following query highlights multiple terms and filters results using `where`: + +```ppl +source=accounts | highlight "Bond", "Duke" | where age > 30 | fields account_number, firstname, lastname | sort account_number +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++----------------+-----------+----------+ +| account_number | firstname | lastname | +|----------------+-----------+----------| +| 1 | Amber | Duke | +| 6 | Hattie | Bond | +| 18 | Dale | Adams | ++----------------+-----------+----------+ +``` + +## Response format + +The `highlight` command adds a `highlights` array to the JSON response, parallel to `datarows`. Each entry maps field names to a list of highlighted fragments. Rows with no matches have a `null` entry. The `highlights` field is omitted entirely when no rows have highlight data. + +```json ignore +{ + "schema": [ + { "name": "firstname", "type": "string" }, + { "name": "lastname", "type": "string" } + ], + "datarows": [ + ["Amber", "Duke"], + ["Hattie", "Bond"] + ], + "highlights": [ + { + "lastname": ["Duke"], + "lastname.keyword": ["Duke"] + }, + null + ], + "total": 2, + "size": 2 +} +``` + +## Comparison with Splunk + +Both Splunk and OpenSearch PPL use `| highlight` as a pipe command. However, they differ in implementation: + +| | Splunk | OpenSearch PPL | +|---|---|---| +| Arguments | Literal string values (`ERROR`, `WARNING`) | Terms (`"login"`) or wildcard (`*`) | +| What gets highlighted | The exact strings you pass | `*`: search query matches; terms: those specific terms | +| Output | UI colorization on Events tab only | `highlights` array in JSON response | +| Compatible commands | Breaks with any transforming command (`sort`, `table`, `fields`) | Works with all non-aggregating commands (`sort`, `where`, `fields`, `dedup`, `eval`) | + +## Limitations + +The `highlight` command has the following limitations: + +* Aggregation commands (`stats`, `eventstats`, `streamstats`, `top`, `rare`, `chart`) eliminate per-document results. Highlight data is lost after aggregation. +* For joins, only the left-side (first) table gets highlighting. +* Highlight tags are not customizable via the `| highlight` command. OpenSearch defaults (``, ``) are used. From 49f55988c75584522b9f815a2bbd09b205db58ad Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Sat, 14 Mar 2026 10:28:20 -0700 Subject: [PATCH 06/25] dead code cleanup Signed-off-by: Jialiang Liang --- .../sql/calcite/plan/HighlightPushable.java | 26 ------------------- .../storage/scan/CalciteLogicalIndexScan.java | 4 +-- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 - .../opensearch/sql/ppl/parser/AstBuilder.java | 4 +-- .../sql/ppl/parser/AstBuilderTest.java | 4 +-- 5 files changed, 4 insertions(+), 35 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushable.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushable.java b/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushable.java deleted file mode 100644 index 0a50f5fb609..00000000000 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushable.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.calcite.plan; - -import java.util.List; -import org.apache.calcite.rel.RelNode; - -/** - * Interface for scan operators that support highlight push-down. Allows the visitor in the core - * module to push highlight configuration to the scan without knowing the concrete scan type. - */ -public interface HighlightPushable { - - /** - * Returns a new scan with the {@code _highlight} column added to its schema and the highlight - * arguments stored for later use during execution. - * - * @param highlightArgs the highlight arguments — {@code ["*"]} to highlight search query matches - * in all fields, or specific terms like {@code ["login", "logout"]} to highlight - * @return a new scan RelNode with highlight support - */ - RelNode pushDownHighlight(List highlightArgs); -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 98ac4d0dc66..d7d5c8a13f5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -42,7 +42,6 @@ import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; -import org.opensearch.sql.calcite.plan.HighlightPushable; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.common.setting.Settings; @@ -71,7 +70,7 @@ /** The logical relational operator representing a scan of an OpenSearchIndex type. */ @Getter -public class CalciteLogicalIndexScan extends AbstractCalciteIndexScan implements HighlightPushable { +public class CalciteLogicalIndexScan extends AbstractCalciteIndexScan { private static final Logger LOG = LogManager.getLogger(CalciteLogicalIndexScan.class); public CalciteLogicalIndexScan( @@ -86,7 +85,6 @@ public CalciteLogicalIndexScan( new PushDownContext(osIndex)); } - @Override public RelNode pushDownHighlight(List highlightArgs) { RelDataTypeFactory.Builder schemaBuilder = getCluster().getTypeFactory().builder(); schemaBuilder.addAll(getRowType().getFieldList()); diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 476daffdbdd..5946fce5bf5 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -572,7 +572,6 @@ highlightCommand highlightArg : STAR | stringLiteral - | fieldExpression ; expandCommand diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 7fb90b51de4..7d3a28f5be7 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -1294,10 +1294,8 @@ public UnresolvedPlan visitHighlightCommand(OpenSearchPPLParser.HighlightCommand arg -> { if (arg.STAR() != null) { return "*"; - } else if (arg.stringLiteral() != null) { - return StringUtils.unquoteText(arg.stringLiteral().getText()); } else { - return arg.fieldExpression().getText(); + return StringUtils.unquoteText(arg.stringLiteral().getText()); } }) .collect(Collectors.toList()); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index e076ed6b3dd..f67fddc42a0 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1125,8 +1125,8 @@ public void testHighlightMultipleStringLiterals() { } @Test - public void testHighlightFieldExpression() { - assertEqual("source=t | highlight fieldname", highlight(relation("t"), List.of("fieldname"))); + public void testHighlightFieldExpressionNotSupported() { + assertThrows(SyntaxCheckException.class, () -> plan("source=t | highlight fieldname")); } @Test From fec921d07d7fa8db302cbf0e7f9c06059855d195 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Sun, 15 Mar 2026 12:38:24 -0700 Subject: [PATCH 07/25] enable no pushdown Signed-off-by: Jialiang Liang --- .../planner/rules/OpenSearchIndexRules.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index c7c6d9da68f..7c09c22bdf1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -21,6 +21,10 @@ public class OpenSearchIndexRules { // Rule that always pushes down relevance functions regardless of pushdown settings private static final RelevanceFunctionPushdownRule RELEVANCE_FUNCTION_RULE = RelevanceFunctionPushdownRule.Config.DEFAULT.toRule(); + // Rule that always pushes down highlight regardless of pushdown settings, + // consistent with V2 engine where PUSH_DOWN_HIGHLIGHT is always in the default rules + private static final HighlightIndexScanRule HIGHLIGHT_INDEX_SCAN = + HighlightIndexScanRule.Config.DEFAULT.toRule(); /** The rules will apply whatever the pushdown setting is. */ public static final List OPEN_SEARCH_NON_PUSHDOWN_RULES = @@ -29,7 +33,8 @@ public class OpenSearchIndexRules { SYSTEM_INDEX_SCAN_RULE, NESTED_AGGREGATE_RULE, GRAPH_LOOKUP_RULE, - RELEVANCE_FUNCTION_RULE); + RELEVANCE_FUNCTION_RULE, + HIGHLIGHT_INDEX_SCAN); private static final ProjectIndexScanRule PROJECT_INDEX_SCAN = ProjectIndexScanRule.Config.DEFAULT.toRule(); @@ -62,8 +67,6 @@ public class OpenSearchIndexRules { SortExprIndexScanRule.Config.DEFAULT.toRule(); private static final EnumerableTopKMergeRule ENUMERABLE_TOP_K_MERGE_RULE = EnumerableTopKMergeRule.Config.DEFAULT.toRule(); - private static final HighlightIndexScanRule HIGHLIGHT_INDEX_SCAN = - HighlightIndexScanRule.Config.DEFAULT.toRule(); /** The rules will apply only when the pushdown is enabled. */ public static final List OPEN_SEARCH_PUSHDOWN_RULES = @@ -82,8 +85,7 @@ public class OpenSearchIndexRules { RARE_TOP_PUSH_DOWN, ENUMERABLE_TOP_K_MERGE_RULE, EXPAND_COLLATION_ON_PROJECT_EXPR, - SORT_EXPR_INDEX_SCAN, - HIGHLIGHT_INDEX_SCAN); + SORT_EXPR_INDEX_SCAN); // prevent instantiation private OpenSearchIndexRules() {} From a5a2788b2ae93a93ed8c45c5a2ef30fb9be5a859 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Sun, 15 Mar 2026 13:49:57 -0700 Subject: [PATCH 08/25] fix pushdown and add IT and explain IT Signed-off-by: Jialiang Liang --- .../sql/calcite/remote/CalciteExplainIT.java | 25 +++++ .../explain_highlight_single_term.yaml | 8 ++ .../calcite/explain_highlight_wildcard.yaml | 8 ++ .../explain_highlight_with_filter.yaml | 9 ++ .../explain_highlight_single_term.yaml | 10 ++ .../explain_highlight_wildcard.yaml | 10 ++ .../explain_highlight_with_filter.yaml | 11 +++ .../scan/AbstractCalciteIndexScan.java | 6 +- .../scan/CalciteEnumerableIndexScan.java | 1 - .../storage/scan/CalciteLogicalIndexScan.java | 8 +- .../storage/scan/context/PushDownContext.java | 3 - .../storage/scan/context/PushDownType.java | 4 +- .../ppl/calcite/CalcitePPLHighlightTest.java | 91 +++++++++++++++++++ 13 files changed, 184 insertions(+), 10 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLHighlightTest.java diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 73531a8895c..096e510c664 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2795,4 +2795,29 @@ public void testNoMvWithEval() throws IOException { "Expected explain to contain both CONCAT and ARRAY_JOIN", result.toLowerCase().contains("concat") && result.toLowerCase().contains("array_join")); } + + @Test + public void testHighlightWildcardExplain() throws IOException { + String query = "source=" + TEST_INDEX_ACCOUNT + " | highlight *"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_highlight_wildcard.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testHighlightSingleTermExplain() throws IOException { + String query = "source=" + TEST_INDEX_ACCOUNT + " | highlight \\\"Holmes\\\""; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_highlight_single_term.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testHighlightWithFilterExplain() throws IOException { + String query = + "source=" + TEST_INDEX_ACCOUNT + " | highlight * | where age > 30 | fields firstname, age"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_highlight_with_filter.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml new file mode 100644 index 00000000000..2879c48929d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) + LogicalHighlight(highlightArgs=[[Holmes]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{"highlight_query":{"query_string":{"query":"\"Holmes\"","default_field":"*","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml new file mode 100644 index 00000000000..5c9501c2445 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) + LogicalHighlight(highlightArgs=[[*]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml new file mode 100644 index 00000000000..e1457539fe7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1], age=[$8], _highlight=[$17]) + LogicalFilter(condition=[>($8, 30)]) + LogicalHighlight(highlightArgs=[[*]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], FILTER->>($8, 30), PROJECT->[firstname, age, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["firstname","age"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml new file mode 100644 index 00000000000..e9403e6a92c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) + LogicalHighlight(highlightArgs=[[Holmes]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fragment_size":2147483647,"fields":{"*":{"highlight_query":{"query_string":{"query":"\"Holmes\"","default_field":"*","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml new file mode 100644 index 00000000000..21d22ccb43b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) + LogicalHighlight(highlightArgs=[[*]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml new file mode 100644 index 00000000000..c8a51814cf3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(firstname=[$1], age=[$8], _highlight=[$17]) + LogicalFilter(condition=[>($8, 30)]) + LogicalHighlight(highlightArgs=[[*]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[30], expr#19=[>($t8, $t18)], firstname=[$t1], age=[$t8], _highlight=[$t17], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index ad251db4d13..4d9f54ccd39 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -130,7 +130,7 @@ public double estimateRowCount(RelMetadataQuery mq) { (rowCount, operation) -> switch (operation.type()) { case AGGREGATION -> mq.getRowCount((RelNode) operation.digest()); - case PROJECT, SORT, SORT_EXPR -> rowCount; + case PROJECT, SORT, SORT_EXPR, HIGHLIGHT -> rowCount; case SORT_AGG_METRICS -> NumberUtil.min(rowCount, osIndex.getQueryBucketSize().doubleValue()); // Refer the org.apache.calcite.rel.metadata.RelMdRowCount @@ -176,8 +176,8 @@ public double estimateRowCount(RelMetadataQuery mq) { dRows = mq.getRowCount((RelNode) operation.digest()); dCpu += dRows * getAggMultiplier(operation); } - // Ignored Project in cost accumulation, but it will affect the external cost - case PROJECT -> {} + // Ignored Project and Highlight in cost accumulation, but they affect the external cost + case PROJECT, HIGHLIGHT -> {} case SORT -> dCpu += dRows; case SORT_AGG_METRICS -> { dRows = dRows * .9 / 10; // *.9 because always bucket IS_NOT_NULL diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index 826959361ff..7ba75e46ba0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -119,7 +119,6 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { @Override public Enumerator enumerator() { OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); - applyHighlightPushDown(requestBuilder, pushDownContext.getHighlightArgs()); return new OpenSearchIndexEnumerator( osIndex.getClient(), getRowType().getFieldNames(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index d7d5c8a13f5..c2a068cf621 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -92,7 +92,13 @@ public RelNode pushDownHighlight(List highlightArgs) { HighlightExpression.HIGHLIGHT_FIELD, getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY)); CalciteLogicalIndexScan newScan = copyWithNewSchema(schemaBuilder.build()); - newScan.getPushDownContext().setHighlightArgs(highlightArgs); + newScan + .getPushDownContext() + .add( + PushDownType.HIGHLIGHT, + highlightArgs, + (OSRequestBuilderAction) + requestBuilder -> applyHighlightPushDown(requestBuilder, highlightArgs)); return newScan; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 1d0973d7c1f..2d236207c10 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -42,8 +42,6 @@ public class PushDownContext extends AbstractCollection { private boolean isRareTopPushed = false; private boolean isScriptPushed = false; - @lombok.Setter private List highlightArgs; - public PushDownContext(OpenSearchIndex osIndex) { this.osIndex = osIndex; } @@ -52,7 +50,6 @@ public PushDownContext(OpenSearchIndex osIndex) { public PushDownContext clone() { PushDownContext newContext = new PushDownContext(osIndex); newContext.addAll(this); - newContext.highlightArgs = this.highlightArgs; return newContext; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java index c763808164d..d6817b8eeab 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java @@ -15,7 +15,7 @@ public enum PushDownType { SCRIPT, // script in predicate SORT_AGG_METRICS, // convert composite aggregate to terms or multi-terms bucket aggregate RARE_TOP, // convert composite aggregate to nested aggregate - SORT_EXPR - // HIGHLIGHT, + SORT_EXPR, + HIGHLIGHT // NESTED } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLHighlightTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLHighlightTest.java new file mode 100644 index 00000000000..a5dd76669de --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLHighlightTest.java @@ -0,0 +1,91 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLHighlightTest extends CalcitePPLAbstractTest { + + public CalcitePPLHighlightTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testHighlightWildcard() { + String ppl = "source=EMP | highlight *"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalHighlight(highlightArgs=[[*]])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testHighlightSingleTerm() { + String ppl = "source=EMP | highlight \"error\""; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalHighlight(highlightArgs=[[error]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testHighlightMultipleTerms() { + String ppl = "source=EMP | highlight \"error\", \"login\""; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalHighlight(highlightArgs=[[error, login]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testHighlightWithFilter() { + String ppl = "source=EMP | highlight * | where SAL > 2000"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalFilter(condition=[>($5, 2000)])\n" + + " LogicalHighlight(highlightArgs=[[*]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testHighlightWithFields() { + String ppl = "source=EMP | highlight \"error\" | fields ENAME, JOB"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(ENAME=[$1], JOB=[$2], _highlight=[$8])\n" + + " LogicalHighlight(highlightArgs=[[error]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testHighlightWithSort() { + String ppl = "source=EMP | highlight * | sort SAL"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalHighlight(highlightArgs=[[*]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testHighlightWithFilterAndFields() { + String ppl = "source=EMP | highlight \"error\" | where SAL > 2000 | fields ENAME, SAL"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(ENAME=[$1], SAL=[$5], _highlight=[$8])\n" + + " LogicalFilter(condition=[>($5, 2000)])\n" + + " LogicalHighlight(highlightArgs=[[error]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } +} From f46b2418a7e1e0eab6624284a0c0bb354cd9a83a Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Sun, 15 Mar 2026 14:13:59 -0700 Subject: [PATCH 09/25] add more tests for coverage Signed-off-by: Jialiang Liang --- .../scan/CalciteIndexScanCostTest.java | 16 +++ .../protocol/response/QueryResultTest.java | 98 +++++++++++++++++++ .../SimpleJsonResponseFormatterTest.java | 44 +++++++++ 3 files changed, 158 insertions(+) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java index 021a64aad7d..a91c99e26cd 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java @@ -382,6 +382,22 @@ public long getScriptCount() { 2913.7500643730164, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } + @Test + void test_cost_on_highlight_pushdown() { + RelDataType relDataType = mock(RelDataType.class); + lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(10)); + lenient().when(table.getRowType()).thenReturn(relDataType); + CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); + + List highlightArgs = List.of("*"); + scan.getPushDownContext() + .add( + new PushDownOperation( + PushDownType.HIGHLIGHT, highlightArgs, (OSRequestBuilderAction) req -> {})); + // Highlight should not change cost compared to non-pushdown (same as PROJECT behavior) + assertEquals(90000, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); + } + @Test void test_cost_on_project_limit_pushdown() { RelDataType relDataType = mock(RelDataType.class); diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java index fc3402e20a5..69faf1e16cb 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java @@ -7,8 +7,11 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.fail; +import static org.opensearch.sql.data.model.ExprValueUtils.collectionValue; import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -16,7 +19,12 @@ import com.google.common.collect.ImmutableMap; import java.util.Arrays; import java.util.Collections; +import java.util.List; +import java.util.Map; import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.pagination.Cursor; @@ -106,4 +114,94 @@ void iterate() { i++; } } + + @Test + void columnNameTypesFiltersHighlightField() { + ExecutionEngine.Schema schemaWithHighlight = + new ExecutionEngine.Schema( + ImmutableList.of( + new ExecutionEngine.Schema.Column("name", null, STRING), + new ExecutionEngine.Schema.Column("age", null, INTEGER), + new ExecutionEngine.Schema.Column("_highlight", null, ARRAY))); + QueryResult response = + new QueryResult( + schemaWithHighlight, + Collections.singletonList(tupleValue(ImmutableMap.of("name", "John", "age", 20))), + Cursor.None); + + assertEquals(ImmutableMap.of("name", "string", "age", "integer"), response.columnNameTypes()); + } + + @Test + void iterateFiltersHighlightField() { + java.util.LinkedHashMap map = new java.util.LinkedHashMap<>(); + map.put("name", ExprValueUtils.stringValue("John")); + map.put("age", ExprValueUtils.integerValue(20)); + map.put( + "_highlight", + ExprTupleValue.fromExprValueMap( + Map.of("name", collectionValue(List.of("highlighted John"))))); + ExprValue row = ExprTupleValue.fromExprValueMap(map); + QueryResult response = new QueryResult(schema, Collections.singletonList(row), Cursor.None); + + for (Object[] objects : response) { + assertArrayEquals(new Object[] {"John", 20}, objects); + } + } + + @Test + void highlightsExtractsHighlightData() { + ExprValue row = + ExprTupleValue.fromExprValueMap( + new java.util.LinkedHashMap<>( + Map.of( + "name", ExprValueUtils.stringValue("John"), + "_highlight", + ExprTupleValue.fromExprValueMap( + Map.of("name", collectionValue(List.of("John"))))))); + QueryResult response = new QueryResult(schema, Collections.singletonList(row), Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertEquals(Map.of("name", List.of("John")), highlights.get(0)); + } + + @Test + void highlightsReturnsNullWhenNoHighlightData() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList(tupleValue(ImmutableMap.of("name", "John", "age", 20))), + Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } + + @Test + void highlightsReturnsNullWhenHighlightIsMissing() { + java.util.LinkedHashMap map = new java.util.LinkedHashMap<>(); + map.put("name", ExprValueUtils.stringValue("John")); + map.put("_highlight", ExprValueUtils.LITERAL_MISSING); + ExprValue row = ExprTupleValue.fromExprValueMap(map); + QueryResult response = new QueryResult(schema, Collections.singletonList(row), Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } + + @Test + void highlightsReturnsNullWhenHighlightIsNull() { + java.util.LinkedHashMap map = new java.util.LinkedHashMap<>(); + map.put("name", ExprValueUtils.stringValue("John")); + map.put("_highlight", ExprValueUtils.LITERAL_NULL); + ExprValue row = ExprTupleValue.fromExprValueMap(map); + QueryResult response = new QueryResult(schema, Collections.singletonList(row), Cursor.None); + + List> highlights = response.highlights(); + assertEquals(1, highlights.size()); + assertNull(highlights.get(0)); + } } diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java index e5eb0f1ac77..5377c1c4d46 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING; +import static org.opensearch.sql.data.model.ExprValueUtils.collectionValue; import static org.opensearch.sql.data.model.ExprValueUtils.stringValue; import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -17,8 +18,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; import org.junit.jupiter.api.Test; import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.protocol.response.QueryResult; @@ -160,6 +166,44 @@ void formatResponseWithArrayValue() { formatter.format(response)); } + @Test + void formatResponseWithHighlights() { + java.util.LinkedHashMap map = new java.util.LinkedHashMap<>(); + map.put("firstname", ExprValueUtils.stringValue("John")); + map.put("age", ExprValueUtils.integerValue(20)); + map.put( + "_highlight", + ExprTupleValue.fromExprValueMap( + Map.of("firstname", collectionValue(List.of("John"))))); + ExprValue row = ExprTupleValue.fromExprValueMap(map); + QueryResult response = new QueryResult(schema, Collections.singletonList(row)); + SimpleJsonResponseFormatter formatter = new SimpleJsonResponseFormatter(COMPACT); + String result = formatter.format(response); + assertEquals( + "{\"schema\":[{\"name\":\"firstname\",\"type\":\"string\"}," + + "{\"name\":\"age\",\"type\":\"integer\"}]," + + "\"datarows\":[[\"John\",20]]," + + "\"highlights\":[{\"firstname\":[\"John\"]}]," + + "\"total\":1,\"size\":1}", + result); + } + + @Test + void formatResponseWithoutHighlights() { + QueryResult response = + new QueryResult( + schema, + Collections.singletonList(tupleValue(ImmutableMap.of("firstname", "John", "age", 20)))); + SimpleJsonResponseFormatter formatter = new SimpleJsonResponseFormatter(COMPACT); + String result = formatter.format(response); + // highlights field should not be present when no highlight data exists + assertEquals( + "{\"schema\":[{\"name\":\"firstname\",\"type\":\"string\"}," + + "{\"name\":\"age\",\"type\":\"integer\"}]," + + "\"datarows\":[[\"John\",20]],\"total\":1,\"size\":1}", + result); + } + @Test void formatError() { SimpleJsonResponseFormatter formatter = new SimpleJsonResponseFormatter(COMPACT); From 90839b6aebbde08e8af9b802f9f5626a0192edb5 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Sun, 15 Mar 2026 14:19:43 -0700 Subject: [PATCH 10/25] add ccr tests Signed-off-by: Jialiang Liang --- .../sql/security/CalciteCrossClusterSearchIT.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 49afb9d60a6..4166df4ac0f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -472,6 +472,16 @@ public void testCrossClusterMvExpandBasic() throws IOException { verifyDataRows(result, rows("happy", "java"), rows("happy", "python"), rows("happy", "sql")); } + @Test + public void testCrossClusterHighlightWildcard() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | highlight * | fields firstname, _highlight", + TEST_INDEX_BANK_REMOTE)); + verifyColumn(result, columnName("firstname"), columnName("_highlight")); + } + @Test public void testCrossClusterMvExpandWithLimit() throws IOException { JSONObject result = From e38284d4b1ef66043a2ae45da4ac98a991214e69 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 16 Mar 2026 09:38:27 -0700 Subject: [PATCH 11/25] fix ccr test Signed-off-by: Jialiang Liang --- .../sql/security/CalciteCrossClusterSearchIT.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 4166df4ac0f..c538f65c78b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -15,6 +15,7 @@ import java.io.IOException; import org.apache.commons.text.StringEscapeUtils; import org.json.JSONObject; +import org.junit.Assert; import org.junit.Test; /** Cross Cluster Search tests with Calcite enabled for enhanced fields features. */ @@ -477,9 +478,15 @@ public void testCrossClusterHighlightWildcard() throws IOException { JSONObject result = executeQuery( String.format( - "search source=%s | highlight * | fields firstname, _highlight", + "search source=%s \\\"Hattie\\\" | highlight * | fields firstname", TEST_INDEX_BANK_REMOTE)); - verifyColumn(result, columnName("firstname"), columnName("_highlight")); + verifySchema(result, schema("firstname", "string")); + verifyDataRows(result, rows("Hattie")); + var highlights = result.getJSONArray("highlights"); + var highlight = highlights.getJSONObject(0); + Assert.assertTrue( + "Highlight should contain Hattie", + highlight.getJSONArray("firstname").getString(0).contains("Hattie")); } @Test From 3cfbe9a4f10ec3cf1361f254a619f4673dde01d9 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 16 Mar 2026 13:37:17 -0700 Subject: [PATCH 12/25] [GRAMMAR REMOVAL] Internally handle highlight instead of expose as a command Signed-off-by: Jialiang Liang --- .../org/opensearch/sql/ast/dsl/AstDSL.java | 9 +- .../opensearch/sql/ast/tree/Highlight.java | 2 +- .../sql/ast/tree/HighlightConfig.java | 22 +++ .../sql/calcite/CalcitePlanContext.java | 2 + .../sql/calcite/CalciteRelNodeVisitor.java | 19 ++- .../calcite/plan/rel/LogicalHighlight.java | 21 +-- docs/category.json | 1 - docs/user/ppl/cmd/highlight.md | 144 ------------------ docs/user/ppl/interfaces/endpoint.md | 52 +++++++ .../sql/calcite/remote/CalciteExplainIT.java | 26 +++- .../opensearch/sql/ppl/PPLIntegTestCase.java | 23 +++ .../security/CalciteCrossClusterSearchIT.java | 25 ++- .../explain_highlight_single_term.yaml | 2 +- .../calcite/explain_highlight_wildcard.yaml | 2 +- .../explain_highlight_with_filter.yaml | 2 +- .../explain_highlight_single_term.yaml | 2 +- .../explain_highlight_wildcard.yaml | 2 +- .../explain_highlight_with_filter.yaml | 2 +- .../planner/rules/HighlightIndexScanRule.java | 2 +- .../scan/AbstractCalciteIndexScan.java | 27 +++- .../storage/scan/CalciteLogicalIndexScan.java | 7 +- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 - ppl/src/main/antlr/OpenSearchPPLParser.g4 | 11 -- .../org/opensearch/sql/ppl/PPLService.java | 1 + .../sql/ppl/domain/PPLQueryRequest.java | 66 ++++++++ .../opensearch/sql/ppl/parser/AstBuilder.java | 18 --- .../sql/ppl/parser/AstStatementBuilder.java | 10 ++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 2 +- .../ppl/calcite/CalcitePPLHighlightTest.java | 91 ----------- .../sql/ppl/parser/AstBuilderTest.java | 23 --- .../ppl/parser/AstStatementBuilderTest.java | 77 ++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 12 -- 32 files changed, 360 insertions(+), 346 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/HighlightConfig.java delete mode 100644 docs/user/ppl/cmd/highlight.md delete mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLHighlightTest.java diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 6d6efb1e49f..42b13d3eabd 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -61,6 +61,7 @@ import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Highlight; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.MvCombine; @@ -585,8 +586,12 @@ public static Trendline trendline( return new Trendline(sortField, Arrays.asList(computations)).attach(input); } - public static Highlight highlight(UnresolvedPlan input, List highlightArgs) { - return new Highlight(highlightArgs).attach(input); + public static Highlight highlight(UnresolvedPlan input, HighlightConfig highlightConfig) { + return new Highlight(highlightConfig).attach(input); + } + + public static Highlight highlight(UnresolvedPlan input, List highlightFields) { + return new Highlight(new HighlightConfig(highlightFields)).attach(input); } public static AppendPipe appendPipe(UnresolvedPlan input, UnresolvedPlan subquery) { diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java b/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java index ef445c6bae5..df710b7a2e8 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java @@ -21,7 +21,7 @@ public class Highlight extends UnresolvedPlan { private UnresolvedPlan child; - private final List highlightArgs; + private final HighlightConfig highlightConfig; @Override public Highlight attach(UnresolvedPlan child) { diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/HighlightConfig.java b/core/src/main/java/org/opensearch/sql/ast/tree/HighlightConfig.java new file mode 100644 index 00000000000..13880b56002 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/HighlightConfig.java @@ -0,0 +1,22 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import java.util.List; + +/** + * Bundles highlight configuration: field names (or wildcards) and optional pre/post tags and + * fragment size. Supports both the simple array format ({@code ["*"]}) and the rich OSD object + * format with {@code pre_tags}, {@code post_tags}, {@code fields}, and {@code fragment_size}. + */ +public record HighlightConfig( + List fields, List preTags, List postTags, Integer fragmentSize) { + + /** Convenience constructor for the simple array format (fields only, no tag/size overrides). */ + public HighlightConfig(List fields) { + this(fields, null, null, null); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 6ad935e59da..b98b2ff03a5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -22,6 +22,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.tools.FrameworkConfig; import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.calcite.utils.CalciteToolsHelper; import org.opensearch.sql.calcite.utils.CalciteToolsHelper.OpenSearchRelBuilder; import org.opensearch.sql.common.setting.Settings; @@ -45,6 +46,7 @@ public class CalcitePlanContext { private static final ThreadLocal legacyPreferredFlag = ThreadLocal.withInitial(() -> true); + @Getter @Setter private HighlightConfig highlightConfig; @Getter @Setter private boolean isResolvingJoinCondition = false; @Getter @Setter private boolean isResolvingSubquery = false; @Getter @Setter private boolean inCoalesceFunction = false; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index aec4bdb62f8..d728c3d5e38 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -230,9 +230,18 @@ public RelNode visitRelation(Relation node, CalcitePlanContext context) { RelNode scan = context.relBuilder.peek(); if (scan instanceof AliasFieldsWrappable) { - return ((AliasFieldsWrappable) scan).wrapProjectForAliasFields(context.relBuilder); + ((AliasFieldsWrappable) scan).wrapProjectForAliasFields(context.relBuilder); } - return scan; + + // Wrap with LogicalHighlight if highlight config is set on context (from API request) + if (context.getHighlightConfig() != null) { + RelNode input = context.relBuilder.build(); + LogicalHighlight highlight = LogicalHighlight.create(input, context.getHighlightConfig()); + context.relBuilder.push(highlight); + context.setHighlightConfig(null); // Clear for join safety + } + + return context.relBuilder.peek(); } // This is a tool method to add an existed RelOptTable to builder stack, not used for now @@ -3192,11 +3201,9 @@ static ChartConfig fromArguments(ArgumentMap argMap) { @Override public RelNode visitHighlight(Highlight node, CalcitePlanContext context) { + context.setHighlightConfig(node.getHighlightConfig()); visitChildren(node, context); - RelNode input = context.relBuilder.build(); - LogicalHighlight highlight = LogicalHighlight.create(input, node.getHighlightArgs()); - context.relBuilder.push(highlight); - return highlight; + return context.relBuilder.peek(); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java index 7842bb3e728..6f3946729a3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java @@ -16,31 +16,32 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.expression.HighlightExpression; /** * Logical relational node representing a PPL {@code | highlight} command. Stores the highlight - * arguments (terms to highlight or {@code *} for wildcard) and adds a {@code _highlight} column to - * the output row type. An optimizer rule ({@code HighlightIndexScanRule}) pushes this node down - * into the OpenSearch index scan. + * configuration (fields, pre/post tags, fragment size) and adds a {@code _highlight} column to the + * output row type. An optimizer rule ({@code HighlightIndexScanRule}) pushes this node down into + * the OpenSearch index scan. */ public class LogicalHighlight extends SingleRel { - @Getter private final List highlightArgs; + @Getter private final HighlightConfig highlightConfig; private final RelDataType highlightRowType; protected LogicalHighlight( RelOptCluster cluster, RelTraitSet traitSet, RelNode input, - List highlightArgs, + HighlightConfig highlightConfig, RelDataType highlightRowType) { super(cluster, traitSet, input); - this.highlightArgs = highlightArgs; + this.highlightConfig = highlightConfig; this.highlightRowType = highlightRowType; } - public static LogicalHighlight create(RelNode input, List highlightArgs) { + public static LogicalHighlight create(RelNode input, HighlightConfig highlightConfig) { final RelOptCluster cluster = input.getCluster(); RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE); @@ -54,7 +55,7 @@ public static LogicalHighlight create(RelNode input, List highlightArgs) HighlightExpression.HIGHLIGHT_FIELD, typeFactory.createSqlType(SqlTypeName.ANY)); } - return new LogicalHighlight(cluster, traitSet, input, highlightArgs, schemaBuilder.build()); + return new LogicalHighlight(cluster, traitSet, input, highlightConfig, schemaBuilder.build()); } @Override @@ -66,11 +67,11 @@ protected RelDataType deriveRowType() { public RelNode copy(RelTraitSet traitSet, List inputs) { assert traitSet.containsIfApplicable(Convention.NONE); return new LogicalHighlight( - getCluster(), traitSet, sole(inputs), highlightArgs, highlightRowType); + getCluster(), traitSet, sole(inputs), highlightConfig, highlightRowType); } @Override public RelWriter explainTerms(RelWriter pw) { - return super.explainTerms(pw).item("highlightArgs", highlightArgs); + return super.explainTerms(pw).item("highlightConfig", highlightConfig.fields()); } } diff --git a/docs/category.json b/docs/category.json index 16f7b7b12f8..5e9b6f954a5 100644 --- a/docs/category.json +++ b/docs/category.json @@ -22,7 +22,6 @@ "user/ppl/cmd/fillnull.md", "user/ppl/cmd/grok.md", "user/ppl/cmd/head.md", - "user/ppl/cmd/highlight.md", "user/ppl/cmd/join.md", "user/ppl/cmd/lookup.md", "user/ppl/cmd/mvcombine.md", diff --git a/docs/user/ppl/cmd/highlight.md b/docs/user/ppl/cmd/highlight.md deleted file mode 100644 index 1f11cf038ba..00000000000 --- a/docs/user/ppl/cmd/highlight.md +++ /dev/null @@ -1,144 +0,0 @@ - -# highlight - -The `highlight` command adds search result highlighting metadata to the query response. When used, matching terms are wrapped in highlight tags (for example, `Holmes`) and returned in a separate `highlights` array alongside the data rows. - -> **Note**: The `highlight` command is supported by the Calcite engine only. Using it with the V2 engine returns an error. - -## Syntax - -The `highlight` command has the following syntax: - -```syntax -highlight [, ]* -``` - -## Parameters - -The `highlight` command supports the following parameters. - -| Parameter | Required/Optional | Description | -| --- | --- | --- | -| `*` | Required (one of `*` or `""`) | Highlights all matches from the search query across all fields. | -| `""` | Required (one of `*` or `""`) | Highlights a specific term across all fields, regardless of the search query. Multiple terms can be specified, separated by commas. | - -## Example 1: Highlight search query matches - -The following query searches for `"Holmes"` and highlights all matches using the wildcard `*`: - -```ppl -source=accounts "Holmes" | highlight * | fields account_number, firstname, address -``` - -The query returns the following results: - -```text -fetched rows / total rows = 1/1 -+----------------+-----------+-----------------+ -| account_number | firstname | address | -|----------------+-----------+-----------------| -| 1 | Amber | 880 Holmes Lane | -+----------------+-----------+-----------------+ -``` - -The tabular output shows the data rows only. The highlight metadata is returned in the JSON response as a parallel `highlights` array: - -```json ignore -{ - "highlights": [ - { - "address": ["880 Holmes Lane"] - } - ] -} -``` - -## Example 2: Highlight a specific term - -The following query highlights the term `"Duke"` across all fields, independent of any search query: - -```ppl -source=accounts | highlight "Duke" | fields account_number, firstname, lastname | sort account_number -``` - -The query returns the following results: - -```text -fetched rows / total rows = 4/4 -+----------------+-----------+----------+ -| account_number | firstname | lastname | -|----------------+-----------+----------| -| 1 | Amber | Duke | -| 6 | Hattie | Bond | -| 13 | Nanette | Bates | -| 18 | Dale | Adams | -+----------------+-----------+----------+ -``` - -All rows are returned because `highlight` does not filter results. Only the row containing `"Duke"` will have highlight metadata in the response. - -## Example 3: Highlight multiple terms with a filter - -The following query highlights multiple terms and filters results using `where`: - -```ppl -source=accounts | highlight "Bond", "Duke" | where age > 30 | fields account_number, firstname, lastname | sort account_number -``` - -The query returns the following results: - -```text -fetched rows / total rows = 3/3 -+----------------+-----------+----------+ -| account_number | firstname | lastname | -|----------------+-----------+----------| -| 1 | Amber | Duke | -| 6 | Hattie | Bond | -| 18 | Dale | Adams | -+----------------+-----------+----------+ -``` - -## Response format - -The `highlight` command adds a `highlights` array to the JSON response, parallel to `datarows`. Each entry maps field names to a list of highlighted fragments. Rows with no matches have a `null` entry. The `highlights` field is omitted entirely when no rows have highlight data. - -```json ignore -{ - "schema": [ - { "name": "firstname", "type": "string" }, - { "name": "lastname", "type": "string" } - ], - "datarows": [ - ["Amber", "Duke"], - ["Hattie", "Bond"] - ], - "highlights": [ - { - "lastname": ["Duke"], - "lastname.keyword": ["Duke"] - }, - null - ], - "total": 2, - "size": 2 -} -``` - -## Comparison with Splunk - -Both Splunk and OpenSearch PPL use `| highlight` as a pipe command. However, they differ in implementation: - -| | Splunk | OpenSearch PPL | -|---|---|---| -| Arguments | Literal string values (`ERROR`, `WARNING`) | Terms (`"login"`) or wildcard (`*`) | -| What gets highlighted | The exact strings you pass | `*`: search query matches; terms: those specific terms | -| Output | UI colorization on Events tab only | `highlights` array in JSON response | -| Compatible commands | Breaks with any transforming command (`sort`, `table`, `fields`) | Works with all non-aggregating commands (`sort`, `where`, `fields`, `dedup`, `eval`) | - -## Limitations - -The `highlight` command has the following limitations: - -* Aggregation commands (`stats`, `eventstats`, `streamstats`, `top`, `rare`, `chart`) eliminate per-document results. Highlight data is lost after aggregation. -* For joins, only the left-side (first) table gets highlighting. -* Highlight tags are not customizable via the `| highlight` command. OpenSearch defaults (``, ``) are used. diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index d5958ba3250..30b5206e188 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -201,3 +201,55 @@ Expected output (trimmed): - Plan node names use Calcite physical operator names (for example, `EnumerableCalc` or `CalciteEnumerableIndexScan`). - Plan `time_ms` is inclusive of child operators and represents wall-clock time; overlapping work can make summed plan times exceed `summary.total_time_ms`. - Scan nodes reflect operator wall-clock time; background prefetch can make scan time smaller than total request latency. + +## Highlight + +You can add a `highlight` parameter to the PPL request body to enable search-result highlighting. When enabled, the response includes a `_highlight` field containing matching fragments with the specified tags. + +Two formats are supported: + +### Simple array format + +Pass a JSON array of field names or wildcards. Use `["*"]` to highlight all fields that match the search query. + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ + -X POST localhost:9200/_plugins/_ppl \ + -d '{ + "query": "source=accounts \"Holmes\"", + "highlight": ["*"] + }' +``` + +### Object format (OpenSearch Dashboards) + +Pass a JSON object with `fields`, `pre_tags`, `post_tags`, and `fragment_size`. This is the format used by OpenSearch Dashboards. + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ + -X POST localhost:9200/_plugins/_ppl \ + -d '{ + "query": "source=accounts \"Holmes\"", + "highlight": { + "pre_tags": ["@opensearch-dashboards-highlighted-field@"], + "post_tags": ["@/opensearch-dashboards-highlighted-field@"], + "fields": {"*": {}}, + "fragment_size": 2147483647 + } + }' +``` + +### Parameters + +| Parameter | Type | Required | Description | +|-----------------|-----------------|----------|--------------------------------------------------------------------------------------------------------------| +| `fields` | Object | Yes | An object whose keys are field names or wildcards (e.g. `{"*": {}}`) to highlight. | +| `pre_tags` | Array of string | No | Tags inserted before highlighted tokens. Defaults to ``. | +| `post_tags` | Array of string | No | Tags inserted after highlighted tokens. Defaults to ``. | +| `fragment_size` | Integer | No | Maximum character size of a highlight fragment. Defaults to `2147483647` (returns the full field value). | + +### Notes + +- Highlighting requires a search query in the PPL statement (e.g. `source=accounts "Holmes"`). Without a query, the `_highlight` field will be empty. +- In the simple array format, `["*"]` highlights all fields. Specific terms like `["error", "login"]` highlight those terms across all fields. +- In the object format, only the keys of the `fields` object are used; per-field options inside the value objects are currently ignored. diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 096e510c664..293cd669ddf 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2798,26 +2798,38 @@ public void testNoMvWithEval() throws IOException { @Test public void testHighlightWildcardExplain() throws IOException { - String query = "source=" + TEST_INDEX_ACCOUNT + " | highlight *"; - var result = explainQueryYaml(query); + String query = "source=" + TEST_INDEX_ACCOUNT; + var result = explainQueryYaml(query, "[\"*\"]"); String expected = loadExpectedPlan("explain_highlight_wildcard.yaml"); assertYamlEqualsIgnoreId(expected, result); } @Test public void testHighlightSingleTermExplain() throws IOException { - String query = "source=" + TEST_INDEX_ACCOUNT + " | highlight \\\"Holmes\\\""; - var result = explainQueryYaml(query); + String query = "source=" + TEST_INDEX_ACCOUNT; + var result = explainQueryYaml(query, "[\"Holmes\"]"); String expected = loadExpectedPlan("explain_highlight_single_term.yaml"); assertYamlEqualsIgnoreId(expected, result); } @Test public void testHighlightWithFilterExplain() throws IOException { - String query = - "source=" + TEST_INDEX_ACCOUNT + " | highlight * | where age > 30 | fields firstname, age"; - var result = explainQueryYaml(query); + String query = "source=" + TEST_INDEX_ACCOUNT + " | where age > 30 | fields firstname, age"; + var result = explainQueryYaml(query, "[\"*\"]"); String expected = loadExpectedPlan("explain_highlight_with_filter.yaml"); assertYamlEqualsIgnoreId(expected, result); } + + @Test + public void testHighlightOsdObjectFormatExplain() throws IOException { + // OSD sends highlight as a rich object with pre_tags, post_tags, fields, fragment_size + String query = "source=" + TEST_INDEX_ACCOUNT; + String highlightJson = + "{\"pre_tags\": [\"\"], \"post_tags\": [\"\"]," + + " \"fields\": {\"*\": {}}, \"fragment_size\": 2147483647}"; + var result = explainQueryYaml(query, highlightJson); + // Same explain plan as wildcard (fields: {"*": {}}) — just verify it doesn't error + String expected = loadExpectedPlan("explain_highlight_wildcard.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java index 6135d74b2fd..1a20b42751e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java @@ -73,6 +73,17 @@ protected String explainQueryYaml(String query, ExplainMode mode) throws IOExcep return explainQuery(query, Format.YAML, mode); } + protected String explainQueryYaml(String query, String highlightJson) throws IOException { + Request request = + buildRequestWithHighlight( + query, + String.format(EXPLAIN_API_ENDPOINT, Format.YAML, ExplainMode.STANDARD), + highlightJson); + Response response = client().performRequest(request); + Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + return getResponseBody(response, true); + } + protected String explainQueryToString(String query, ExplainMode mode) throws IOException { return explainQuery(query, Format.JSON, mode).replace("\\r\\n", "\\n"); } @@ -150,6 +161,18 @@ protected Request buildRequest(String query, String endpoint) { return request; } + protected Request buildRequestWithHighlight(String query, String endpoint, String highlightJson) { + Request request = new Request("POST", endpoint); + request.setJsonEntity( + String.format( + Locale.ROOT, "{\n \"query\": \"%s\",\n \"highlight\": %s\n}", query, highlightJson)); + + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + return request; + } + protected static JSONObject updateClusterSettings(ClusterSetting setting) throws IOException { Request request = new Request("PUT", "/_cluster/settings"); String persistentSetting = diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index c538f65c78b..20a2a539c70 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.security; +import static org.opensearch.sql.legacy.TestUtils.getResponseBody; import static org.opensearch.sql.util.MatcherUtils.columnName; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -13,10 +14,14 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; +import java.util.Locale; import org.apache.commons.text.StringEscapeUtils; import org.json.JSONObject; import org.junit.Assert; import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; /** Cross Cluster Search tests with Calcite enabled for enhanced fields features. */ public class CalciteCrossClusterSearchIT extends CrossClusterTestBase { @@ -476,10 +481,10 @@ public void testCrossClusterMvExpandBasic() throws IOException { @Test public void testCrossClusterHighlightWildcard() throws IOException { JSONObject result = - executeQuery( + executeQueryWithHighlight( String.format( - "search source=%s \\\"Hattie\\\" | highlight * | fields firstname", - TEST_INDEX_BANK_REMOTE)); + "search source=%s \\\"Hattie\\\" | fields firstname", TEST_INDEX_BANK_REMOTE), + "[\"*\"]"); verifySchema(result, schema("firstname", "string")); verifyDataRows(result, rows("Hattie")); var highlights = result.getJSONArray("highlights"); @@ -489,6 +494,20 @@ public void testCrossClusterHighlightWildcard() throws IOException { highlight.getJSONArray("firstname").getString(0).contains("Hattie")); } + private JSONObject executeQueryWithHighlight(String query, String highlightJson) + throws IOException { + Request request = new Request("POST", "/_plugins/_ppl"); + request.setJsonEntity( + String.format( + Locale.ROOT, "{\n \"query\": \"%s\",\n \"highlight\": %s\n}", query, highlightJson)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + return new JSONObject(getResponseBody(response, true)); + } + @Test public void testCrossClusterMvExpandWithLimit() throws IOException { JSONObject result = diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml index 2879c48929d..6ba122c4317 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightArgs=[[Holmes]]) + LogicalHighlight(highlightConfig=[[Holmes]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{"highlight_query":{"query_string":{"query":"\"Holmes\"","default_field":"*","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml index 5c9501c2445..e8a242d1456 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightArgs=[[*]]) + LogicalHighlight(highlightConfig=[[*]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml index e1457539fe7..74ba1020f57 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(firstname=[$1], age=[$8], _highlight=[$17]) LogicalFilter(condition=[>($8, 30)]) - LogicalHighlight(highlightArgs=[[*]]) + LogicalHighlight(highlightConfig=[[*]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], FILTER->>($8, 30), PROJECT->[firstname, age, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["firstname","age"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml index e9403e6a92c..18fb38bf945 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightArgs=[[Holmes]]) + LogicalHighlight(highlightConfig=[[Holmes]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml index 21d22ccb43b..85e64651ed0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightArgs=[[*]]) + LogicalHighlight(highlightConfig=[[*]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml index c8a51814cf3..576067c384a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml @@ -3,7 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(firstname=[$1], age=[$8], _highlight=[$17]) LogicalFilter(condition=[>($8, 30)]) - LogicalHighlight(highlightArgs=[[*]]) + LogicalHighlight(highlightConfig=[[*]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java index 4ee12799496..3f24f682820 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java @@ -29,7 +29,7 @@ protected HighlightIndexScanRule(Config config) { protected void onMatchImpl(RelOptRuleCall call) { final LogicalHighlight highlight = call.rel(0); final CalciteLogicalIndexScan scan = call.rel(1); - RelNode newScan = scan.pushDownHighlight(highlight.getHighlightArgs()); + RelNode newScan = scan.pushDownHighlight(highlight.getHighlightConfig()); if (newScan != null) { call.transformTo(newScan); PlanUtils.tryPruneRelNodes(call); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 4d9f54ccd39..50fe3840339 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -498,19 +498,22 @@ public boolean isProjectPushed() { protected static void applyHighlightPushDown( org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder requestBuilder, - java.util.List highlightArgs) { - if (highlightArgs == null || highlightArgs.isEmpty()) { + org.opensearch.sql.ast.tree.HighlightConfig highlightConfig) { + if (highlightConfig == null + || highlightConfig.fields() == null + || highlightConfig.fields().isEmpty()) { return; } + java.util.List fields = highlightConfig.fields(); org.opensearch.search.fetch.subphase.highlight.HighlightBuilder hb = new org.opensearch.search.fetch.subphase.highlight.HighlightBuilder(); - if (highlightArgs.size() == 1 && "*".equals(highlightArgs.get(0))) { + if (fields.size() == 1 && "*".equals(fields.get(0))) { // Wildcard: highlight search query matches in all fields hb.field("*"); } else { // Highlight specific terms across all fields String queryStr = - highlightArgs.stream() + fields.stream() .map(term -> "\"" + term + "\"") .collect(java.util.stream.Collectors.joining(" OR ")); org.opensearch.search.fetch.subphase.highlight.HighlightBuilder.Field field = @@ -520,7 +523,21 @@ protected static void applyHighlightPushDown( .defaultField("*")); hb.field(field); } - hb.fragmentSize(Integer.MAX_VALUE); + + // Apply pre_tags / post_tags if provided + if (highlightConfig.preTags() != null && !highlightConfig.preTags().isEmpty()) { + hb.preTags(highlightConfig.preTags().toArray(new String[0])); + } + if (highlightConfig.postTags() != null && !highlightConfig.postTags().isEmpty()) { + hb.postTags(highlightConfig.postTags().toArray(new String[0])); + } + + // Apply fragment_size (default to Integer.MAX_VALUE when not specified) + hb.fragmentSize( + highlightConfig.fragmentSize() != null + ? highlightConfig.fragmentSize() + : Integer.MAX_VALUE); + requestBuilder.getSourceBuilder().highlighter(hb); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index c2a068cf621..2dadb888f05 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -42,6 +42,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.common.setting.Settings; @@ -85,7 +86,7 @@ public CalciteLogicalIndexScan( new PushDownContext(osIndex)); } - public RelNode pushDownHighlight(List highlightArgs) { + public RelNode pushDownHighlight(HighlightConfig highlightConfig) { RelDataTypeFactory.Builder schemaBuilder = getCluster().getTypeFactory().builder(); schemaBuilder.addAll(getRowType().getFieldList()); schemaBuilder.add( @@ -96,9 +97,9 @@ public RelNode pushDownHighlight(List highlightArgs) { .getPushDownContext() .add( PushDownType.HIGHLIGHT, - highlightArgs, + highlightConfig.fields(), (OSRequestBuilderAction) - requestBuilder -> applyHighlightPushDown(requestBuilder, highlightArgs)); + requestBuilder -> applyHighlightPushDown(requestBuilder, highlightConfig)); return newScan; } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 1631ab010b1..3ada5c96b72 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -48,7 +48,6 @@ FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; CONVERT: 'CONVERT'; TRENDLINE: 'TRENDLINE'; -HIGHLIGHT: 'HIGHLIGHT'; TRANSPOSE: 'TRANSPOSE'; CHART: 'CHART'; TIMECHART: 'TIMECHART'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 5946fce5bf5..72d5d0fd76d 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -95,7 +95,6 @@ commands | fieldformatCommand | nomvCommand | graphLookupCommand - | highlightCommand ; commandName @@ -146,7 +145,6 @@ commandName | NOMV | TRANSPOSE | GRAPHLOOKUP - | HIGHLIGHT ; searchCommand @@ -565,15 +563,6 @@ trendlineType | WMA ; -highlightCommand - : HIGHLIGHT highlightArg (COMMA highlightArg)* - ; - -highlightArg - : STAR - | stringLiteral - ; - expandCommand : EXPAND fieldExpression (AS alias = qualifiedName)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java index d6f025a4540..ea353066cb0 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java @@ -98,6 +98,7 @@ private AbstractPlan plan( AstStatementBuilder.StatementBuilderContext.builder() .isExplain(request.isExplainRequest()) .fetchSize(request.getFetchSize()) + .highlightConfig(request.getHighlightConfig()) .format(request.getFormat()) .explainMode(request.getExplainMode()) .build())); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java index caf666b3b4e..cb8f21d8d83 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java @@ -5,13 +5,17 @@ package org.opensearch.sql.ppl.domain; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; import java.util.Optional; import lombok.Getter; import lombok.Setter; import lombok.experimental.Accessors; +import org.json.JSONArray; import org.json.JSONObject; import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.protocol.response.format.Format; import org.opensearch.sql.protocol.response.format.JsonResponseFormatter; @@ -19,6 +23,7 @@ public class PPLQueryRequest { private static final String DEFAULT_PPL_PATH = "/_plugins/_ppl"; private static final String FETCH_SIZE_FIELD = "fetch_size"; + private static final String HIGHLIGHT_FIELD = "highlight"; public static final PPLQueryRequest NULL = new PPLQueryRequest("", null, DEFAULT_PPL_PATH, ""); @@ -109,4 +114,65 @@ public int getFetchSize() { } return jsonContent.optInt(FETCH_SIZE_FIELD, 0); } + + /** + * Get highlight config from the request. Supports both the simple array format ({@code ["*"]}) + * and the rich OSD object format with {@code pre_tags}, {@code post_tags}, {@code fields}, and + * {@code fragment_size}. + * + * @return highlight configuration, or null if not specified + */ + public HighlightConfig getHighlightConfig() { + if (jsonContent == null || !jsonContent.has(HIGHLIGHT_FIELD)) { + return null; + } + + // Simple array format: ["*"] or ["error", "login"] + JSONArray arr = jsonContent.optJSONArray(HIGHLIGHT_FIELD); + if (arr != null) { + List fields = new ArrayList<>(); + for (int i = 0; i < arr.length(); i++) { + fields.add(arr.getString(i)); + } + return new HighlightConfig(fields); + } + + // Rich OSD object format: + // { "pre_tags": [...], "post_tags": [...], "fields": {"*": {}}, "fragment_size": N } + JSONObject obj = jsonContent.optJSONObject(HIGHLIGHT_FIELD); + if (obj == null) { + return null; + } + + // Parse fields from "fields" object keys + List fields = new ArrayList<>(); + JSONObject fieldsObj = obj.optJSONObject("fields"); + if (fieldsObj != null) { + for (String key : fieldsObj.keySet()) { + fields.add(key); + } + } + + // Parse pre_tags + List preTags = jsonArrayToList(obj.optJSONArray("pre_tags")); + + // Parse post_tags + List postTags = jsonArrayToList(obj.optJSONArray("post_tags")); + + // Parse fragment_size + Integer fragmentSize = obj.has("fragment_size") ? obj.getInt("fragment_size") : null; + + return new HighlightConfig(fields, preTags, postTags, fragmentSize); + } + + private static List jsonArrayToList(JSONArray arr) { + if (arr == null) { + return null; + } + List list = new ArrayList<>(); + for (int i = 0; i < arr.length(); i++) { + list.add(arr.getString(i)); + } + return list; + } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 7d3a28f5be7..9a92126d2e6 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -89,7 +89,6 @@ import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.GraphLookup.Direction; import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Lookup; @@ -1285,23 +1284,6 @@ public UnresolvedPlan visitTrendlineCommand(OpenSearchPPLParser.TrendlineCommand .orElse(new Trendline(Optional.empty(), trendlineComputations)); } - /** highlight command. */ - @Override - public UnresolvedPlan visitHighlightCommand(OpenSearchPPLParser.HighlightCommandContext ctx) { - List highlightArgs = - ctx.highlightArg().stream() - .map( - arg -> { - if (arg.STAR() != null) { - return "*"; - } else { - return StringUtils.unquoteText(arg.stringLiteral().getText()); - } - }) - .collect(Collectors.toList()); - return new Highlight(highlightArgs); - } - @Override public UnresolvedPlan visitAppendcolCommand(OpenSearchPPLParser.AppendcolCommandContext ctx) { final Optional subsearch = diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java index cee084bc71b..006a6351dca 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java @@ -16,6 +16,8 @@ import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Highlight; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; @@ -35,6 +37,11 @@ public Statement visitPplStatement(OpenSearchPPLParser.PplStatementContext ctx) if (context.getFetchSize() > 0) { rawPlan = new Head(context.getFetchSize(), 0).attach(rawPlan); } + if (context.getHighlightConfig() != null + && context.getHighlightConfig().fields() != null + && !context.getHighlightConfig().fields().isEmpty()) { + rawPlan = new Highlight(context.getHighlightConfig()).attach(rawPlan); + } UnresolvedPlan plan = addSelectAll(rawPlan); Query query = new Query(plan, 0, PPL); if (ctx.explainStatement() != null) { @@ -65,6 +72,9 @@ public static class StatementBuilderContext { */ private final int fetchSize; + /** Highlight config from the API request. */ + private final HighlightConfig highlightConfig; + private final String format; private final String explainMode; } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 377ded06e22..5d2708fd34e 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -734,7 +734,7 @@ public String visitFlatten(Flatten node, String context) { @Override public String visitHighlight(Highlight node, String context) { String child = node.getChild().get(0).accept(this, context); - String fields = String.join(", ", node.getHighlightArgs()); + String fields = String.join(", ", node.getHighlightConfig().fields()); return StringUtils.format("%s | highlight %s", child, fields); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLHighlightTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLHighlightTest.java deleted file mode 100644 index a5dd76669de..00000000000 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLHighlightTest.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.ppl.calcite; - -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.test.CalciteAssert; -import org.junit.Test; - -public class CalcitePPLHighlightTest extends CalcitePPLAbstractTest { - - public CalcitePPLHighlightTest() { - super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); - } - - @Test - public void testHighlightWildcard() { - String ppl = "source=EMP | highlight *"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalHighlight(highlightArgs=[[*]])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - } - - @Test - public void testHighlightSingleTerm() { - String ppl = "source=EMP | highlight \"error\""; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalHighlight(highlightArgs=[[error]])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - } - - @Test - public void testHighlightMultipleTerms() { - String ppl = "source=EMP | highlight \"error\", \"login\""; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalHighlight(highlightArgs=[[error, login]])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - } - - @Test - public void testHighlightWithFilter() { - String ppl = "source=EMP | highlight * | where SAL > 2000"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalFilter(condition=[>($5, 2000)])\n" - + " LogicalHighlight(highlightArgs=[[*]])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - } - - @Test - public void testHighlightWithFields() { - String ppl = "source=EMP | highlight \"error\" | fields ENAME, JOB"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalProject(ENAME=[$1], JOB=[$2], _highlight=[$8])\n" - + " LogicalHighlight(highlightArgs=[[error]])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - } - - @Test - public void testHighlightWithSort() { - String ppl = "source=EMP | highlight * | sort SAL"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" - + " LogicalHighlight(highlightArgs=[[*]])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - } - - @Test - public void testHighlightWithFilterAndFields() { - String ppl = "source=EMP | highlight \"error\" | where SAL > 2000 | fields ENAME, SAL"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalProject(ENAME=[$1], SAL=[$5], _highlight=[$8])\n" - + " LogicalFilter(condition=[>($5, 2000)])\n" - + " LogicalHighlight(highlightArgs=[[error]])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - } -} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index f67fddc42a0..a53e4a5d8dd 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -32,7 +32,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.filter; import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.head; -import static org.opensearch.sql.ast.dsl.AstDSL.highlight; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.let; import static org.opensearch.sql.ast.dsl.AstDSL.map; @@ -1107,28 +1106,6 @@ public void testTrendlineTooFewSamples() { assertThrows(SyntaxCheckException.class, () -> plan("source=t | trendline sma(0, test_field)")); } - @Test - public void testHighlightStar() { - assertEqual("source=t | highlight *", highlight(relation("t"), List.of("*"))); - } - - @Test - public void testHighlightStringLiteral() { - assertEqual("source=t | highlight \"error\"", highlight(relation("t"), List.of("error"))); - } - - @Test - public void testHighlightMultipleStringLiterals() { - assertEqual( - "source=t | highlight \"error\", \"login\"", - highlight(relation("t"), List.of("error", "login"))); - } - - @Test - public void testHighlightFieldExpressionNotSupported() { - assertThrows(SyntaxCheckException.class, () -> plan("source=t | highlight fieldname")); - } - @Test public void testDescribeCommand() { assertEqual("describe t", describe(mappingTable("t"))); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java index 4229bbc8af3..d25a05b80c0 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.*; import static org.opensearch.sql.executor.QueryType.PPL; +import java.util.List; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -18,6 +19,7 @@ import org.opensearch.sql.ast.statement.Explain; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; @@ -116,11 +118,75 @@ public void buildQueryStatementWithFetchSizeAndHeadWithOffset() { new Query(project(head(head(relation("t"), 3, 1), 10, 0), AllFields.of()), 0, PPL)); } + @Test + public void buildQueryStatementWithHighlight() { + // Highlight wraps the raw plan, then Project(*) wraps on top + HighlightConfig config = new HighlightConfig(List.of("*")); + assertEqualWithHighlight( + "search source=t a=1", + config, + new Query( + project(highlight(search(relation("t"), "a:1"), config), AllFields.of()), 0, PPL)); + } + + @Test + public void buildQueryStatementWithHighlightMultipleTerms() { + HighlightConfig config = new HighlightConfig(List.of("error", "login")); + assertEqualWithHighlight( + "search source=t a=1", + config, + new Query( + project(highlight(search(relation("t"), "a:1"), config), AllFields.of()), 0, PPL)); + } + + @Test + public void buildQueryStatementWithHighlightNull() { + // null highlight means no Highlight node injected + assertEqualWithHighlight( + "search source=t a=1", + null, + new Query(project(search(relation("t"), "a:1"), AllFields.of()), 0, PPL)); + } + + @Test + public void buildQueryStatementWithHighlightAndFetchSize() { + // Both fetch_size and highlight: Head wraps first, then Highlight wraps Head + HighlightConfig config = new HighlightConfig(List.of("*")); + assertEqualWithHighlightAndFetchSize( + "search source=t a=1", + config, + 100, + new Query( + project(highlight(head(search(relation("t"), "a:1"), 100, 0), config), AllFields.of()), + 0, + PPL)); + } + private void assertEqualWithFetchSize(String query, int fetchSize, Statement expectedStatement) { Node actualPlan = planWithFetchSize(query, fetchSize); assertEquals(expectedStatement, actualPlan); } + private void assertEqualWithHighlight( + String query, HighlightConfig highlightConfig, Statement expectedStatement) { + Node actualPlan = planWithHighlight(query, highlightConfig); + assertEquals(expectedStatement, actualPlan); + } + + private void assertEqualWithHighlightAndFetchSize( + String query, HighlightConfig highlightConfig, int fetchSize, Statement expectedStatement) { + final AstStatementBuilder builder = + new AstStatementBuilder( + new AstBuilder(query, settings), + AstStatementBuilder.StatementBuilderContext.builder() + .isExplain(false) + .fetchSize(fetchSize) + .highlightConfig(highlightConfig) + .build()); + Node actualPlan = builder.visit(parser.parse(query)); + assertEquals(expectedStatement, actualPlan); + } + private Node planWithFetchSize(String query, int fetchSize) { final AstStatementBuilder builder = new AstStatementBuilder( @@ -132,6 +198,17 @@ private Node planWithFetchSize(String query, int fetchSize) { return builder.visit(parser.parse(query)); } + private Node planWithHighlight(String query, HighlightConfig highlightConfig) { + final AstStatementBuilder builder = + new AstStatementBuilder( + new AstBuilder(query, settings), + AstStatementBuilder.StatementBuilderContext.builder() + .isExplain(false) + .highlightConfig(highlightConfig) + .build()); + return builder.visit(parser.parse(query)); + } + private void assertEqual(String query, Statement expectedStatement) { Node actualPlan = plan(query, false); assertEquals(expectedStatement, actualPlan); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 1987246958f..bb720bd4207 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -262,18 +262,6 @@ public void testTrendlineCommand() { anonymize("source=t | trendline sma(2, date) as date_alias sma(3, time) as time_alias")); } - @Test - public void testHighlightCommand() { - assertEquals("source=table | highlight *", anonymize("source=t | highlight *")); - } - - @Test - public void testHighlightCommandMultipleFields() { - assertEquals( - "source=table | highlight error, login", - anonymize("source=t | highlight \"error\", \"login\"")); - } - @Test public void testHeadCommandWithNumber() { assertEquals("source=table | head 3", anonymize("source=t | head 3")); From 9a272c4cf51c9ce2268675a369d7debfb58c04f4 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 16 Mar 2026 14:19:41 -0700 Subject: [PATCH 13/25] Add more tests Signed-off-by: Jialiang Liang --- .../org/opensearch/sql/ppl/HighlightIT.java | 214 ++++++++++++++++++ .../sql/ppl/domain/PPLQueryRequestTest.java | 90 ++++++++ 2 files changed, 304 insertions(+) create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/HighlightIT.java diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/HighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/HighlightIT.java new file mode 100644 index 00000000000..b9e42742c01 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/HighlightIT.java @@ -0,0 +1,214 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestUtils.getResponseBody; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.plugin.rest.RestPPLQueryAction.QUERY_API_ENDPOINT; + +import java.io.IOException; +import java.util.Locale; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.Assert; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; + +/** Integration tests for PPL highlight parameter. */ +public class HighlightIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + loadIndex(Index.ACCOUNT); + loadIndex(Index.BANK); + } + + @Test + public void testHighlightWildcardWithSearchQuery() throws IOException { + // Search for "Holmes" with wildcard highlight — _highlight should appear in results + JSONObject result = + executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", "[\"*\"]"); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue(dataRows.length() > 0); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testHighlightContainsMatchingFragments() throws IOException { + JSONObject result = + executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", "[\"*\"]"); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue(dataRows.length() > 0); + // Find the _highlight column index and verify it contains highlighted fragments + int highlightIdx = getFieldIndex(result, "_highlight"); + assertTrue("_highlight column should exist", highlightIdx >= 0); + // At least one row should have non-empty highlight data + boolean foundHighlight = false; + for (int i = 0; i < dataRows.length(); i++) { + Object hlValue = dataRows.getJSONArray(i).get(highlightIdx); + if (hlValue != null && !hlValue.equals(JSONObject.NULL)) { + foundHighlight = true; + break; + } + } + assertTrue("At least one row should have highlight data", foundHighlight); + } + + @Test + public void testHighlightOsdObjectFormat() throws IOException { + // OSD sends highlight as a rich object with custom tags + String highlightJson = + "{\"pre_tags\": [\"\"], \"post_tags\": [\"\"]," + + " \"fields\": {\"*\": {}}, \"fragment_size\": 2147483647}"; + JSONObject result = + executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", highlightJson); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue(dataRows.length() > 0); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testHighlightOsdObjectFormatWithDashboardsTags() throws IOException { + // The exact format OSD uses with @opensearch-dashboards-highlighted-field@ tags + String highlightJson = + "{\"pre_tags\": [\"@opensearch-dashboards-highlighted-field@\"]," + + " \"post_tags\": [\"@/opensearch-dashboards-highlighted-field@\"]," + + " \"fields\": {\"*\": {}}, \"fragment_size\": 2147483647}"; + JSONObject result = + executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", highlightJson); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue(dataRows.length() > 0); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testHighlightWithFilter() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | where age > 30", "[\"*\"]"); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testHighlightWithFields() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | fields firstname, lastname", "[\"*\"]"); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testHighlightWithFetchSize() throws IOException { + // Highlight combined with fetch_size + Request request = new Request("POST", QUERY_API_ENDPOINT); + String jsonBody = + String.format( + Locale.ROOT, + "{\n \"query\": \"source=%s \\\"Holmes\\\"\",\n" + + " \"fetch_size\": 5,\n" + + " \"highlight\": [\"*\"]\n}", + TEST_INDEX_ACCOUNT); + request.setJsonEntity(jsonBody); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + Response response = client().performRequest(request); + Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + JSONObject result = jsonify(getResponseBody(response, true)); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testHighlightWithSort() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | sort age", "[\"*\"]"); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testHighlightWithEval() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + + TEST_INDEX_ACCOUNT + + " \"Holmes\" | eval age_plus_10 = age + 10 | fields firstname, age_plus_10", + "[\"*\"]"); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testHighlightNoSearchQuery() throws IOException { + // Without a search query, _highlight column appears but fragments may be empty + JSONObject result = executeQueryWithHighlight("source=" + TEST_INDEX_BANK, "[\"*\"]"); + assertSchemaContains(result, "_highlight"); + } + + @Test + public void testWithoutHighlightNoHighlightColumn() throws IOException { + // Without highlight parameter, _highlight should NOT appear in schema + JSONObject result = executeQuery("source=" + TEST_INDEX_BANK); + assertSchemaDoesNotContain(result, "_highlight"); + } + + /** + * Execute a PPL query with highlight parameter. + * + * @param query the PPL query string + * @param highlightJson the highlight JSON (array or object) + * @return the JSON response + */ + protected JSONObject executeQueryWithHighlight(String query, String highlightJson) + throws IOException { + Request request = new Request("POST", QUERY_API_ENDPOINT); + request.setJsonEntity( + String.format( + Locale.ROOT, "{\n \"query\": \"%s\",\n \"highlight\": %s\n}", query, highlightJson)); + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + + Response response = client().performRequest(request); + Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + return jsonify(getResponseBody(response, true)); + } + + /** Assert that the response schema contains a field with the given name. */ + private void assertSchemaContains(JSONObject result, String fieldName) { + JSONArray schema = result.getJSONArray("schema"); + for (int i = 0; i < schema.length(); i++) { + if (schema.getJSONObject(i).getString("name").equals(fieldName)) { + return; + } + } + Assert.fail("Schema should contain field: " + fieldName); + } + + /** Assert that the response schema does NOT contain a field with the given name. */ + private void assertSchemaDoesNotContain(JSONObject result, String fieldName) { + JSONArray schema = result.getJSONArray("schema"); + for (int i = 0; i < schema.length(); i++) { + if (schema.getJSONObject(i).getString("name").equals(fieldName)) { + Assert.fail("Schema should NOT contain field: " + fieldName); + } + } + } + + /** Get the column index for a field name from the schema. */ + private int getFieldIndex(JSONObject result, String fieldName) { + JSONArray schema = result.getJSONArray("schema"); + for (int i = 0; i < schema.length(); i++) { + if (schema.getJSONObject(i).getString("name").equals(fieldName)) { + return i; + } + } + return -1; + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java index 9fe3a4fef64..809ecd357ae 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java @@ -6,12 +6,16 @@ package org.opensearch.sql.ppl.domain; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import java.util.List; import org.json.JSONObject; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.protocol.response.format.Format; public class PPLQueryRequestTest { @@ -92,4 +96,90 @@ public void testGetFetchSizeWithLargeValue() { PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); assertEquals(15000, request.getFetchSize()); } + + @Test + public void testGetHighlightConfigReturnsNullWhenJsonContentIsNull() { + PPLQueryRequest request = new PPLQueryRequest("source=t", null, "/_plugins/_ppl"); + assertNull(request.getHighlightConfig()); + } + + @Test + public void testGetHighlightConfigReturnsNullWhenNotSpecified() { + JSONObject json = new JSONObject("{\"query\": \"source=t\"}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + assertNull(request.getHighlightConfig()); + } + + @Test + public void testGetHighlightConfigSimpleArrayWildcard() { + JSONObject json = new JSONObject("{\"query\": \"source=t\", \"highlight\": [\"*\"]}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + HighlightConfig config = request.getHighlightConfig(); + assertNotNull(config); + assertEquals(List.of("*"), config.fields()); + assertNull(config.preTags()); + assertNull(config.postTags()); + assertNull(config.fragmentSize()); + } + + @Test + public void testGetHighlightConfigSimpleArrayMultipleTerms() { + JSONObject json = + new JSONObject("{\"query\": \"source=t\", \"highlight\": [\"error\", \"login\"]}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + HighlightConfig config = request.getHighlightConfig(); + assertNotNull(config); + assertEquals(List.of("error", "login"), config.fields()); + assertNull(config.preTags()); + assertNull(config.postTags()); + assertNull(config.fragmentSize()); + } + + @Test + public void testGetHighlightConfigOsdObjectFormatAllFields() { + JSONObject json = + new JSONObject( + "{\"query\": \"source=t\", \"highlight\": {" + + "\"pre_tags\": [\"\"], \"post_tags\": [\"\"]," + + "\"fields\": {\"*\": {}}, \"fragment_size\": 2147483647}}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + HighlightConfig config = request.getHighlightConfig(); + assertNotNull(config); + assertEquals(List.of("*"), config.fields()); + assertEquals(List.of(""), config.preTags()); + assertEquals(List.of(""), config.postTags()); + assertEquals(Integer.valueOf(2147483647), config.fragmentSize()); + } + + @Test + public void testGetHighlightConfigOsdObjectFormatMultipleTags() { + JSONObject json = + new JSONObject( + "{\"query\": \"source=t\", \"highlight\": {" + + "\"pre_tags\": [\"\", \"\"], \"post_tags\": [\"\", \"\"]," + + "\"fields\": {\"title\": {}, \"body\": {}}}}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + HighlightConfig config = request.getHighlightConfig(); + assertNotNull(config); + assertEquals(2, config.fields().size()); + assertTrue(config.fields().contains("title")); + assertTrue(config.fields().contains("body")); + assertEquals(List.of("", ""), config.preTags()); + assertEquals(List.of("", ""), config.postTags()); + assertNull(config.fragmentSize()); + } + + @Test + public void testGetHighlightConfigOsdObjectFormatFieldsOnly() { + // Object format with only fields, no tags or fragment_size + JSONObject json = + new JSONObject("{\"query\": \"source=t\", \"highlight\": {\"fields\": {\"*\": {}}}}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + HighlightConfig config = request.getHighlightConfig(); + assertNotNull(config); + assertEquals(List.of("*"), config.fields()); + assertNull(config.preTags()); + assertNull(config.postTags()); + assertNull(config.fragmentSize()); + } } From d039af69e818c08eabaa2190f1e3b8957cb245a0 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 16 Mar 2026 14:23:54 -0700 Subject: [PATCH 14/25] Update the doc Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 39 +++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index 30b5206e188..53dcb28782d 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -204,7 +204,7 @@ Expected output (trimmed): ## Highlight -You can add a `highlight` parameter to the PPL request body to enable search-result highlighting. When enabled, the response includes a `_highlight` field containing matching fragments with the specified tags. +You can add a `highlight` parameter to the PPL request body to enable search-result highlighting. When enabled, the response includes a top-level `highlights` array containing matching fragments with the specified tags. Each entry in the `highlights` array corresponds to the row at the same index in `datarows`. Two formats are supported: @@ -239,7 +239,39 @@ curl -sS -H 'Content-Type: application/json' \ }' ``` -### Parameters +Expected output (trimmed): + +```json +{ + "schema": [ + { "name": "account_number", "type": "bigint" }, + { "name": "firstname", "type": "string" }, + { "name": "lastname", "type": "string" } + ], + "datarows": [ + [578, "Holmes", "Mcknight"], + [828, "Blanche", "Holmes"], + [1, "Amber", "Duke"] + ], + "highlights": [ + { + "firstname": ["@opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@"], + "firstname.keyword": ["@opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@"] + }, + { + "lastname": ["@opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@"], + "lastname.keyword": ["@opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@"] + }, + { + "address": ["880 @opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@ Lane"] + } + ], + "total": 3, + "size": 3 +} +``` + +### Parameters (object format) | Parameter | Type | Required | Description | |-----------------|-----------------|----------|--------------------------------------------------------------------------------------------------------------| @@ -250,6 +282,7 @@ curl -sS -H 'Content-Type: application/json' \ ### Notes -- Highlighting requires a search query in the PPL statement (e.g. `source=accounts "Holmes"`). Without a query, the `_highlight` field will be empty. +- Highlighting requires a search query in the PPL statement (e.g. `source=accounts "Holmes"`). Without a query, the `highlights` array entries will be empty. +- The `highlights` array in the response is parallel to `datarows` — each entry contains the highlighted fragments for the corresponding row. - In the simple array format, `["*"]` highlights all fields. Specific terms like `["error", "login"]` highlight those terms across all fields. - In the object format, only the keys of the `fields` object are used; per-field options inside the value objects are currently ignored. From 82443d8590d1478b09377356f27a339b9f35aaaf Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 16 Mar 2026 15:32:16 -0700 Subject: [PATCH 15/25] fix doc test Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index 53dcb28782d..8f68aa6e1d1 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -212,7 +212,7 @@ Two formats are supported: Pass a JSON array of field names or wildcards. Use `["*"]` to highlight all fields that match the search query. -```bash ppl +```bash ppl ignore curl -sS -H 'Content-Type: application/json' \ -X POST localhost:9200/_plugins/_ppl \ -d '{ @@ -225,7 +225,7 @@ curl -sS -H 'Content-Type: application/json' \ Pass a JSON object with `fields`, `pre_tags`, `post_tags`, and `fragment_size`. This is the format used by OpenSearch Dashboards. -```bash ppl +```bash ppl ignore curl -sS -H 'Content-Type: application/json' \ -X POST localhost:9200/_plugins/_ppl \ -d '{ From 124c42a5e9ec91138850c41f60666d423afef344 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 16 Mar 2026 16:38:31 -0700 Subject: [PATCH 16/25] fix IT Signed-off-by: Jialiang Liang --- .../remote/CalciteHighlightIT.java} | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) rename integ-test/src/test/java/org/opensearch/sql/{ppl/HighlightIT.java => calcite/remote/CalciteHighlightIT.java} (97%) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/HighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java similarity index 97% rename from integ-test/src/test/java/org/opensearch/sql/ppl/HighlightIT.java rename to integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java index b9e42742c01..20c9157af1b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/HighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.sql.ppl; +package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestUtils.getResponseBody; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; @@ -19,13 +19,15 @@ import org.opensearch.client.Request; import org.opensearch.client.RequestOptions; import org.opensearch.client.Response; +import org.opensearch.sql.ppl.PPLIntegTestCase; /** Integration tests for PPL highlight parameter. */ -public class HighlightIT extends PPLIntegTestCase { +public class CalciteHighlightIT extends PPLIntegTestCase { @Override public void init() throws Exception { super.init(); + enableCalcite(); loadIndex(Index.ACCOUNT); loadIndex(Index.BANK); } From c87b5b829d7424be0d4966763f0e4fe277c800de Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 16 Mar 2026 17:33:22 -0700 Subject: [PATCH 17/25] fix tests Signed-off-by: Jialiang Liang --- .../sql/calcite/remote/CalciteExplainIT.java | 4 +- .../calcite/remote/CalciteHighlightIT.java | 134 ++++++++---------- .../calcite/explain_highlight_osd_format.yaml | 8 ++ .../explain_highlight_osd_format.yaml | 10 ++ 4 files changed, 83 insertions(+), 73 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_osd_format.yaml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 293cd669ddf..8a7eec3dc3f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2828,8 +2828,8 @@ public void testHighlightOsdObjectFormatExplain() throws IOException { "{\"pre_tags\": [\"\"], \"post_tags\": [\"\"]," + " \"fields\": {\"*\": {}}, \"fragment_size\": 2147483647}"; var result = explainQueryYaml(query, highlightJson); - // Same explain plan as wildcard (fields: {"*": {}}) — just verify it doesn't error - String expected = loadExpectedPlan("explain_highlight_wildcard.yaml"); + // OSD format includes pre_tags/post_tags in the highlight builder output + String expected = loadExpectedPlan("explain_highlight_osd_format.yaml"); assertYamlEqualsIgnoreId(expected, result); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java index 20c9157af1b..edd88fede38 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java @@ -11,7 +11,6 @@ import static org.opensearch.sql.plugin.rest.RestPPLQueryAction.QUERY_API_ENDPOINT; import java.io.IOException; -import java.util.Locale; import org.json.JSONArray; import org.json.JSONObject; import org.junit.Assert; @@ -34,38 +33,33 @@ public void init() throws Exception { @Test public void testHighlightWildcardWithSearchQuery() throws IOException { - // Search for "Holmes" with wildcard highlight — _highlight should appear in results JSONObject result = executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", "[\"*\"]"); JSONArray dataRows = result.getJSONArray("datarows"); assertTrue(dataRows.length() > 0); - assertSchemaContains(result, "_highlight"); + assertHighlightsExist(result); } @Test public void testHighlightContainsMatchingFragments() throws IOException { JSONObject result = executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", "[\"*\"]"); - JSONArray dataRows = result.getJSONArray("datarows"); - assertTrue(dataRows.length() > 0); - // Find the _highlight column index and verify it contains highlighted fragments - int highlightIdx = getFieldIndex(result, "_highlight"); - assertTrue("_highlight column should exist", highlightIdx >= 0); - // At least one row should have non-empty highlight data - boolean foundHighlight = false; - for (int i = 0; i < dataRows.length(); i++) { - Object hlValue = dataRows.getJSONArray(i).get(highlightIdx); - if (hlValue != null && !hlValue.equals(JSONObject.NULL)) { - foundHighlight = true; + JSONArray highlights = result.getJSONArray("highlights"); + assertTrue("highlights array should not be empty", highlights.length() > 0); + // At least one highlight entry should have non-empty data + boolean foundFragment = false; + for (int i = 0; i < highlights.length(); i++) { + JSONObject hlEntry = highlights.getJSONObject(i); + if (hlEntry.length() > 0) { + foundFragment = true; break; } } - assertTrue("At least one row should have highlight data", foundHighlight); + assertTrue("At least one highlight entry should have fragment data", foundFragment); } @Test public void testHighlightOsdObjectFormat() throws IOException { - // OSD sends highlight as a rich object with custom tags String highlightJson = "{\"pre_tags\": [\"\"], \"post_tags\": [\"\"]," + " \"fields\": {\"*\": {}}, \"fragment_size\": 2147483647}"; @@ -73,12 +67,22 @@ public void testHighlightOsdObjectFormat() throws IOException { executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", highlightJson); JSONArray dataRows = result.getJSONArray("datarows"); assertTrue(dataRows.length() > 0); - assertSchemaContains(result, "_highlight"); + assertHighlightsExist(result); + // Verify custom tags are applied + JSONArray highlights = result.getJSONArray("highlights"); + boolean foundCustomTag = false; + for (int i = 0; i < highlights.length(); i++) { + String hlStr = highlights.getJSONObject(i).toString(); + if (hlStr.contains("")) { + foundCustomTag = true; + break; + } + } + assertTrue("Highlights should use custom tags", foundCustomTag); } @Test public void testHighlightOsdObjectFormatWithDashboardsTags() throws IOException { - // The exact format OSD uses with @opensearch-dashboards-highlighted-field@ tags String highlightJson = "{\"pre_tags\": [\"@opensearch-dashboards-highlighted-field@\"]," + " \"post_tags\": [\"@/opensearch-dashboards-highlighted-field@\"]," @@ -87,7 +91,18 @@ public void testHighlightOsdObjectFormatWithDashboardsTags() throws IOException executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", highlightJson); JSONArray dataRows = result.getJSONArray("datarows"); assertTrue(dataRows.length() > 0); - assertSchemaContains(result, "_highlight"); + assertHighlightsExist(result); + // Verify dashboards tags are applied + JSONArray highlights = result.getJSONArray("highlights"); + boolean foundDashboardsTag = false; + for (int i = 0; i < highlights.length(); i++) { + String hlStr = highlights.getJSONObject(i).toString(); + if (hlStr.contains("@opensearch-dashboards-highlighted-field@")) { + foundDashboardsTag = true; + break; + } + } + assertTrue("Highlights should use OSD dashboards tags", foundDashboardsTag); } @Test @@ -95,7 +110,7 @@ public void testHighlightWithFilter() throws IOException { JSONObject result = executeQueryWithHighlight( "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | where age > 30", "[\"*\"]"); - assertSchemaContains(result, "_highlight"); + assertHighlightsExist(result); } @Test @@ -103,28 +118,24 @@ public void testHighlightWithFields() throws IOException { JSONObject result = executeQueryWithHighlight( "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | fields firstname, lastname", "[\"*\"]"); - assertSchemaContains(result, "_highlight"); + assertHighlightsExist(result); } @Test public void testHighlightWithFetchSize() throws IOException { - // Highlight combined with fetch_size Request request = new Request("POST", QUERY_API_ENDPOINT); - String jsonBody = - String.format( - Locale.ROOT, - "{\n \"query\": \"source=%s \\\"Holmes\\\"\",\n" - + " \"fetch_size\": 5,\n" - + " \"highlight\": [\"*\"]\n}", - TEST_INDEX_ACCOUNT); - request.setJsonEntity(jsonBody); + JSONObject body = new JSONObject(); + body.put("query", "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\""); + body.put("fetch_size", 5); + body.put("highlight", new JSONArray("[\"*\"]")); + request.setJsonEntity(body.toString()); RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); restOptionsBuilder.addHeader("Content-Type", "application/json"); request.setOptions(restOptionsBuilder); Response response = client().performRequest(request); Assert.assertEquals(200, response.getStatusLine().getStatusCode()); JSONObject result = jsonify(getResponseBody(response, true)); - assertSchemaContains(result, "_highlight"); + assertHighlightsExist(result); } @Test @@ -132,7 +143,7 @@ public void testHighlightWithSort() throws IOException { JSONObject result = executeQueryWithHighlight( "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | sort age", "[\"*\"]"); - assertSchemaContains(result, "_highlight"); + assertHighlightsExist(result); } @Test @@ -143,21 +154,21 @@ public void testHighlightWithEval() throws IOException { + TEST_INDEX_ACCOUNT + " \"Holmes\" | eval age_plus_10 = age + 10 | fields firstname, age_plus_10", "[\"*\"]"); - assertSchemaContains(result, "_highlight"); + assertHighlightsExist(result); } @Test public void testHighlightNoSearchQuery() throws IOException { - // Without a search query, _highlight column appears but fragments may be empty + // Without a search query, request should still succeed (highlights may or may not be present) JSONObject result = executeQueryWithHighlight("source=" + TEST_INDEX_BANK, "[\"*\"]"); - assertSchemaContains(result, "_highlight"); + assertTrue("Response should contain datarows", result.has("datarows")); } @Test - public void testWithoutHighlightNoHighlightColumn() throws IOException { - // Without highlight parameter, _highlight should NOT appear in schema + public void testWithoutHighlightNoHighlightArray() throws IOException { + // Without highlight parameter, highlights array should NOT appear JSONObject result = executeQuery("source=" + TEST_INDEX_BANK); - assertSchemaDoesNotContain(result, "_highlight"); + assertFalse("Response should NOT contain highlights array", result.has("highlights")); } /** @@ -170,9 +181,15 @@ public void testWithoutHighlightNoHighlightColumn() throws IOException { protected JSONObject executeQueryWithHighlight(String query, String highlightJson) throws IOException { Request request = new Request("POST", QUERY_API_ENDPOINT); - request.setJsonEntity( - String.format( - Locale.ROOT, "{\n \"query\": \"%s\",\n \"highlight\": %s\n}", query, highlightJson)); + JSONObject body = new JSONObject(); + body.put("query", query); + // Parse highlightJson to proper JSON type (array or object) so it serializes correctly + Object highlightValue = + highlightJson.trim().startsWith("[") + ? new JSONArray(highlightJson) + : new JSONObject(highlightJson); + body.put("highlight", highlightValue); + request.setJsonEntity(body.toString()); RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); restOptionsBuilder.addHeader("Content-Type", "application/json"); request.setOptions(restOptionsBuilder); @@ -182,35 +199,10 @@ protected JSONObject executeQueryWithHighlight(String query, String highlightJso return jsonify(getResponseBody(response, true)); } - /** Assert that the response schema contains a field with the given name. */ - private void assertSchemaContains(JSONObject result, String fieldName) { - JSONArray schema = result.getJSONArray("schema"); - for (int i = 0; i < schema.length(); i++) { - if (schema.getJSONObject(i).getString("name").equals(fieldName)) { - return; - } - } - Assert.fail("Schema should contain field: " + fieldName); - } - - /** Assert that the response schema does NOT contain a field with the given name. */ - private void assertSchemaDoesNotContain(JSONObject result, String fieldName) { - JSONArray schema = result.getJSONArray("schema"); - for (int i = 0; i < schema.length(); i++) { - if (schema.getJSONObject(i).getString("name").equals(fieldName)) { - Assert.fail("Schema should NOT contain field: " + fieldName); - } - } - } - - /** Get the column index for a field name from the schema. */ - private int getFieldIndex(JSONObject result, String fieldName) { - JSONArray schema = result.getJSONArray("schema"); - for (int i = 0; i < schema.length(); i++) { - if (schema.getJSONObject(i).getString("name").equals(fieldName)) { - return i; - } - } - return -1; + /** Assert that the response contains a non-empty highlights array. */ + private void assertHighlightsExist(JSONObject result) { + assertTrue("Response should contain highlights array", result.has("highlights")); + JSONArray highlights = result.getJSONArray("highlights"); + assertTrue("Highlights array should not be empty", highlights.length() > 0); } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml new file mode 100644 index 00000000000..3dce4195f5d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) + LogicalHighlight(highlightConfig=[[*]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"pre_tags":[""],"post_tags":[""],"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_osd_format.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_osd_format.yaml new file mode 100644 index 00000000000..b459538a488 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_osd_format.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) + LogicalHighlight(highlightConfig=[[*]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"pre_tags":[""],"post_tags":[""],"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) From f3939c6aa91aad37699302e1a60e043a01fe123f Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Tue, 17 Mar 2026 11:32:01 -0700 Subject: [PATCH 18/25] peng - align with lucene opensearch for request intake Signed-off-by: Jialiang Liang --- .../explain_highlight_single_term.yaml | 2 +- .../explain_highlight_single_term.yaml | 2 +- .../scan/AbstractCalciteIndexScan.java | 28 ++++++------------- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml index 6ba122c4317..437340e88d6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml @@ -5,4 +5,4 @@ calcite: LogicalHighlight(highlightConfig=[[Holmes]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{"highlight_query":{"query_string":{"query":"\"Holmes\"","default_field":"*","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"Holmes":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml index 18fb38bf945..ec64a3d1c68 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fragment_size":2147483647,"fields":{"*":{"highlight_query":{"query_string":{"query":"\"Holmes\"","default_field":"*","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fragment_size":2147483647,"fields":{"Holmes":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 50fe3840339..c821093edc6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -45,15 +45,18 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.ScriptSortBuilder.ScriptSortType; import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.request.PredicateAnalyzer; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; @@ -497,31 +500,16 @@ public boolean isProjectPushed() { } protected static void applyHighlightPushDown( - org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder requestBuilder, - org.opensearch.sql.ast.tree.HighlightConfig highlightConfig) { + OpenSearchRequestBuilder requestBuilder, HighlightConfig highlightConfig) { if (highlightConfig == null || highlightConfig.fields() == null || highlightConfig.fields().isEmpty()) { return; } - java.util.List fields = highlightConfig.fields(); - org.opensearch.search.fetch.subphase.highlight.HighlightBuilder hb = - new org.opensearch.search.fetch.subphase.highlight.HighlightBuilder(); - if (fields.size() == 1 && "*".equals(fields.get(0))) { - // Wildcard: highlight search query matches in all fields - hb.field("*"); - } else { - // Highlight specific terms across all fields - String queryStr = - fields.stream() - .map(term -> "\"" + term + "\"") - .collect(java.util.stream.Collectors.joining(" OR ")); - org.opensearch.search.fetch.subphase.highlight.HighlightBuilder.Field field = - new org.opensearch.search.fetch.subphase.highlight.HighlightBuilder.Field("*") - .highlightQuery( - org.opensearch.index.query.QueryBuilders.queryStringQuery(queryStr) - .defaultField("*")); - hb.field(field); + List fields = highlightConfig.fields(); + HighlightBuilder hb = new HighlightBuilder(); + for (String fieldName : fields) { + hb.field(fieldName); } // Apply pre_tags / post_tags if provided From 9b992966869cfa4d9a676d56fa38691fbc232087 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Tue, 17 Mar 2026 11:49:56 -0700 Subject: [PATCH 19/25] peng - set the default fragment size to 100 Signed-off-by: Jialiang Liang --- .../calcite/explain_highlight_single_term.yaml | 2 +- .../calcite/explain_highlight_wildcard.yaml | 2 +- .../calcite/explain_highlight_with_filter.yaml | 2 +- .../explain_highlight_single_term.yaml | 2 +- .../calcite_no_pushdown/explain_highlight_wildcard.yaml | 2 +- .../explain_highlight_with_filter.yaml | 2 +- .../storage/scan/AbstractCalciteIndexScan.java | 9 ++++----- 7 files changed, 10 insertions(+), 11 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml index 437340e88d6..c4bfbc0a8bf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml @@ -5,4 +5,4 @@ calcite: LogicalHighlight(highlightConfig=[[Holmes]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"Holmes":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fields":{"Holmes":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml index e8a242d1456..a570f1246c8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml @@ -5,4 +5,4 @@ calcite: LogicalHighlight(highlightConfig=[[*]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml index 74ba1020f57..be897c9958f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml @@ -6,4 +6,4 @@ calcite: LogicalHighlight(highlightConfig=[[*]]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], FILTER->>($8, 30), PROJECT->[firstname, age, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["firstname","age"],"excludes":[]},"highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], FILTER->>($8, 30), PROJECT->[firstname, age, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["firstname","age"],"excludes":[]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml index ec64a3d1c68..f4270b4f69f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fragment_size":2147483647,"fields":{"Holmes":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"Holmes":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml index 85e64651ed0..130e542f796 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml index 576067c384a..c56a7f590b6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml @@ -8,4 +8,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[30], expr#19=[>($t8, $t18)], firstname=[$t1], age=[$t8], _highlight=[$t17], $condition=[$t19]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index c821093edc6..c52904a25d3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -520,11 +520,10 @@ protected static void applyHighlightPushDown( hb.postTags(highlightConfig.postTags().toArray(new String[0])); } - // Apply fragment_size (default to Integer.MAX_VALUE when not specified) - hb.fragmentSize( - highlightConfig.fragmentSize() != null - ? highlightConfig.fragmentSize() - : Integer.MAX_VALUE); + // Apply fragment_size only when explicitly specified; otherwise let OpenSearch use its default + if (highlightConfig.fragmentSize() != null) { + hb.fragmentSize(highlightConfig.fragmentSize()); + } requestBuilder.getSourceBuilder().highlighter(hb); } From 87bb284a5b284792a9feb23e4ab17c0eaf61ae08 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Tue, 17 Mar 2026 11:56:17 -0700 Subject: [PATCH 20/25] peng - update the doc to reflect the changes Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index 8f68aa6e1d1..a130cf53c54 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -204,7 +204,7 @@ Expected output (trimmed): ## Highlight -You can add a `highlight` parameter to the PPL request body to enable search-result highlighting. When enabled, the response includes a top-level `highlights` array containing matching fragments with the specified tags. Each entry in the `highlights` array corresponds to the row at the same index in `datarows`. +You can add a `highlight` parameter to the PPL request body to enable search-result highlighting. This parameter follows the same semantics as the [OpenSearch highlight API](https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/). When enabled, the response includes a top-level `highlights` array containing matching fragments with the specified tags. Each entry in the `highlights` array corresponds to the row at the same index in `datarows`. Two formats are supported: @@ -278,11 +278,11 @@ Expected output (trimmed): | `fields` | Object | Yes | An object whose keys are field names or wildcards (e.g. `{"*": {}}`) to highlight. | | `pre_tags` | Array of string | No | Tags inserted before highlighted tokens. Defaults to ``. | | `post_tags` | Array of string | No | Tags inserted after highlighted tokens. Defaults to ``. | -| `fragment_size` | Integer | No | Maximum character size of a highlight fragment. Defaults to `2147483647` (returns the full field value). | +| `fragment_size` | Integer | No | Maximum character size of a highlight fragment. Defaults to `100`. | ### Notes - Highlighting requires a search query in the PPL statement (e.g. `source=accounts "Holmes"`). Without a query, the `highlights` array entries will be empty. - The `highlights` array in the response is parallel to `datarows` — each entry contains the highlighted fragments for the corresponding row. -- In the simple array format, `["*"]` highlights all fields. Specific terms like `["error", "login"]` highlight those terms across all fields. +- In the simple array format, `["*"]` highlights all fields. Specific field names like `["firstname", "lastname"]` scope highlighting to those fields only. - In the object format, only the keys of the `fields` object are used; per-field options inside the value objects are currently ignored. From 1b07536570a8a1708c4d6cff2d14f68a2d53bda8 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Tue, 17 Mar 2026 18:13:03 -0700 Subject: [PATCH 21/25] Peng - drop the AST layer implementation and address the comments. Signed-off-by: Jialiang Liang --- .../org/opensearch/sql/analysis/Analyzer.java | 6 - .../sql/ast/AbstractNodeVisitor.java | 5 - .../org/opensearch/sql/ast/dsl/AstDSL.java | 10 -- .../opensearch/sql/ast/statement/Query.java | 2 + .../opensearch/sql/ast/tree/Highlight.java | 41 ------- .../sql/ast/tree/HighlightConfig.java | 38 +++++- .../sql/calcite/CalcitePlanContext.java | 1 + .../sql/calcite/CalciteRelNodeVisitor.java | 27 ++--- .../sql/calcite/plan/HighlightPushDown.java | 18 +++ .../calcite/plan/rel/LogicalHighlight.java | 77 ------------ .../opensearch/sql/executor/QueryService.java | 28 ++++- .../sql/executor/execution/QueryPlan.java | 20 +++- .../executor/execution/QueryPlanFactory.java | 7 +- .../sql/executor/execution/QueryPlanTest.java | 4 +- docs/user/ppl/interfaces/endpoint.md | 7 +- .../calcite/remote/CalciteHighlightIT.java | 56 +++++++++ .../calcite/explain_highlight_osd_format.yaml | 3 +- .../explain_highlight_single_term.yaml | 3 +- .../calcite/explain_highlight_wildcard.yaml | 3 +- .../explain_highlight_with_filter.yaml | 5 +- .../explain_highlight_osd_format.yaml | 3 +- .../explain_highlight_single_term.yaml | 3 +- .../explain_highlight_wildcard.yaml | 3 +- .../explain_highlight_with_filter.yaml | 3 +- .../planner/rules/HighlightIndexScanRule.java | 55 --------- .../planner/rules/OpenSearchIndexRules.java | 7 +- .../scan/AbstractCalciteIndexScan.java | 44 ++++++- .../storage/scan/CalciteLogicalIndexScan.java | 5 +- .../scan/OpenSearchIndexEnumerator.java | 7 +- .../sql/ppl/domain/PPLQueryRequest.java | 111 +++++++++++++++--- .../sql/ppl/parser/AstStatementBuilder.java | 7 +- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 8 -- .../opensearch/sql/ppl/PPLServiceTest.java | 16 +-- .../sql/ppl/domain/PPLQueryRequestTest.java | 28 ++++- .../ppl/parser/AstStatementBuilderTest.java | 34 ++---- 35 files changed, 373 insertions(+), 322 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java create mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushDown.java delete mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 6303a411d53..fc96f2f389c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -76,7 +76,6 @@ import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Limit; @@ -571,11 +570,6 @@ public LogicalPlan visitGraphLookup(GraphLookup node, AnalysisContext context) { throw getOnlyForCalciteException("graphlookup"); } - @Override - public LogicalPlan visitHighlight(Highlight node, AnalysisContext context) { - throw getOnlyForCalciteException("highlight"); - } - /** Build {@link ParseExpression} to context and skip to child nodes. */ @Override public LogicalPlan visitParse(Parse node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index e7151fb5853..7f02bb3ef1b 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -64,7 +64,6 @@ import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Limit; @@ -496,8 +495,4 @@ public T visitMvExpand(MvExpand node, C context) { public T visitGraphLookup(GraphLookup node, C context) { return visitChildren(node, context); } - - public T visitHighlight(Highlight node, C context) { - return visitChildren(node, context); - } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 42b13d3eabd..b2731ebbd40 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -60,8 +60,6 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Highlight; -import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.MvCombine; @@ -586,14 +584,6 @@ public static Trendline trendline( return new Trendline(sortField, Arrays.asList(computations)).attach(input); } - public static Highlight highlight(UnresolvedPlan input, HighlightConfig highlightConfig) { - return new Highlight(highlightConfig).attach(input); - } - - public static Highlight highlight(UnresolvedPlan input, List highlightFields) { - return new Highlight(new HighlightConfig(highlightFields)).attach(input); - } - public static AppendPipe appendPipe(UnresolvedPlan input, UnresolvedPlan subquery) { return new AppendPipe(subquery).attach(input); diff --git a/core/src/main/java/org/opensearch/sql/ast/statement/Query.java b/core/src/main/java/org/opensearch/sql/ast/statement/Query.java index 6681d6d1d2c..c6a78724b76 100644 --- a/core/src/main/java/org/opensearch/sql/ast/statement/Query.java +++ b/core/src/main/java/org/opensearch/sql/ast/statement/Query.java @@ -11,6 +11,7 @@ import lombok.Setter; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.executor.QueryType; @@ -25,6 +26,7 @@ public class Query extends Statement { protected final UnresolvedPlan plan; protected final int fetchSize; private final QueryType queryType; + private HighlightConfig highlightConfig; @Override public R accept(AbstractNodeVisitor visitor, C context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java b/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java deleted file mode 100644 index df710b7a2e8..00000000000 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Highlight.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.ast.tree; - -import com.google.common.collect.ImmutableList; -import java.util.List; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.ToString; -import org.opensearch.sql.ast.AbstractNodeVisitor; -import org.opensearch.sql.ast.Node; - -@ToString -@Getter -@RequiredArgsConstructor -@EqualsAndHashCode(callSuper = false) -public class Highlight extends UnresolvedPlan { - - private UnresolvedPlan child; - private final HighlightConfig highlightConfig; - - @Override - public Highlight attach(UnresolvedPlan child) { - this.child = child; - return this; - } - - @Override - public List getChild() { - return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); - } - - @Override - public T accept(AbstractNodeVisitor visitor, C context) { - return visitor.visitHighlight(this, context); - } -} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/HighlightConfig.java b/core/src/main/java/org/opensearch/sql/ast/tree/HighlightConfig.java index 13880b56002..cd520362646 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/HighlightConfig.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/HighlightConfig.java @@ -5,18 +5,44 @@ package org.opensearch.sql.ast.tree; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; /** - * Bundles highlight configuration: field names (or wildcards) and optional pre/post tags and - * fragment size. Supports both the simple array format ({@code ["*"]}) and the rich OSD object - * format with {@code pre_tags}, {@code post_tags}, {@code fields}, and {@code fragment_size}. + * Bundles highlight configuration: field names (or wildcards) with per-field options, and optional + * global pre/post tags and fragment size. Supports both the simple array format ({@code ["*"]}) and + * the rich OSD object format with {@code pre_tags}, {@code post_tags}, {@code fields}, and {@code + * fragment_size}. + * + *

The {@code fields} map keys are field names or wildcards; the values are per-field option maps + * that are passed through to the OpenSearch highlight builder (e.g. {@code fragment_size}, {@code + * number_of_fragments}, {@code type}). */ public record HighlightConfig( - List fields, List preTags, List postTags, Integer fragmentSize) { + Map> fields, + List preTags, + List postTags, + Integer fragmentSize) { /** Convenience constructor for the simple array format (fields only, no tag/size overrides). */ - public HighlightConfig(List fields) { - this(fields, null, null, null); + public HighlightConfig(List fieldNames) { + this(toFieldMap(fieldNames), null, null, null); + } + + /** Returns the field names as a list (for display and iteration). */ + public List fieldNames() { + return fields == null ? List.of() : List.copyOf(fields.keySet()); + } + + private static Map> toFieldMap(List fieldNames) { + if (fieldNames == null) { + return null; + } + Map> map = new LinkedHashMap<>(); + for (String name : fieldNames) { + map.put(name, Map.of()); + } + return map; } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index b98b2ff03a5..5f58dea227e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -98,6 +98,7 @@ private CalcitePlanContext(CalcitePlanContext parent) { this.relBuilder = parent.relBuilder; // Share the same relBuilder this.rexBuilder = parent.rexBuilder; // Share the same rexBuilder this.functionProperties = parent.functionProperties; + this.highlightConfig = parent.highlightConfig; this.rexLambdaRefMap = new HashMap<>(); // New map for lambda variables this.capturedVariables = new ArrayList<>(); // New list for captured variables this.inLambdaContext = true; // Mark that we're inside a lambda diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index d728c3d5e38..489c933953f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -123,7 +123,6 @@ import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.GraphLookup.Direction; import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Lookup; @@ -157,9 +156,9 @@ import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; +import org.opensearch.sql.calcite.plan.HighlightPushDown; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.calcite.plan.rel.LogicalGraphLookup; -import org.opensearch.sql.calcite.plan.rel.LogicalHighlight; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit.SystemLimitType; import org.opensearch.sql.calcite.utils.BinUtils; @@ -229,16 +228,17 @@ public RelNode visitRelation(Relation node, CalcitePlanContext context) { context.relBuilder.scan(node.getTableQualifiedName().getParts()); RelNode scan = context.relBuilder.peek(); - if (scan instanceof AliasFieldsWrappable) { - ((AliasFieldsWrappable) scan).wrapProjectForAliasFields(context.relBuilder); + // Eagerly push down highlight config to the scan (highlight is a scan hint, not an operator) + if (context.getHighlightConfig() != null && scan instanceof HighlightPushDown) { + RelNode newScan = ((HighlightPushDown) scan).pushDownHighlight(context.getHighlightConfig()); + context.relBuilder.build(); // pop old scan + context.relBuilder.push(newScan); + scan = newScan; + context.setHighlightConfig(null); // consumed } - // Wrap with LogicalHighlight if highlight config is set on context (from API request) - if (context.getHighlightConfig() != null) { - RelNode input = context.relBuilder.build(); - LogicalHighlight highlight = LogicalHighlight.create(input, context.getHighlightConfig()); - context.relBuilder.push(highlight); - context.setHighlightConfig(null); // Clear for join safety + if (scan instanceof AliasFieldsWrappable) { + ((AliasFieldsWrappable) scan).wrapProjectForAliasFields(context.relBuilder); } return context.relBuilder.peek(); @@ -3199,13 +3199,6 @@ static ChartConfig fromArguments(ArgumentMap argMap) { } } - @Override - public RelNode visitHighlight(Highlight node, CalcitePlanContext context) { - context.setHighlightConfig(node.getHighlightConfig()); - visitChildren(node, context); - return context.relBuilder.peek(); - } - @Override public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushDown.java b/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushDown.java new file mode 100644 index 00000000000..29adf1bb960 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/HighlightPushDown.java @@ -0,0 +1,18 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.plan; + +import org.apache.calcite.rel.RelNode; +import org.opensearch.sql.ast.tree.HighlightConfig; + +/** + * Interface for scan nodes that support highlight pushdown. Highlight is a scan hint (not a + * relational operator), so it is pushed down eagerly during plan construction rather than via an + * optimizer rule. + */ +public interface HighlightPushDown { + RelNode pushDownHighlight(HighlightConfig highlightConfig); +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java deleted file mode 100644 index 6f3946729a3..00000000000 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalHighlight.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.calcite.plan.rel; - -import java.util.List; -import lombok.Getter; -import org.apache.calcite.plan.Convention; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelWriter; -import org.apache.calcite.rel.SingleRel; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.sql.type.SqlTypeName; -import org.opensearch.sql.ast.tree.HighlightConfig; -import org.opensearch.sql.expression.HighlightExpression; - -/** - * Logical relational node representing a PPL {@code | highlight} command. Stores the highlight - * configuration (fields, pre/post tags, fragment size) and adds a {@code _highlight} column to the - * output row type. An optimizer rule ({@code HighlightIndexScanRule}) pushes this node down into - * the OpenSearch index scan. - */ -public class LogicalHighlight extends SingleRel { - - @Getter private final HighlightConfig highlightConfig; - private final RelDataType highlightRowType; - - protected LogicalHighlight( - RelOptCluster cluster, - RelTraitSet traitSet, - RelNode input, - HighlightConfig highlightConfig, - RelDataType highlightRowType) { - super(cluster, traitSet, input); - this.highlightConfig = highlightConfig; - this.highlightRowType = highlightRowType; - } - - public static LogicalHighlight create(RelNode input, HighlightConfig highlightConfig) { - final RelOptCluster cluster = input.getCluster(); - RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE); - - // Add _highlight column to the output row type so that downstream operators - // (e.g. visitProject) can reference it before the optimizer rule fires. - RelDataTypeFactory typeFactory = cluster.getTypeFactory(); - RelDataTypeFactory.Builder schemaBuilder = typeFactory.builder(); - schemaBuilder.addAll(input.getRowType().getFieldList()); - if (!input.getRowType().getFieldNames().contains(HighlightExpression.HIGHLIGHT_FIELD)) { - schemaBuilder.add( - HighlightExpression.HIGHLIGHT_FIELD, typeFactory.createSqlType(SqlTypeName.ANY)); - } - - return new LogicalHighlight(cluster, traitSet, input, highlightConfig, schemaBuilder.build()); - } - - @Override - protected RelDataType deriveRowType() { - return highlightRowType; - } - - @Override - public RelNode copy(RelTraitSet traitSet, List inputs) { - assert traitSet.containsIfApplicable(Convention.NONE); - return new LogicalHighlight( - getCluster(), traitSet, sole(inputs), highlightConfig, highlightRowType); - } - - @Override - public RelWriter explainTerms(RelWriter pw) { - return super.explainTerms(pw).item("highlightConfig", highlightConfig.fields()); - } -} diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index bebd50a5e87..320325c8438 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -27,6 +27,7 @@ import org.opensearch.sql.analysis.AnalysisContext; import org.opensearch.sql.analysis.Analyzer; import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.CalciteRelNodeVisitor; @@ -87,8 +88,17 @@ public void execute( UnresolvedPlan plan, QueryType queryType, ResponseListener listener) { + execute(plan, queryType, null, listener); + } + + /** Execute with optional highlight config. */ + public void execute( + UnresolvedPlan plan, + QueryType queryType, + HighlightConfig highlightConfig, + ResponseListener listener) { if (shouldUseCalcite(queryType)) { - executeWithCalcite(plan, queryType, listener); + executeWithCalcite(plan, queryType, highlightConfig, listener); } else { executeWithLegacy(plan, queryType, listener, Optional.empty()); } @@ -100,8 +110,18 @@ public void explain( QueryType queryType, ResponseListener listener, ExplainMode mode) { + explain(plan, queryType, null, listener, mode); + } + + /** Explain with optional highlight config. */ + public void explain( + UnresolvedPlan plan, + QueryType queryType, + HighlightConfig highlightConfig, + ResponseListener listener, + ExplainMode mode) { if (shouldUseCalcite(queryType)) { - explainWithCalcite(plan, queryType, listener, mode); + explainWithCalcite(plan, queryType, highlightConfig, listener, mode); } else { explainWithLegacy(plan, queryType, listener, mode, Optional.empty()); } @@ -110,6 +130,7 @@ public void explain( public void executeWithCalcite( UnresolvedPlan plan, QueryType queryType, + HighlightConfig highlightConfig, ResponseListener listener) { CalcitePlanContext.run( () -> { @@ -121,6 +142,7 @@ public void executeWithCalcite( CalcitePlanContext context = CalcitePlanContext.create( buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); + context.setHighlightConfig(highlightConfig); RelNode relNode = analyze(plan, context); RelNode calcitePlan = convertToCalcitePlan(relNode, context); analyzeMetric.set(System.nanoTime() - analyzeStart); @@ -140,6 +162,7 @@ public void executeWithCalcite( public void explainWithCalcite( UnresolvedPlan plan, QueryType queryType, + HighlightConfig highlightConfig, ResponseListener listener, ExplainMode mode) { CalcitePlanContext.run( @@ -149,6 +172,7 @@ public void explainWithCalcite( CalcitePlanContext context = CalcitePlanContext.create( buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); + context.setHighlightConfig(highlightConfig); context.run( () -> { RelNode relNode = analyze(plan, context); diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java index 5896871f5d0..3f6407e8873 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlan.java @@ -8,6 +8,7 @@ import java.util.Optional; import org.apache.commons.lang3.NotImplementedException; import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.response.ResponseListener; @@ -29,6 +30,8 @@ public class QueryPlan extends AbstractPlan { protected final Optional pageSize; + protected final HighlightConfig highlightConfig; + /** Constructor. */ public QueryPlan( QueryId queryId, @@ -36,11 +39,23 @@ public QueryPlan( UnresolvedPlan plan, QueryService queryService, ResponseListener listener) { + this(queryId, queryType, plan, queryService, listener, null); + } + + /** Constructor with highlight config. */ + public QueryPlan( + QueryId queryId, + QueryType queryType, + UnresolvedPlan plan, + QueryService queryService, + ResponseListener listener, + HighlightConfig highlightConfig) { super(queryId, queryType); this.plan = plan; this.queryService = queryService; this.listener = listener; this.pageSize = Optional.empty(); + this.highlightConfig = highlightConfig; } /** Constructor with page size. */ @@ -56,6 +71,7 @@ public QueryPlan( this.queryService = queryService; this.listener = listener; this.pageSize = Optional.of(pageSize); + this.highlightConfig = null; } @Override @@ -63,7 +79,7 @@ public void execute() { if (pageSize.isPresent()) { queryService.execute(new Paginate(pageSize.get(), plan), getQueryType(), listener); } else { - queryService.execute(plan, getQueryType(), listener); + queryService.execute(plan, getQueryType(), highlightConfig, listener); } } @@ -75,7 +91,7 @@ public void explain( new NotImplementedException( "`explain` feature for paginated requests is not implemented yet.")); } else { - queryService.explain(plan, getQueryType(), listener, mode); + queryService.explain(plan, getQueryType(), highlightConfig, listener, mode); } } } diff --git a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlanFactory.java b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlanFactory.java index 7335cf574b2..48e2b3ce5e0 100644 --- a/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlanFactory.java +++ b/core/src/main/java/org/opensearch/sql/executor/execution/QueryPlanFactory.java @@ -122,7 +122,12 @@ public AbstractPlan visitQuery( } } else { return new QueryPlan( - QueryId.queryId(), node.getQueryType(), node.getPlan(), queryService, context.getLeft()); + QueryId.queryId(), + node.getQueryType(), + node.getPlan(), + queryService, + context.getLeft(), + node.getHighlightConfig()); } } diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java index 77deb9b6a48..128df14ff8e 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/QueryPlanTest.java @@ -52,7 +52,7 @@ public void execute_no_page_size() { QueryPlan query = new QueryPlan(queryId, queryType, plan, queryService, queryListener); query.execute(); - verify(queryService, times(1)).execute(any(), any(), any()); + verify(queryService, times(1)).execute(any(), any(), any(), any()); } @Test @@ -60,7 +60,7 @@ public void explain_no_page_size() { QueryPlan query = new QueryPlan(queryId, queryType, plan, queryService, queryListener); query.explain(explainListener, mode); - verify(queryService, times(1)).explain(plan, queryType, explainListener, mode); + verify(queryService, times(1)).explain(plan, queryType, null, explainListener, mode); } @Test diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index a130cf53c54..00790b88656 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -275,14 +275,15 @@ Expected output (trimmed): | Parameter | Type | Required | Description | |-----------------|-----------------|----------|--------------------------------------------------------------------------------------------------------------| -| `fields` | Object | Yes | An object whose keys are field names or wildcards (e.g. `{"*": {}}`) to highlight. | +| `fields` | Object | Yes | An object whose keys are field names or wildcards to highlight. Each value is an object of per-field options (see Notes). Use `{}` for defaults. Example: `{"*": {}}` or `{"title": {"fragment_size": 200}}`. | | `pre_tags` | Array of string | No | Tags inserted before highlighted tokens. Defaults to ``. | | `post_tags` | Array of string | No | Tags inserted after highlighted tokens. Defaults to ``. | | `fragment_size` | Integer | No | Maximum character size of a highlight fragment. Defaults to `100`. | ### Notes -- Highlighting requires a search query in the PPL statement (e.g. `source=accounts "Holmes"`). Without a query, the `highlights` array entries will be empty. +- Highlighting requires a search term in the PPL statement (e.g. `source=accounts "Holmes"`). Without a search term (e.g. just `source=accounts`), the `highlights` array entries will be empty. - The `highlights` array in the response is parallel to `datarows` — each entry contains the highlighted fragments for the corresponding row. - In the simple array format, `["*"]` highlights all fields. Specific field names like `["firstname", "lastname"]` scope highlighting to those fields only. -- In the object format, only the keys of the `fields` object are used; per-field options inside the value objects are currently ignored. +- In the object format, each key in the `fields` object is a field name or wildcard. Each value is an object of per-field highlight options. Supported per-field options: `fragment_size`, `number_of_fragments`, `type` (`plain`, `unified`, `fvh`), `pre_tags`, `post_tags`, `require_field_match`, `no_match_size`, `order`. Use `{}` for defaults. Example: `{"title": {"fragment_size": 200}, "body": {"type": "plain"}}`. +- Highlights may include fields that are not explicitly projected in the `schema`/`datarows`. For example, using `{"*": {}}` highlights all fields that matched the search query, including fields not selected by `| fields`. In the example above, the `address` field appears in `highlights` because it contains a match ("880 Holmes Lane") even though only `account_number`, `firstname`, and `lastname` are projected. diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java index edd88fede38..320aea3940f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java @@ -157,6 +157,62 @@ public void testHighlightWithEval() throws IOException { assertHighlightsExist(result); } + @Test + public void testHighlightWildcardInSearchText() throws IOException { + JSONObject result = + executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holm*\"", "[\"*\"]"); + assertTrue("Response should contain datarows", result.has("datarows")); + assertTrue("Response should contain highlights array", result.has("highlights")); + } + + @Test + public void testHighlightMixedFullTextAndStructuredFilter() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | where age > 30 | fields firstname, age", + "[\"*\"]"); + assertTrue("Response should contain datarows", result.has("datarows")); + assertTrue("Response should contain highlights array", result.has("highlights")); + } + + @Test + public void testHighlightPerFieldOptions() throws IOException { + String highlightJson = + "{\"fields\": {\"firstname\": {\"fragment_size\": 200}," + + " \"lastname\": {\"number_of_fragments\": 3}}}"; + JSONObject result = + executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", highlightJson); + assertHighlightsExist(result); + } + + @Test + public void testHighlightSpecificFields() throws IOException { + String highlightJson = "[\"firstname\", \"lastname\"]"; + JSONObject result = + executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", highlightJson); + assertHighlightsExist(result); + } + + @Test + public void testHighlightWithHead() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | head 2", "[\"*\"]"); + assertTrue("Response should contain datarows", result.has("datarows")); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue("Should return at most 2 rows", dataRows.length() <= 2); + assertTrue("Response should contain highlights array", result.has("highlights")); + } + + @Test + public void testHighlightWithStats() throws IOException { + // Stats aggregation with highlight - verify request succeeds + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | stats count()", "[\"*\"]"); + assertTrue("Response should contain datarows", result.has("datarows")); + } + @Test public void testHighlightNoSearchQuery() throws IOException { // Without a search query, request should still succeed (highlights may or may not be present) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml index 3dce4195f5d..48d5bb74b55 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml @@ -2,7 +2,6 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightConfig=[[*]]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"pre_tags":[""],"post_tags":[""],"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"pre_tags":[""],"post_tags":[""],"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml index c4bfbc0a8bf..151c1fb7986 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml @@ -2,7 +2,6 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightConfig=[[Holmes]]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"Holmes":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fields":{"Holmes":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml index a570f1246c8..5d0351ae136 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml @@ -2,7 +2,6 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightConfig=[[*]]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml index be897c9958f..fbfce05e1fc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml @@ -3,7 +3,6 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(firstname=[$1], age=[$8], _highlight=[$17]) LogicalFilter(condition=[>($8, 30)]) - LogicalHighlight(highlightConfig=[[*]]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], FILTER->>($8, 30), PROJECT->[firstname, age, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["firstname","age"],"excludes":[]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[firstname, age, _highlight], FILTER->>($1, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["firstname","age"],"excludes":[]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_osd_format.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_osd_format.yaml index b459538a488..8f71eb74933 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_osd_format.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_osd_format.yaml @@ -2,8 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightConfig=[[*]]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"pre_tags":[""],"post_tags":[""],"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml index f4270b4f69f..a49fd0816bb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_single_term.yaml @@ -2,8 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightConfig=[[Holmes]]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"Holmes":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml index 130e542f796..9fc05300144 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_wildcard.yaml @@ -2,8 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) - LogicalHighlight(highlightConfig=[[*]]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}], _highlight=[$t17]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml index c56a7f590b6..27283c4ef99 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_highlight_with_filter.yaml @@ -3,8 +3,7 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(firstname=[$1], age=[$8], _highlight=[$17]) LogicalFilter(condition=[>($8, 30)]) - LogicalHighlight(highlightConfig=[[*]]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[30], expr#19=[>($t8, $t18)], firstname=[$t1], age=[$t8], _highlight=[$t17], $condition=[$t19]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java deleted file mode 100644 index 3f24f682820..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/HighlightIndexScanRule.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.planner.rules; - -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.rel.RelNode; -import org.immutables.value.Value; -import org.opensearch.sql.calcite.plan.rel.LogicalHighlight; -import org.opensearch.sql.calcite.plan.rule.OpenSearchRuleConfig; -import org.opensearch.sql.calcite.utils.PlanUtils; -import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; - -/** - * Planner rule that pushes a {@link LogicalHighlight} down to {@link CalciteLogicalIndexScan}. This - * adds the {@code _highlight} column to the scan schema and configures the OpenSearch {@code - * HighlightBuilder} for the search request. - */ -@Value.Enclosing -public class HighlightIndexScanRule extends InterruptibleRelRule { - - protected HighlightIndexScanRule(Config config) { - super(config); - } - - @Override - protected void onMatchImpl(RelOptRuleCall call) { - final LogicalHighlight highlight = call.rel(0); - final CalciteLogicalIndexScan scan = call.rel(1); - RelNode newScan = scan.pushDownHighlight(highlight.getHighlightConfig()); - if (newScan != null) { - call.transformTo(newScan); - PlanUtils.tryPruneRelNodes(call); - } - } - - /** Rule configuration. */ - @Value.Immutable - public interface Config extends OpenSearchRuleConfig { - Config DEFAULT = - ImmutableHighlightIndexScanRule.Config.builder() - .build() - .withOperandSupplier( - b0 -> - b0.operand(LogicalHighlight.class) - .oneInput(b1 -> b1.operand(CalciteLogicalIndexScan.class).noInputs())); - - @Override - default HighlightIndexScanRule toRule() { - return new HighlightIndexScanRule(this); - } - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index 7c09c22bdf1..0068f445ce7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -21,10 +21,6 @@ public class OpenSearchIndexRules { // Rule that always pushes down relevance functions regardless of pushdown settings private static final RelevanceFunctionPushdownRule RELEVANCE_FUNCTION_RULE = RelevanceFunctionPushdownRule.Config.DEFAULT.toRule(); - // Rule that always pushes down highlight regardless of pushdown settings, - // consistent with V2 engine where PUSH_DOWN_HIGHLIGHT is always in the default rules - private static final HighlightIndexScanRule HIGHLIGHT_INDEX_SCAN = - HighlightIndexScanRule.Config.DEFAULT.toRule(); /** The rules will apply whatever the pushdown setting is. */ public static final List OPEN_SEARCH_NON_PUSHDOWN_RULES = @@ -33,8 +29,7 @@ public class OpenSearchIndexRules { SYSTEM_INDEX_SCAN_RULE, NESTED_AGGREGATE_RULE, GRAPH_LOOKUP_RULE, - RELEVANCE_FUNCTION_RULE, - HIGHLIGHT_INDEX_SCAN); + RELEVANCE_FUNCTION_RULE); private static final ProjectIndexScanRule PROJECT_INDEX_SCAN = ProjectIndexScanRule.Config.DEFAULT.toRule(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index c52904a25d3..edbd1b06393 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -506,13 +506,14 @@ protected static void applyHighlightPushDown( || highlightConfig.fields().isEmpty()) { return; } - List fields = highlightConfig.fields(); HighlightBuilder hb = new HighlightBuilder(); - for (String fieldName : fields) { - hb.field(fieldName); + for (Map.Entry> entry : highlightConfig.fields().entrySet()) { + HighlightBuilder.Field field = new HighlightBuilder.Field(entry.getKey()); + applyPerFieldOptions(field, entry.getValue()); + hb.field(field); } - // Apply pre_tags / post_tags if provided + // Apply global pre_tags / post_tags if provided if (highlightConfig.preTags() != null && !highlightConfig.preTags().isEmpty()) { hb.preTags(highlightConfig.preTags().toArray(new String[0])); } @@ -520,11 +521,44 @@ protected static void applyHighlightPushDown( hb.postTags(highlightConfig.postTags().toArray(new String[0])); } - // Apply fragment_size only when explicitly specified; otherwise let OpenSearch use its default + // Apply global fragment_size only when explicitly specified if (highlightConfig.fragmentSize() != null) { hb.fragmentSize(highlightConfig.fragmentSize()); } requestBuilder.getSourceBuilder().highlighter(hb); } + + @SuppressWarnings( + "unchecked") // values are trusted types from PPLQueryRequest.parsePerFieldOptions + private static void applyPerFieldOptions( + HighlightBuilder.Field field, Map options) { + if (options == null || options.isEmpty()) { + return; + } + if (options.containsKey("fragment_size")) { + field.fragmentSize(((Number) options.get("fragment_size")).intValue()); + } + if (options.containsKey("number_of_fragments")) { + field.numOfFragments(((Number) options.get("number_of_fragments")).intValue()); + } + if (options.containsKey("type")) { + field.highlighterType((String) options.get("type")); + } + if (options.containsKey("pre_tags")) { + field.preTags(((List) options.get("pre_tags")).toArray(new String[0])); + } + if (options.containsKey("post_tags")) { + field.postTags(((List) options.get("post_tags")).toArray(new String[0])); + } + if (options.containsKey("require_field_match")) { + field.requireFieldMatch((Boolean) options.get("require_field_match")); + } + if (options.containsKey("no_match_size")) { + field.noMatchSize(((Number) options.get("no_match_size")).intValue()); + } + if (options.containsKey("order")) { + field.order((String) options.get("order")); + } + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 2dadb888f05..4d32562f2fd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -43,6 +43,7 @@ import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; import org.opensearch.sql.ast.tree.HighlightConfig; +import org.opensearch.sql.calcite.plan.HighlightPushDown; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.common.setting.Settings; @@ -71,7 +72,7 @@ /** The logical relational operator representing a scan of an OpenSearchIndex type. */ @Getter -public class CalciteLogicalIndexScan extends AbstractCalciteIndexScan { +public class CalciteLogicalIndexScan extends AbstractCalciteIndexScan implements HighlightPushDown { private static final Logger LOG = LogManager.getLogger(CalciteLogicalIndexScan.class); public CalciteLogicalIndexScan( @@ -97,7 +98,7 @@ public RelNode pushDownHighlight(HighlightConfig highlightConfig) { .getPushDownContext() .add( PushDownType.HIGHLIGHT, - highlightConfig.fields(), + highlightConfig.fieldNames(), (OSRequestBuilderAction) requestBuilder -> applyHighlightPushDown(requestBuilder, highlightConfig)); return newScan; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java index 3d6c91acb4a..9a59cdf6163 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java @@ -14,6 +14,7 @@ import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.exception.NonFallbackCalciteException; +import org.opensearch.sql.expression.HighlightExpression; import org.opensearch.sql.monitor.ResourceMonitor; import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.request.OpenSearchRequest; @@ -92,9 +93,9 @@ public Object current() { } private Object resolveForCalcite(ExprValue value, String rawPath) { - if ("_highlight".equals(rawPath)) { - ExprValue hl = ExprValueUtils.getTupleValue(value).get("_highlight"); - return (hl != null && !hl.isMissing()) ? hl : null; + if (HighlightExpression.HIGHLIGHT_FIELD.equals(rawPath)) { + ExprValue hl = ExprValueUtils.getTupleValue(value).get(HighlightExpression.HIGHLIGHT_FIELD); + return (hl != null && !hl.isMissing() && !hl.isNull()) ? hl : null; } return ExprValueUtils.resolveRefPaths(value, List.of(rawPath.split("\\."))).valueForCalcite(); } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java index cb8f21d8d83..4201c9cf6ab 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java @@ -6,8 +6,10 @@ package org.opensearch.sql.ppl.domain; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Optional; import lombok.Getter; import lombok.Setter; @@ -24,6 +26,8 @@ public class PPLQueryRequest { private static final String DEFAULT_PPL_PATH = "/_plugins/_ppl"; private static final String FETCH_SIZE_FIELD = "fetch_size"; private static final String HIGHLIGHT_FIELD = "highlight"; + private static final int MAX_HIGHLIGHT_FIELDS = 100; + private static final int MAX_TAG_ENTRIES = 10; public static final PPLQueryRequest NULL = new PPLQueryRequest("", null, DEFAULT_PPL_PATH, ""); @@ -130,10 +134,7 @@ public HighlightConfig getHighlightConfig() { // Simple array format: ["*"] or ["error", "login"] JSONArray arr = jsonContent.optJSONArray(HIGHLIGHT_FIELD); if (arr != null) { - List fields = new ArrayList<>(); - for (int i = 0; i < arr.length(); i++) { - fields.add(arr.getString(i)); - } + List fields = parseAndValidateFieldArray(arr); return new HighlightConfig(fields); } @@ -144,31 +145,111 @@ public HighlightConfig getHighlightConfig() { return null; } - // Parse fields from "fields" object keys - List fields = new ArrayList<>(); + // Parse and validate fields with per-field options + Map> fields = new LinkedHashMap<>(); JSONObject fieldsObj = obj.optJSONObject("fields"); if (fieldsObj != null) { + if (fieldsObj.keySet().size() > MAX_HIGHLIGHT_FIELDS) { + throw new IllegalArgumentException( + "highlight fields count exceeds maximum of " + MAX_HIGHLIGHT_FIELDS); + } for (String key : fieldsObj.keySet()) { - fields.add(key); + validateFieldName(key); + Map perFieldOpts = parsePerFieldOptions(fieldsObj.optJSONObject(key)); + fields.put(key, perFieldOpts); } } - // Parse pre_tags - List preTags = jsonArrayToList(obj.optJSONArray("pre_tags")); + // Parse and validate tags + List preTags = parseAndValidateTagArray(obj.optJSONArray("pre_tags"), "pre_tags"); + List postTags = parseAndValidateTagArray(obj.optJSONArray("post_tags"), "post_tags"); - // Parse post_tags - List postTags = jsonArrayToList(obj.optJSONArray("post_tags")); - - // Parse fragment_size - Integer fragmentSize = obj.has("fragment_size") ? obj.getInt("fragment_size") : null; + // Parse and validate fragment_size + Integer fragmentSize = null; + if (obj.has("fragment_size")) { + fragmentSize = obj.getInt("fragment_size"); + if (fragmentSize <= 0) { + throw new IllegalArgumentException("highlight fragment_size must be a positive integer"); + } + } return new HighlightConfig(fields, preTags, postTags, fragmentSize); } - private static List jsonArrayToList(JSONArray arr) { + private static List parseAndValidateFieldArray(JSONArray arr) { + if (arr.length() > MAX_HIGHLIGHT_FIELDS) { + throw new IllegalArgumentException( + "highlight fields count exceeds maximum of " + MAX_HIGHLIGHT_FIELDS); + } + List fields = new ArrayList<>(); + for (int i = 0; i < arr.length(); i++) { + String field = arr.getString(i); + validateFieldName(field); + fields.add(field); + } + return fields; + } + + private static void validateFieldName(String fieldName) { + if (fieldName == null || fieldName.trim().isEmpty()) { + throw new IllegalArgumentException("highlight field name must be a non-empty string"); + } + } + + private static List parseAndValidateTagArray(JSONArray arr, String paramName) { if (arr == null) { return null; } + if (arr.length() > MAX_TAG_ENTRIES) { + throw new IllegalArgumentException( + "highlight " + paramName + " count exceeds maximum of " + MAX_TAG_ENTRIES); + } + List list = new ArrayList<>(); + for (int i = 0; i < arr.length(); i++) { + list.add(arr.getString(i)); + } + return list; + } + + /** + * Parse per-field highlight options from a JSON object. Supported options align with the + * OpenSearch highlight API: {@code fragment_size}, {@code number_of_fragments}, {@code type}, + * {@code pre_tags}, {@code post_tags}, {@code require_field_match}, {@code no_match_size}, {@code + * order}. + */ + private static Map parsePerFieldOptions(JSONObject fieldObj) { + if (fieldObj == null || fieldObj.isEmpty()) { + return Map.of(); + } + Map opts = new LinkedHashMap<>(); + if (fieldObj.has("fragment_size")) { + opts.put("fragment_size", fieldObj.getInt("fragment_size")); + } + if (fieldObj.has("number_of_fragments")) { + opts.put("number_of_fragments", fieldObj.getInt("number_of_fragments")); + } + if (fieldObj.has("type")) { + opts.put("type", fieldObj.getString("type")); + } + if (fieldObj.has("pre_tags")) { + opts.put("pre_tags", jsonArrayToStringList(fieldObj.getJSONArray("pre_tags"))); + } + if (fieldObj.has("post_tags")) { + opts.put("post_tags", jsonArrayToStringList(fieldObj.getJSONArray("post_tags"))); + } + if (fieldObj.has("require_field_match")) { + opts.put("require_field_match", fieldObj.getBoolean("require_field_match")); + } + if (fieldObj.has("no_match_size")) { + opts.put("no_match_size", fieldObj.getInt("no_match_size")); + } + if (fieldObj.has("order")) { + opts.put("order", fieldObj.getString("order")); + } + return opts; + } + + private static List jsonArrayToStringList(JSONArray arr) { List list = new ArrayList<>(); for (int i = 0; i < arr.length(); i++) { list.add(arr.getString(i)); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java index 006a6351dca..d2c1f610238 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java @@ -16,7 +16,6 @@ import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.UnresolvedPlan; @@ -37,13 +36,13 @@ public Statement visitPplStatement(OpenSearchPPLParser.PplStatementContext ctx) if (context.getFetchSize() > 0) { rawPlan = new Head(context.getFetchSize(), 0).attach(rawPlan); } + UnresolvedPlan plan = addSelectAll(rawPlan); + Query query = new Query(plan, 0, PPL); if (context.getHighlightConfig() != null && context.getHighlightConfig().fields() != null && !context.getHighlightConfig().fields().isEmpty()) { - rawPlan = new Highlight(context.getHighlightConfig()).attach(rawPlan); + query.setHighlightConfig(context.getHighlightConfig()); } - UnresolvedPlan plan = addSelectAll(rawPlan); - Query query = new Query(plan, 0, PPL); if (ctx.explainStatement() != null) { if (ctx.explainStatement().explainMode() == null) { return new Explain(query, PPL); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 5d2708fd34e..96c0787d5e3 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -80,7 +80,6 @@ import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; -import org.opensearch.sql.ast.tree.Highlight; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.MinSpanBin; @@ -731,13 +730,6 @@ public String visitFlatten(Flatten node, String context) { return StringUtils.format("%s | flatten %s", child, field); } - @Override - public String visitHighlight(Highlight node, String context) { - String child = node.getChild().get(0).accept(this, context); - String fields = String.join(", ", node.getHighlightConfig().fields()); - return StringUtils.format("%s | highlight %s", child, fields); - } - @Override public String visitTrendline(Trendline node, String context) { String child = node.getChild().get(0).accept(this, context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java index 4faadd4850c..0825f6d1def 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java @@ -102,12 +102,12 @@ public void onFailure(Exception e) { public void testExecuteShouldPass() { doAnswer( invocation -> { - ResponseListener listener = invocation.getArgument(2); + ResponseListener listener = invocation.getArgument(3); listener.onResponse(new QueryResponse(schema, Collections.emptyList(), Cursor.None)); return null; }) .when(queryService) - .execute(any(), any(), any()); + .execute(any(), any(), any(), any()); pplService.execute( new PPLQueryRequest("search source=t a=1", null, QUERY), @@ -119,12 +119,12 @@ public void testExecuteShouldPass() { public void testExecuteCsvFormatShouldPass() { doAnswer( invocation -> { - ResponseListener listener = invocation.getArgument(2); + ResponseListener listener = invocation.getArgument(3); listener.onResponse(new QueryResponse(schema, Collections.emptyList(), Cursor.None)); return null; }) .when(queryService) - .execute(any(), any(), any()); + .execute(any(), any(), any(), any()); pplService.execute( new PPLQueryRequest("search source=t a=1", null, QUERY, "csv"), @@ -136,12 +136,12 @@ public void testExecuteCsvFormatShouldPass() { public void testExplainShouldPass() { doAnswer( invocation -> { - ResponseListener listener = invocation.getArgument(1); + ResponseListener listener = invocation.getArgument(3); listener.onResponse(new ExplainResponse(new ExplainResponseNode("test"))); return null; }) .when(queryService) - .explain(any(), any(), any(), any()); + .explain(any(), any(), any(), any(), any()); pplService.explain( new PPLQueryRequest("search source=t a=1", null, EXPLAIN), @@ -173,12 +173,12 @@ public void testExplainWithIllegalQueryShouldBeCaughtByHandler() { public void testPrometheusQuery() { doAnswer( invocation -> { - ResponseListener listener = invocation.getArgument(2); + ResponseListener listener = invocation.getArgument(3); listener.onResponse(new QueryResponse(schema, Collections.emptyList(), Cursor.None)); return null; }) .when(queryService) - .execute(any(), any(), any()); + .execute(any(), any(), any(), any()); pplService.execute( new PPLQueryRequest("source = prometheus.http_requests_total", null, QUERY), diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java index 809ecd357ae..362b4493b93 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java @@ -116,7 +116,7 @@ public void testGetHighlightConfigSimpleArrayWildcard() { PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); HighlightConfig config = request.getHighlightConfig(); assertNotNull(config); - assertEquals(List.of("*"), config.fields()); + assertEquals(List.of("*"), config.fieldNames()); assertNull(config.preTags()); assertNull(config.postTags()); assertNull(config.fragmentSize()); @@ -129,7 +129,7 @@ public void testGetHighlightConfigSimpleArrayMultipleTerms() { PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); HighlightConfig config = request.getHighlightConfig(); assertNotNull(config); - assertEquals(List.of("error", "login"), config.fields()); + assertEquals(List.of("error", "login"), config.fieldNames()); assertNull(config.preTags()); assertNull(config.postTags()); assertNull(config.fragmentSize()); @@ -145,7 +145,7 @@ public void testGetHighlightConfigOsdObjectFormatAllFields() { PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); HighlightConfig config = request.getHighlightConfig(); assertNotNull(config); - assertEquals(List.of("*"), config.fields()); + assertEquals(List.of("*"), config.fieldNames()); assertEquals(List.of(""), config.preTags()); assertEquals(List.of(""), config.postTags()); assertEquals(Integer.valueOf(2147483647), config.fragmentSize()); @@ -162,8 +162,8 @@ public void testGetHighlightConfigOsdObjectFormatMultipleTags() { HighlightConfig config = request.getHighlightConfig(); assertNotNull(config); assertEquals(2, config.fields().size()); - assertTrue(config.fields().contains("title")); - assertTrue(config.fields().contains("body")); + assertTrue(config.fields().containsKey("title")); + assertTrue(config.fields().containsKey("body")); assertEquals(List.of("", ""), config.preTags()); assertEquals(List.of("", ""), config.postTags()); assertNull(config.fragmentSize()); @@ -177,9 +177,25 @@ public void testGetHighlightConfigOsdObjectFormatFieldsOnly() { PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); HighlightConfig config = request.getHighlightConfig(); assertNotNull(config); - assertEquals(List.of("*"), config.fields()); + assertEquals(List.of("*"), config.fieldNames()); assertNull(config.preTags()); assertNull(config.postTags()); assertNull(config.fragmentSize()); } + + @Test + public void testGetHighlightConfigPerFieldOptions() { + JSONObject json = + new JSONObject( + "{\"query\": \"source=t\", \"highlight\": {" + + "\"fields\": {\"title\": {\"fragment_size\": 200, \"number_of_fragments\": 3}," + + " \"body\": {\"type\": \"plain\"}}}}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + HighlightConfig config = request.getHighlightConfig(); + assertNotNull(config); + assertEquals(2, config.fields().size()); + assertEquals(200, config.fields().get("title").get("fragment_size")); + assertEquals(3, config.fields().get("title").get("number_of_fragments")); + assertEquals("plain", config.fields().get("body").get("type")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java index d25a05b80c0..ba31b75b38b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java @@ -120,28 +120,24 @@ public void buildQueryStatementWithFetchSizeAndHeadWithOffset() { @Test public void buildQueryStatementWithHighlight() { - // Highlight wraps the raw plan, then Project(*) wraps on top + // Highlight config is set on the Query statement, not as an AST wrapper HighlightConfig config = new HighlightConfig(List.of("*")); - assertEqualWithHighlight( - "search source=t a=1", - config, - new Query( - project(highlight(search(relation("t"), "a:1"), config), AllFields.of()), 0, PPL)); + Query expected = new Query(project(search(relation("t"), "a:1"), AllFields.of()), 0, PPL); + expected.setHighlightConfig(config); + assertEqualWithHighlight("search source=t a=1", config, expected); } @Test public void buildQueryStatementWithHighlightMultipleTerms() { HighlightConfig config = new HighlightConfig(List.of("error", "login")); - assertEqualWithHighlight( - "search source=t a=1", - config, - new Query( - project(highlight(search(relation("t"), "a:1"), config), AllFields.of()), 0, PPL)); + Query expected = new Query(project(search(relation("t"), "a:1"), AllFields.of()), 0, PPL); + expected.setHighlightConfig(config); + assertEqualWithHighlight("search source=t a=1", config, expected); } @Test public void buildQueryStatementWithHighlightNull() { - // null highlight means no Highlight node injected + // null highlight means no config on the Query assertEqualWithHighlight( "search source=t a=1", null, @@ -150,16 +146,12 @@ public void buildQueryStatementWithHighlightNull() { @Test public void buildQueryStatementWithHighlightAndFetchSize() { - // Both fetch_size and highlight: Head wraps first, then Highlight wraps Head + // Both fetch_size and highlight: Head wraps the plan, config is on the Query HighlightConfig config = new HighlightConfig(List.of("*")); - assertEqualWithHighlightAndFetchSize( - "search source=t a=1", - config, - 100, - new Query( - project(highlight(head(search(relation("t"), "a:1"), 100, 0), config), AllFields.of()), - 0, - PPL)); + Query expected = + new Query(project(head(search(relation("t"), "a:1"), 100, 0), AllFields.of()), 0, PPL); + expected.setHighlightConfig(config); + assertEqualWithHighlightAndFetchSize("search source=t a=1", config, 100, expected); } private void assertEqualWithFetchSize(String query, int fetchSize, Statement expectedStatement) { From 1bd30fa64669ead9c2e2bac6082d3055355208bc Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Wed, 18 Mar 2026 10:37:19 -0700 Subject: [PATCH 22/25] peng - add more IT for highlight cases Signed-off-by: Jialiang Liang --- .../calcite/remote/CalciteHighlightIT.java | 61 ++++++++++++++ .../sql/ppl/domain/PPLQueryRequestTest.java | 84 +++++++++++++++++++ 2 files changed, 145 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java index 320aea3940f..10347e1c1c9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java @@ -213,6 +213,67 @@ public void testHighlightWithStats() throws IOException { assertTrue("Response should contain datarows", result.has("datarows")); } + @Test + public void testHighlightBooleanOrSearch() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" OR \"Bond\"", "[\"*\"]"); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue("OR search should return results", dataRows.length() > 0); + assertHighlightsExist(result); + // Verify highlights contain fragments for both search terms + JSONArray highlights = result.getJSONArray("highlights"); + boolean foundHolmes = false; + boolean foundBond = false; + for (int i = 0; i < highlights.length(); i++) { + String hlStr = highlights.getJSONObject(i).toString(); + if (hlStr.contains("Holmes")) foundHolmes = true; + if (hlStr.contains("Bond")) foundBond = true; + } + assertTrue("Highlights should contain Holmes fragments", foundHolmes); + assertTrue("Highlights should contain Bond fragments", foundBond); + } + + @Test + public void testHighlightBooleanAndSearch() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" AND \"Lane\"", "[\"*\"]"); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue("AND search should return results", dataRows.length() > 0); + assertHighlightsExist(result); + // Verify highlights contain fragments for both terms + JSONArray highlights = result.getJSONArray("highlights"); + boolean foundHolmes = false; + boolean foundLane = false; + for (int i = 0; i < highlights.length(); i++) { + String hlStr = highlights.getJSONObject(i).toString(); + if (hlStr.contains("Holmes")) foundHolmes = true; + if (hlStr.contains("Lane")) foundLane = true; + } + assertTrue("Highlights should contain Holmes fragments", foundHolmes); + assertTrue("Highlights should contain Lane fragments", foundLane); + } + + @Test + public void testHighlightNotSearch() throws IOException { + // NOT queries negate the match — the query succeeds but highlights are empty + // because there are no positive matches to highlight + JSONObject result = + executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " NOT \"Holmes\"", "[\"*\"]"); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue("NOT search should return results", dataRows.length() > 0); + } + + @Test + public void testHighlightBooleanOrWithFilter() throws IOException { + JSONObject result = + executeQueryWithHighlight( + "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" OR \"Bond\" | where age > 30", "[\"*\"]"); + assertTrue("Response should contain datarows", result.has("datarows")); + assertHighlightsExist(result); + } + @Test public void testHighlightNoSearchQuery() throws IOException { // Without a search query, request should still succeed (highlights may or may not be present) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java index 362b4493b93..f254670f23b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java @@ -8,6 +8,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import java.util.List; @@ -198,4 +199,87 @@ public void testGetHighlightConfigPerFieldOptions() { assertEquals(3, config.fields().get("title").get("number_of_fragments")); assertEquals("plain", config.fields().get("body").get("type")); } + + @Test + public void testGetHighlightConfigEmptyFieldNameThrows() { + JSONObject json = new JSONObject("{\"query\": \"source=t\", \"highlight\": [\"\"]}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, request::getHighlightConfig); + assertTrue(e.getMessage().contains("non-empty")); + } + + @Test + public void testGetHighlightConfigNegativeFragmentSizeThrows() { + JSONObject json = + new JSONObject( + "{\"query\": \"source=t\", \"highlight\": {" + + "\"fields\": {\"*\": {}}, \"fragment_size\": -1}}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, request::getHighlightConfig); + assertTrue(e.getMessage().contains("fragment_size must be a positive integer")); + } + + @Test + public void testGetHighlightConfigZeroFragmentSizeThrows() { + JSONObject json = + new JSONObject( + "{\"query\": \"source=t\", \"highlight\": {" + + "\"fields\": {\"*\": {}}, \"fragment_size\": 0}}"); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, request::getHighlightConfig); + assertTrue(e.getMessage().contains("fragment_size must be a positive integer")); + } + + @Test + public void testGetHighlightConfigExceedMaxFieldsArrayThrows() { + // Build an array with 101 fields + StringBuilder sb = new StringBuilder("{\"query\": \"source=t\", \"highlight\": ["); + for (int i = 0; i < 101; i++) { + if (i > 0) sb.append(","); + sb.append("\"field").append(i).append("\""); + } + sb.append("]}"); + JSONObject json = new JSONObject(sb.toString()); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, request::getHighlightConfig); + assertTrue(e.getMessage().contains("fields count exceeds maximum")); + } + + @Test + public void testGetHighlightConfigExceedMaxFieldsObjectThrows() { + // Build an object with 101 fields + StringBuilder sb = new StringBuilder("{\"query\": \"source=t\", \"highlight\": {\"fields\": {"); + for (int i = 0; i < 101; i++) { + if (i > 0) sb.append(","); + sb.append("\"field").append(i).append("\": {}"); + } + sb.append("}}}"); + JSONObject json = new JSONObject(sb.toString()); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, request::getHighlightConfig); + assertTrue(e.getMessage().contains("fields count exceeds maximum")); + } + + @Test + public void testGetHighlightConfigExceedMaxTagsThrows() { + // Build pre_tags array with 11 entries + StringBuilder sb = + new StringBuilder( + "{\"query\": \"source=t\", \"highlight\": {\"fields\": {\"*\": {}}, \"pre_tags\": ["); + for (int i = 0; i < 11; i++) { + if (i > 0) sb.append(","); + sb.append("\"tag").append(i).append("\""); + } + sb.append("]}}"); + JSONObject json = new JSONObject(sb.toString()); + PPLQueryRequest request = new PPLQueryRequest("source=t", json, "/_plugins/_ppl"); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, request::getHighlightConfig); + assertTrue(e.getMessage().contains("pre_tags count exceeds maximum")); + } } From e2ef1b2040d97979ca3205cd7da3042bade6ba5d Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 19 Mar 2026 09:55:10 -0700 Subject: [PATCH 23/25] chen - update doc Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index 00790b88656..b16752f4288 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -280,6 +280,16 @@ Expected output (trimmed): | `post_tags` | Array of string | No | Tags inserted after highlighted tokens. Defaults to ``. | | `fragment_size` | Integer | No | Maximum character size of a highlight fragment. Defaults to `100`. | +### Limits + +| Constraint | Max value | Description | +|---|---|---| +| Highlight fields | 100 | Maximum number of fields in the `highlight` array or `fields` object. | +| Pre/post tags | 10 | Maximum number of entries in each `pre_tags` or `post_tags` array. | +| Fragment size | > 0 | Must be a positive integer. | + +Exceeding these limits returns an error. + ### Notes - Highlighting requires a search term in the PPL statement (e.g. `source=accounts "Holmes"`). Without a search term (e.g. just `source=accounts`), the `highlights` array entries will be empty. From d289025a7f7715310a36e0335a1c0fb3e85b5921 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 19 Mar 2026 11:43:24 -0700 Subject: [PATCH 24/25] review - make highlight embeded into datarows/schemas Signed-off-by: Jialiang Liang --- docs/user/ppl/interfaces/endpoint.md | 28 +++--- .../calcite/remote/CalciteHighlightIT.java | 91 ++++++++++++------- .../sql/protocol/response/QueryResult.java | 38 +------- .../format/SimpleJsonResponseFormatter.java | 8 -- .../protocol/response/QueryResultTest.java | 82 ++++------------- .../SimpleJsonResponseFormatterTest.java | 14 ++- 6 files changed, 101 insertions(+), 160 deletions(-) diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md index b16752f4288..a015c9204ff 100644 --- a/docs/user/ppl/interfaces/endpoint.md +++ b/docs/user/ppl/interfaces/endpoint.md @@ -204,7 +204,7 @@ Expected output (trimmed): ## Highlight -You can add a `highlight` parameter to the PPL request body to enable search-result highlighting. This parameter follows the same semantics as the [OpenSearch highlight API](https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/). When enabled, the response includes a top-level `highlights` array containing matching fragments with the specified tags. Each entry in the `highlights` array corresponds to the row at the same index in `datarows`. +You can add a `highlight` parameter to the PPL request body to enable search-result highlighting. This parameter follows the same semantics as the [OpenSearch highlight API](https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/). When enabled, the response includes a `_highlight` column in `schema` and `datarows` containing matching fragments with the specified tags. Each `_highlight` value in a datarow is an object whose keys are field names and whose values are arrays of highlight fragments for the corresponding row. Two formats are supported: @@ -246,25 +246,21 @@ Expected output (trimmed): "schema": [ { "name": "account_number", "type": "bigint" }, { "name": "firstname", "type": "string" }, - { "name": "lastname", "type": "string" } + { "name": "lastname", "type": "string" }, + { "name": "_highlight", "type": "struct" } ], "datarows": [ - [578, "Holmes", "Mcknight"], - [828, "Blanche", "Holmes"], - [1, "Amber", "Duke"] - ], - "highlights": [ - { + [578, "Holmes", "Mcknight", { "firstname": ["@opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@"], "firstname.keyword": ["@opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@"] - }, - { + }], + [828, "Blanche", "Holmes", { "lastname": ["@opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@"], "lastname.keyword": ["@opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@"] - }, - { + }], + [1, "Amber", "Duke", { "address": ["880 @opensearch-dashboards-highlighted-field@Holmes@/opensearch-dashboards-highlighted-field@ Lane"] - } + }] ], "total": 3, "size": 3 @@ -292,8 +288,8 @@ Exceeding these limits returns an error. ### Notes -- Highlighting requires a search term in the PPL statement (e.g. `source=accounts "Holmes"`). Without a search term (e.g. just `source=accounts`), the `highlights` array entries will be empty. -- The `highlights` array in the response is parallel to `datarows` — each entry contains the highlighted fragments for the corresponding row. +- Highlighting requires a search term in the PPL statement (e.g. `source=accounts "Holmes"`). Without a search term (e.g. just `source=accounts`), the `_highlight` values in datarows will be empty objects. +- The `_highlight` column appears in `schema` and `datarows` as a regular column. Each `_highlight` value is an object whose keys are field names and whose values are arrays of highlight fragments. - In the simple array format, `["*"]` highlights all fields. Specific field names like `["firstname", "lastname"]` scope highlighting to those fields only. - In the object format, each key in the `fields` object is a field name or wildcard. Each value is an object of per-field highlight options. Supported per-field options: `fragment_size`, `number_of_fragments`, `type` (`plain`, `unified`, `fvh`), `pre_tags`, `post_tags`, `require_field_match`, `no_match_size`, `order`. Use `{}` for defaults. Example: `{"title": {"fragment_size": 200}, "body": {"type": "plain"}}`. -- Highlights may include fields that are not explicitly projected in the `schema`/`datarows`. For example, using `{"*": {}}` highlights all fields that matched the search query, including fields not selected by `| fields`. In the example above, the `address` field appears in `highlights` because it contains a match ("880 Holmes Lane") even though only `account_number`, `firstname`, and `lastname` are projected. +- Highlights may include fields that are not explicitly projected in the other columns. For example, using `{"*": {}}` highlights all fields that matched the search query, including fields not selected by `| fields`. In the example above, the `address` field appears in `_highlight` because it contains a match ("880 Holmes Lane") even though only `account_number`, `firstname`, and `lastname` are projected as separate columns. diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java index 10347e1c1c9..14a085327e4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteHighlightIT.java @@ -44,13 +44,15 @@ public void testHighlightWildcardWithSearchQuery() throws IOException { public void testHighlightContainsMatchingFragments() throws IOException { JSONObject result = executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holmes\"", "[\"*\"]"); - JSONArray highlights = result.getJSONArray("highlights"); - assertTrue("highlights array should not be empty", highlights.length() > 0); + int hlIndex = getHighlightColumnIndex(result); + assertTrue("_highlight column should exist in schema", hlIndex >= 0); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue("datarows should not be empty", dataRows.length() > 0); // At least one highlight entry should have non-empty data boolean foundFragment = false; - for (int i = 0; i < highlights.length(); i++) { - JSONObject hlEntry = highlights.getJSONObject(i); - if (hlEntry.length() > 0) { + for (int i = 0; i < dataRows.length(); i++) { + JSONObject hlEntry = dataRows.getJSONArray(i).optJSONObject(hlIndex); + if (hlEntry != null && hlEntry.length() > 0) { foundFragment = true; break; } @@ -69,11 +71,11 @@ public void testHighlightOsdObjectFormat() throws IOException { assertTrue(dataRows.length() > 0); assertHighlightsExist(result); // Verify custom tags are applied - JSONArray highlights = result.getJSONArray("highlights"); + int hlIndex = getHighlightColumnIndex(result); boolean foundCustomTag = false; - for (int i = 0; i < highlights.length(); i++) { - String hlStr = highlights.getJSONObject(i).toString(); - if (hlStr.contains("")) { + for (int i = 0; i < dataRows.length(); i++) { + JSONObject hlEntry = dataRows.getJSONArray(i).optJSONObject(hlIndex); + if (hlEntry != null && hlEntry.toString().contains("")) { foundCustomTag = true; break; } @@ -93,11 +95,12 @@ public void testHighlightOsdObjectFormatWithDashboardsTags() throws IOException assertTrue(dataRows.length() > 0); assertHighlightsExist(result); // Verify dashboards tags are applied - JSONArray highlights = result.getJSONArray("highlights"); + int hlIndex = getHighlightColumnIndex(result); boolean foundDashboardsTag = false; - for (int i = 0; i < highlights.length(); i++) { - String hlStr = highlights.getJSONObject(i).toString(); - if (hlStr.contains("@opensearch-dashboards-highlighted-field@")) { + for (int i = 0; i < dataRows.length(); i++) { + JSONObject hlEntry = dataRows.getJSONArray(i).optJSONObject(hlIndex); + if (hlEntry != null + && hlEntry.toString().contains("@opensearch-dashboards-highlighted-field@")) { foundDashboardsTag = true; break; } @@ -162,7 +165,7 @@ public void testHighlightWildcardInSearchText() throws IOException { JSONObject result = executeQueryWithHighlight("source=" + TEST_INDEX_ACCOUNT + " \"Holm*\"", "[\"*\"]"); assertTrue("Response should contain datarows", result.has("datarows")); - assertTrue("Response should contain highlights array", result.has("highlights")); + assertHighlightsExist(result); } @Test @@ -172,7 +175,7 @@ public void testHighlightMixedFullTextAndStructuredFilter() throws IOException { "source=" + TEST_INDEX_ACCOUNT + " \"Holmes\" | where age > 30 | fields firstname, age", "[\"*\"]"); assertTrue("Response should contain datarows", result.has("datarows")); - assertTrue("Response should contain highlights array", result.has("highlights")); + assertHighlightsExist(result); } @Test @@ -201,7 +204,7 @@ public void testHighlightWithHead() throws IOException { assertTrue("Response should contain datarows", result.has("datarows")); JSONArray dataRows = result.getJSONArray("datarows"); assertTrue("Should return at most 2 rows", dataRows.length() <= 2); - assertTrue("Response should contain highlights array", result.has("highlights")); + assertHighlightsExist(result); } @Test @@ -222,13 +225,16 @@ public void testHighlightBooleanOrSearch() throws IOException { assertTrue("OR search should return results", dataRows.length() > 0); assertHighlightsExist(result); // Verify highlights contain fragments for both search terms - JSONArray highlights = result.getJSONArray("highlights"); + int hlIndex = getHighlightColumnIndex(result); boolean foundHolmes = false; boolean foundBond = false; - for (int i = 0; i < highlights.length(); i++) { - String hlStr = highlights.getJSONObject(i).toString(); - if (hlStr.contains("Holmes")) foundHolmes = true; - if (hlStr.contains("Bond")) foundBond = true; + for (int i = 0; i < dataRows.length(); i++) { + JSONObject hlEntry = dataRows.getJSONArray(i).optJSONObject(hlIndex); + if (hlEntry != null) { + String hlStr = hlEntry.toString(); + if (hlStr.contains("Holmes")) foundHolmes = true; + if (hlStr.contains("Bond")) foundBond = true; + } } assertTrue("Highlights should contain Holmes fragments", foundHolmes); assertTrue("Highlights should contain Bond fragments", foundBond); @@ -243,13 +249,16 @@ public void testHighlightBooleanAndSearch() throws IOException { assertTrue("AND search should return results", dataRows.length() > 0); assertHighlightsExist(result); // Verify highlights contain fragments for both terms - JSONArray highlights = result.getJSONArray("highlights"); + int hlIndex = getHighlightColumnIndex(result); boolean foundHolmes = false; boolean foundLane = false; - for (int i = 0; i < highlights.length(); i++) { - String hlStr = highlights.getJSONObject(i).toString(); - if (hlStr.contains("Holmes")) foundHolmes = true; - if (hlStr.contains("Lane")) foundLane = true; + for (int i = 0; i < dataRows.length(); i++) { + JSONObject hlEntry = dataRows.getJSONArray(i).optJSONObject(hlIndex); + if (hlEntry != null) { + String hlStr = hlEntry.toString(); + if (hlStr.contains("Holmes")) foundHolmes = true; + if (hlStr.contains("Lane")) foundLane = true; + } } assertTrue("Highlights should contain Holmes fragments", foundHolmes); assertTrue("Highlights should contain Lane fragments", foundLane); @@ -282,10 +291,10 @@ public void testHighlightNoSearchQuery() throws IOException { } @Test - public void testWithoutHighlightNoHighlightArray() throws IOException { - // Without highlight parameter, highlights array should NOT appear + public void testWithoutHighlightNoHighlightColumn() throws IOException { + // Without highlight parameter, _highlight column should NOT appear in schema JSONObject result = executeQuery("source=" + TEST_INDEX_BANK); - assertFalse("Response should NOT contain highlights array", result.has("highlights")); + assertTrue("_highlight column should NOT be in schema", getHighlightColumnIndex(result) < 0); } /** @@ -316,10 +325,26 @@ protected JSONObject executeQueryWithHighlight(String query, String highlightJso return jsonify(getResponseBody(response, true)); } - /** Assert that the response contains a non-empty highlights array. */ + /** + * Find the index of the _highlight column in the schema array. + * + * @return the column index, or -1 if not present + */ + private int getHighlightColumnIndex(JSONObject result) { + JSONArray schema = result.getJSONArray("schema"); + for (int i = 0; i < schema.length(); i++) { + if ("_highlight".equals(schema.getJSONObject(i).getString("name"))) { + return i; + } + } + return -1; + } + + /** Assert that the response contains a _highlight column with non-empty highlight data. */ private void assertHighlightsExist(JSONObject result) { - assertTrue("Response should contain highlights array", result.has("highlights")); - JSONArray highlights = result.getJSONArray("highlights"); - assertTrue("Highlights array should not be empty", highlights.length() > 0); + int hlIndex = getHighlightColumnIndex(result); + assertTrue("Schema should contain _highlight column", hlIndex >= 0); + JSONArray dataRows = result.getJSONArray("datarows"); + assertTrue("datarows should not be empty", dataRows.length() > 0); } } diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java index 9c0593bd887..a1ea6d462e6 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/QueryResult.java @@ -5,15 +5,11 @@ package org.opensearch.sql.protocol.response; -import static org.opensearch.sql.expression.HighlightExpression.HIGHLIGHT_FIELD; - import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.stream.Collectors; import lombok.Getter; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; @@ -75,7 +71,6 @@ public int size() { public Map columnNameTypes() { Map colNameTypes = new LinkedHashMap<>(); schema.getColumns().stream() - .filter(column -> !HIGHLIGHT_FIELD.equals(getColumnName(column))) .forEach( column -> colNameTypes.put( @@ -90,41 +85,10 @@ public Iterator iterator() { .map(ExprValueUtils::getTupleValue) .map( tuple -> - tuple.entrySet().stream() - .filter(e -> !HIGHLIGHT_FIELD.equals(e.getKey())) - .map(e -> e.getValue().value()) - .toArray(Object[]::new)) + tuple.entrySet().stream().map(e -> e.getValue().value()).toArray(Object[]::new)) .iterator(); } - /** - * Extract highlight data from each result row. Each row may contain a {@code _highlight} field - * added by {@code OpenSearchResponse.addHighlightsToBuilder()} and preserved through projection. - * Returns a list parallel to datarows where each entry is either a map of field name to highlight - * fragments, or null if no highlight data exists for that row. - */ - public List> highlights() { - return exprValues.stream() - .map(ExprValueUtils::getTupleValue) - .map( - tuple -> { - ExprValue hl = tuple.get(HIGHLIGHT_FIELD); - if (hl == null || hl.isMissing() || hl.isNull()) { - return null; - } - Map hlMap = new LinkedHashMap<>(); - for (Map.Entry entry : hl.tupleValue().entrySet()) { - hlMap.put( - entry.getKey(), - entry.getValue().collectionValue().stream() - .map(ExprValue::stringValue) - .collect(Collectors.toList())); - } - return (Map) hlMap; - }) - .collect(Collectors.toList()); - } - private String getColumnName(Column column) { return (column.getAlias() != null) ? column.getAlias() : column.getName(); } diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java index abade18e9f9..88afd636712 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatter.java @@ -6,8 +6,6 @@ package org.opensearch.sql.protocol.response.format; import java.util.List; -import java.util.Map; -import java.util.Objects; import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -57,11 +55,6 @@ public Object buildJsonObject(QueryResult response) { json.datarows(fetchDataRows(response)); - List> highlights = response.highlights(); - if (highlights.stream().anyMatch(Objects::nonNull)) { - json.highlights(highlights); - } - formatMetric.set(System.nanoTime() - formatTime); json.profile(QueryProfiling.current().finish()); @@ -87,7 +80,6 @@ public static class JsonResponse { private final List schema; private final Object[][] datarows; - private final List> highlights; private long total; private long size; diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java index 69faf1e16cb..ee193570b1b 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java @@ -7,7 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static org.opensearch.sql.data.model.ExprValueUtils.collectionValue; import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; @@ -116,7 +116,7 @@ void iterate() { } @Test - void columnNameTypesFiltersHighlightField() { + void columnNameTypesIncludesHighlightField() { ExecutionEngine.Schema schemaWithHighlight = new ExecutionEngine.Schema( ImmutableList.of( @@ -129,11 +129,13 @@ void columnNameTypesFiltersHighlightField() { Collections.singletonList(tupleValue(ImmutableMap.of("name", "John", "age", 20))), Cursor.None); - assertEquals(ImmutableMap.of("name", "string", "age", "integer"), response.columnNameTypes()); + Map colNameTypes = response.columnNameTypes(); + assertEquals("array", colNameTypes.get("_highlight")); + assertEquals(3, colNameTypes.size()); } @Test - void iterateFiltersHighlightField() { + void iterateIncludesHighlightField() { java.util.LinkedHashMap map = new java.util.LinkedHashMap<>(); map.put("name", ExprValueUtils.stringValue("John")); map.put("age", ExprValueUtils.integerValue(20)); @@ -142,66 +144,22 @@ void iterateFiltersHighlightField() { ExprTupleValue.fromExprValueMap( Map.of("name", collectionValue(List.of("highlighted John"))))); ExprValue row = ExprTupleValue.fromExprValueMap(map); - QueryResult response = new QueryResult(schema, Collections.singletonList(row), Cursor.None); - - for (Object[] objects : response) { - assertArrayEquals(new Object[] {"John", 20}, objects); - } - } - - @Test - void highlightsExtractsHighlightData() { - ExprValue row = - ExprTupleValue.fromExprValueMap( - new java.util.LinkedHashMap<>( - Map.of( - "name", ExprValueUtils.stringValue("John"), - "_highlight", - ExprTupleValue.fromExprValueMap( - Map.of("name", collectionValue(List.of("John"))))))); - QueryResult response = new QueryResult(schema, Collections.singletonList(row), Cursor.None); - - List> highlights = response.highlights(); - assertEquals(1, highlights.size()); - assertEquals(Map.of("name", List.of("John")), highlights.get(0)); - } - @Test - void highlightsReturnsNullWhenNoHighlightData() { + ExecutionEngine.Schema schemaWithHighlight = + new ExecutionEngine.Schema( + ImmutableList.of( + new ExecutionEngine.Schema.Column("name", null, STRING), + new ExecutionEngine.Schema.Column("age", null, INTEGER), + new ExecutionEngine.Schema.Column("_highlight", null, ARRAY))); QueryResult response = - new QueryResult( - schema, - Collections.singletonList(tupleValue(ImmutableMap.of("name", "John", "age", 20))), - Cursor.None); - - List> highlights = response.highlights(); - assertEquals(1, highlights.size()); - assertNull(highlights.get(0)); - } - - @Test - void highlightsReturnsNullWhenHighlightIsMissing() { - java.util.LinkedHashMap map = new java.util.LinkedHashMap<>(); - map.put("name", ExprValueUtils.stringValue("John")); - map.put("_highlight", ExprValueUtils.LITERAL_MISSING); - ExprValue row = ExprTupleValue.fromExprValueMap(map); - QueryResult response = new QueryResult(schema, Collections.singletonList(row), Cursor.None); + new QueryResult(schemaWithHighlight, Collections.singletonList(row), Cursor.None); - List> highlights = response.highlights(); - assertEquals(1, highlights.size()); - assertNull(highlights.get(0)); - } - - @Test - void highlightsReturnsNullWhenHighlightIsNull() { - java.util.LinkedHashMap map = new java.util.LinkedHashMap<>(); - map.put("name", ExprValueUtils.stringValue("John")); - map.put("_highlight", ExprValueUtils.LITERAL_NULL); - ExprValue row = ExprTupleValue.fromExprValueMap(map); - QueryResult response = new QueryResult(schema, Collections.singletonList(row), Cursor.None); - - List> highlights = response.highlights(); - assertEquals(1, highlights.size()); - assertNull(highlights.get(0)); + for (Object[] objects : response) { + assertEquals(3, objects.length); + assertEquals("John", objects[0]); + assertEquals(20, objects[1]); + // _highlight value should be present (a map) + assertTrue(objects[2] instanceof Map); + } } } diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java index 5377c1c4d46..778b97f892c 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/SimpleJsonResponseFormatterTest.java @@ -168,6 +168,12 @@ void formatResponseWithArrayValue() { @Test void formatResponseWithHighlights() { + ExecutionEngine.Schema schemaWithHighlight = + new ExecutionEngine.Schema( + ImmutableList.of( + new ExecutionEngine.Schema.Column("firstname", null, STRING), + new ExecutionEngine.Schema.Column("age", null, INTEGER), + new ExecutionEngine.Schema.Column("_highlight", null, STRING))); java.util.LinkedHashMap map = new java.util.LinkedHashMap<>(); map.put("firstname", ExprValueUtils.stringValue("John")); map.put("age", ExprValueUtils.integerValue(20)); @@ -176,14 +182,14 @@ void formatResponseWithHighlights() { ExprTupleValue.fromExprValueMap( Map.of("firstname", collectionValue(List.of("John"))))); ExprValue row = ExprTupleValue.fromExprValueMap(map); - QueryResult response = new QueryResult(schema, Collections.singletonList(row)); + QueryResult response = new QueryResult(schemaWithHighlight, Collections.singletonList(row)); SimpleJsonResponseFormatter formatter = new SimpleJsonResponseFormatter(COMPACT); String result = formatter.format(response); assertEquals( "{\"schema\":[{\"name\":\"firstname\",\"type\":\"string\"}," - + "{\"name\":\"age\",\"type\":\"integer\"}]," - + "\"datarows\":[[\"John\",20]]," - + "\"highlights\":[{\"firstname\":[\"John\"]}]," + + "{\"name\":\"age\",\"type\":\"integer\"}," + + "{\"name\":\"_highlight\",\"type\":\"string\"}]," + + "\"datarows\":[[\"John\",20,{\"firstname\":[\"John\"]}]]," + "\"total\":1,\"size\":1}", result); } From 08d3c20bfc91f9c25828a6a9d28699d9edf449b6 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 19 Mar 2026 12:10:35 -0700 Subject: [PATCH 25/25] fix security IT Signed-off-by: Jialiang Liang --- .../security/CalciteCrossClusterSearchIT.java | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 20a2a539c70..e55e406de7b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -16,6 +16,7 @@ import java.io.IOException; import java.util.Locale; import org.apache.commons.text.StringEscapeUtils; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.Assert; import org.junit.Test; @@ -485,10 +486,18 @@ public void testCrossClusterHighlightWildcard() throws IOException { String.format( "search source=%s \\\"Hattie\\\" | fields firstname", TEST_INDEX_BANK_REMOTE), "[\"*\"]"); - verifySchema(result, schema("firstname", "string")); - verifyDataRows(result, rows("Hattie")); - var highlights = result.getJSONArray("highlights"); - var highlight = highlights.getJSONObject(0); + JSONArray schemaArray = result.getJSONArray("schema"); + int hlIndex = -1; + for (int i = 0; i < schemaArray.length(); i++) { + if ("_highlight".equals(schemaArray.getJSONObject(i).getString("name"))) { + hlIndex = i; + break; + } + } + Assert.assertTrue("Schema should contain _highlight column", hlIndex >= 0); + JSONArray dataRows = result.getJSONArray("datarows"); + Assert.assertTrue("Should have at least one row", dataRows.length() > 0); + var highlight = dataRows.getJSONArray(0).getJSONObject(hlIndex); Assert.assertTrue( "Highlight should contain Hattie", highlight.getJSONArray("firstname").getString(0).contains("Hattie"));