From ad4449519ab7178abb7edb283bc5faeeaad4ec77 Mon Sep 17 00:00:00 2001 From: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> Date: Mon, 9 Mar 2026 19:22:33 -0400 Subject: [PATCH 1/5] added cloudwatch style contains operator Signed-off-by: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> --- .../opensearch/sql/ppl/WhereCommandIT.java | 28 +++++++++++++++++ .../src/main/antlr4/OpenSearchPPLLexer.g4 | 1 + .../src/main/antlr4/OpenSearchPPLParser.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 ++ .../sql/ppl/parser/AstExpressionBuilder.java | 9 ++++++ .../ppl/parser/AstExpressionBuilderTest.java | 30 +++++++++++++++++++ 7 files changed, 72 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java index 224ebd1e1f6..6366c7b7604 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java @@ -144,6 +144,34 @@ public void testLikeOperatorCaseInsensitive() throws IOException { verifyDataRows(result3, rows("Amber")); } + @Test + public void testContainsOperator() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where firstname contains 'mbe' | fields firstname", + TEST_INDEX_ACCOUNT)); + verifyDataRows(result, rows("Amber")); + + result = + executeQuery( + String.format( + "source=%s | where firstname contains 'zzz' | fields firstname", + TEST_INDEX_ACCOUNT)); + assertEquals(0, result.getInt("total")); + } + + @Test + public void testContainsOperatorCaseInsensitive() throws IOException { + // contains uses ilike semantics - case insensitive + JSONObject result = + executeQuery( + String.format( + "source=%s | where firstname contains 'MBE' | fields firstname", + TEST_INDEX_ACCOUNT)); + verifyDataRows(result, rows("Amber")); + } + @Test public void 
testIsNullFunction() throws IOException { JSONObject result = diff --git a/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 b/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 index b7dc4b7286d..2248374d8d9 100644 --- a/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -413,6 +413,7 @@ REDUCE: 'REDUCE'; // BOOL FUNCTIONS LIKE: 'LIKE'; +CONTAINS: 'CONTAINS'; ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; BETWEEN: 'BETWEEN'; diff --git a/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 b/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 index cae57b53181..0b8203b7ec2 100644 --- a/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 @@ -945,6 +945,7 @@ geoIpProperty | GREATER | NOT_GREATER | REGEXP + | CONTAINS ; singleFieldRelevanceFunctionName diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 732d3944a68..bbb57729303 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -459,6 +459,7 @@ CAST: 'CAST'; // BOOL FUNCTIONS LIKE: 'LIKE'; ILIKE: 'ILIKE'; +CONTAINS: 'CONTAINS'; ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; CIDRMATCH: 'CIDRMATCH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 53cb4eda36c..09beb7f9ad9 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1444,6 +1444,7 @@ positionFunctionName | REGEXP | LIKE | ILIKE + | CONTAINS ; singleFieldRelevanceFunctionName @@ -1609,6 +1610,7 @@ searchableKeyWord | ELSE | ARROW | BETWEEN + | CONTAINS | EXISTS | SOURCE | INDEX diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 471c0c2f1c9..137db9e03c6 100644 --- 
a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -213,6 +213,15 @@ public UnresolvedExpression visitCompareExpr(CompareExprContext ctx) { String operator = ctx.comparisonOperator().getText(); if ("==".equals(operator)) { operator = EQUAL.getName().getFunctionName(); + } else if ("contains".equalsIgnoreCase(operator)) { + UnresolvedExpression left = visit(ctx.left); + UnresolvedExpression right = visit(ctx.right); + if (!(right instanceof Literal) || ((Literal) right).getType() != DataType.STRING) { + throw new SemanticCheckException( + "The right-hand side of 'contains' must be a string literal"); + } + String wrapped = "%" + ((Literal) right).getValue() + "%"; + return new Compare(ILIKE.getName().getFunctionName(), left, new Literal(wrapped, DataType.STRING)); } else if (LIKE.getName().getFunctionName().equalsIgnoreCase(operator) && UnresolvedPlanHelper.isCalciteEnabled(astBuilder.getSettings())) { operator = diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index b316e461889..6bce5ecddd9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -66,6 +66,7 @@ import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.exception.SemanticCheckException; public class AstExpressionBuilderTest extends AstBuilderTest { @Test @@ -227,6 +228,35 @@ public void testLikeOperatorCaseInsensitive() { filter(relation("t"), compare("ilike", field("a"), stringLiteral("pattern")))); } + @Test + public void testContainsOperatorExpr() { + assertEqual( + "source=t | where a contains 'hello'", + 
filter(relation("t"), compare("ilike", field("a"), stringLiteral("%hello%")))); + + assertEqual( + "source=t | where message contains 'err'", + filter(relation("t"), compare("ilike", field("message"), stringLiteral("%err%")))); + } + + @Test + public void testContainsOperatorCaseInsensitive() { + assertEqual( + "source=t | where a CONTAINS 'hello'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%hello%")))); + + assertEqual( + "source=t | where a Contains 'hello'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%hello%")))); + } + + @Test + public void testContainsOperatorNonLiteralRhsThrows() { + assertThrows( + SemanticCheckException.class, + () -> assertEqual("source=t | where a contains b", (Node) null)); + } + @Test public void testBooleanIsNullFunction() { assertEqual( From 852e5fb4c9761e9f7b6b99ddbade3e4c5ceb6de5 Mon Sep 17 00:00:00 2001 From: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:05:02 -0400 Subject: [PATCH 2/5] added escape \ before wrapping with % and udpated condition.md, index.md Signed-off-by: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> --- docs/user/ppl/functions/condition.md | 66 +++++++++++++++++++ docs/user/ppl/functions/index.md | 1 + .../sql/ppl/parser/AstExpressionBuilder.java | 4 +- .../ppl/parser/AstExpressionBuilderTest.java | 24 +++++++ 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/docs/user/ppl/functions/condition.md b/docs/user/ppl/functions/condition.md index 512b5edbbe6..3be2a29e7e3 100644 --- a/docs/user/ppl/functions/condition.md +++ b/docs/user/ppl/functions/condition.md @@ -758,6 +758,72 @@ fetched rows / total rows = 1/1 +-----+ ``` +## CONTAINS + +### Description + +Usage: `field contains 'substring'` returns TRUE if the field value contains the given substring (case-insensitive), FALSE otherwise. + +The `contains` operator is a CloudWatch-style comparison operator that performs case-insensitive substring matching. 
It is sugar for an `ilike` comparison with `%substring%` wildcards. + +Syntax: `<field> contains '<substring>'` + +- The left-hand side must be a field reference. +- The right-hand side must be a string literal. Using a field reference on the right-hand side will raise a semantic error. +- Matching is case-insensitive. + +**Argument type:** `STRING` +**Return type:** `BOOLEAN` + +### Example + +Basic substring filter: + +```ppl +source=logs +| where message contains 'error' +| fields timestamp, message +``` + +Expected output: + +```text +fetched rows / total rows = 2/5 ++---------------------+----------------------------------+ +| timestamp | message | +|---------------------+----------------------------------| +| 2024-01-15 10:23:45 | ERROR: Connection timeout | +| 2024-01-15 11:02:10 | An error occurred during startup | ++---------------------+----------------------------------+ +``` + +Case-insensitive matching (all of the following are equivalent): + +```ppl +source=logs | where message contains 'error' +source=logs | where message CONTAINS 'error' +source=logs | where message Contains 'Error' +``` + +Combining with other conditions: + +```ppl +source=accounts +| where employer contains 'tech' AND age > 30 +| fields firstname, employer, age +``` + +Expected output: + +```text +fetched rows / total rows = 1/4 ++-----------+----------------+-----+ +| firstname | employer | age | ++-----------+----------------+-----| +| Amber | TechCorp | 32 | ++-----------+----------------+-----+ +``` + ## REGEXP_MATCH ### Description diff --git a/docs/user/ppl/functions/index.md b/docs/user/ppl/functions/index.md index 146288e19dc..cdfbbd201ce 100644 --- a/docs/user/ppl/functions/index.md +++ b/docs/user/ppl/functions/index.md @@ -57,6 +57,7 @@ PPL supports a wide range of built-in functions for data processing and analysis - [EARLIEST](condition.md/#earliest) - [LATEST](condition.md/#latest) - [REGEXP_MATCH](condition.md/#regexp_match) + - [CONTAINS](condition.md/#contains) - [Type Conversion 
Functions](conversion.md) - [CAST](conversion.md/#cast) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 137db9e03c6..1cb33437b90 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -220,7 +220,9 @@ public UnresolvedExpression visitCompareExpr(CompareExprContext ctx) { throw new SemanticCheckException( "The right-hand side of 'contains' must be a string literal"); } - String wrapped = "%" + ((Literal) right).getValue() + "%"; + String raw = ((Literal) right).getValue().toString(); + String escaped = raw.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_"); + String wrapped = "%" + escaped + "%"; return new Compare(ILIKE.getName().getFunctionName(), left, new Literal(wrapped, DataType.STRING)); } else if (LIKE.getName().getFunctionName().equalsIgnoreCase(operator) && UnresolvedPlanHelper.isCalciteEnabled(astBuilder.getSettings())) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 6bce5ecddd9..af10b53defb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -257,6 +257,30 @@ public void testContainsOperatorNonLiteralRhsThrows() { () -> assertEqual("source=t | where a contains b", (Node) null)); } + @Test + public void testContainsOperatorEscapesSpecialChars() { + // % must be escaped so it is treated as a literal character, not a wildcard + assertEqual( + "source=t | where a contains '%'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%\\%%")))); + + // _ must be escaped so it is treated as a literal character, not a single-char wildcard + assertEqual( + 
"source=t | where a contains '_'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%\\_%")))); + + // backslash in PPL is written as '\\'; unquotes to \, then escaped to \\ in the pattern + // Java: "source=t | where a contains '\\\\'" produces PPL: source=t | where a contains '\\' + assertEqual( + "source=t | where a contains '\\\\'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%\\\\%")))); + + // mixed special characters are all escaped + assertEqual( + "source=t | where a contains 'foo%bar_baz'", + filter(relation("t"), compare("ilike", field("a"), stringLiteral("%foo\\%bar\\_baz%")))); + } + @Test public void testBooleanIsNullFunction() { assertEqual( From f5b3bbf10191de85a5a73e6c73f6828de46db618 Mon Sep 17 00:00:00 2001 From: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> Date: Mon, 16 Mar 2026 15:04:56 -0400 Subject: [PATCH 3/5] cleanup Signed-off-by: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> --- .../org/opensearch/sql/ppl/parser/AstExpressionBuilder.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 1cb33437b90..c58eca20575 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -223,7 +223,8 @@ public UnresolvedExpression visitCompareExpr(CompareExprContext ctx) { String raw = ((Literal) right).getValue().toString(); String escaped = raw.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_"); String wrapped = "%" + escaped + "%"; - return new Compare(ILIKE.getName().getFunctionName(), left, new Literal(wrapped, DataType.STRING)); + return new Compare( + ILIKE.getName().getFunctionName(), left, new Literal(wrapped, DataType.STRING)); } else if 
(LIKE.getName().getFunctionName().equalsIgnoreCase(operator) && UnresolvedPlanHelper.isCalciteEnabled(astBuilder.getSettings())) { operator = From ac8c74fdeaff0644209aaff948141ecbd8ffdebc Mon Sep 17 00:00:00 2001 From: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> Date: Thu, 19 Mar 2026 21:09:29 -0400 Subject: [PATCH 4/5] cleanup and fixed doctests and integ tests Signed-off-by: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> --- docs/user/ppl/functions/condition.md | 41 +++++++++---------- .../opensearch/sql/ppl/WhereCommandIT.java | 4 +- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/docs/user/ppl/functions/condition.md b/docs/user/ppl/functions/condition.md index 3be2a29e7e3..cb5dff9107e 100644 --- a/docs/user/ppl/functions/condition.md +++ b/docs/user/ppl/functions/condition.md @@ -780,48 +780,47 @@ Syntax: ` contains ''` Basic substring filter: ```ppl -source=logs -| where message contains 'error' -| fields timestamp, message +source=accounts +| where firstname contains 'mbe' +| fields firstname, age ``` Expected output: ```text -fetched rows / total rows = 2/5 -+---------------------+----------------------------------+ -| timestamp | message | -|---------------------+----------------------------------| -| 2024-01-15 10:23:45 | ERROR: Connection timeout | -| 2024-01-15 11:02:10 | An error occurred during startup | -+---------------------+----------------------------------+ +fetched rows / total rows = 1/1 ++-----------+-----+ +| firstname | age | +|-----------+-----| +| Amber | 32 | ++-----------+-----+ ``` Case-insensitive matching (all of the following are equivalent): -```ppl -source=logs | where message contains 'error' -source=logs | where message CONTAINS 'error' -source=logs | where message Contains 'Error' +```ppl ignore +source=accounts | where firstname contains 'mbe' +source=accounts | where firstname CONTAINS 'MBE' +source=accounts | where firstname Contains 'Mbe' ``` Combining with other conditions: ```ppl 
source=accounts -| where employer contains 'tech' AND age > 30 +| where employer contains 'ami' AND age > 30 | fields firstname, employer, age ``` Expected output: ```text -fetched rows / total rows = 1/4 -+-----------+----------------+-----+ -| firstname | employer | age | -+-----------+----------------+-----| -| Amber | TechCorp | 32 | -+-----------+----------------+-----+ +fetched rows / total rows = 1/1 ++-----------+----------+-----+ +| firstname | employer | age | +|-----------+----------+-----| +| Amber | Pyrami | 32 | ++-----------+----------+-----+ ``` ## REGEXP_MATCH diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java index 6366c7b7604..a386987e532 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java @@ -151,7 +151,7 @@ public void testContainsOperator() throws IOException { String.format( "source=%s | where firstname contains 'mbe' | fields firstname", TEST_INDEX_ACCOUNT)); - verifyDataRows(result, rows("Amber")); + verifyDataRows(result, rows("Amber"), rows("Chambers")); result = executeQuery( @@ -169,7 +169,7 @@ public void testContainsOperatorCaseInsensitive() throws IOException { String.format( "source=%s | where firstname contains 'MBE' | fields firstname", TEST_INDEX_ACCOUNT)); - verifyDataRows(result, rows("Amber")); + verifyDataRows(result, rows("Amber"), rows("Chambers")); } @Test From 0dc8e95708ae2f7c8138920a0356055ee9c939f5 Mon Sep 17 00:00:00 2001 From: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:59:08 -0400 Subject: [PATCH 5/5] added and registered ilike() Signed-off-by: Thy Tran <58045538+ThyTran1402@users.noreply.github.com> --- .../operator/predicate/BinaryPredicateOperators.java | 7 +++++++ .../storage/script/filter/FilterQueryBuilder.java | 1 + 2 files changed, 8 insertions(+) diff --git 
a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java index 3543fc22a1c..a041589285f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java @@ -53,6 +53,7 @@ public static void register(BuiltinFunctionRepository repository) { repository.register(greater()); repository.register(gte()); repository.register(like()); + repository.register(ilike()); repository.register(notLike()); repository.register(regexp()); } @@ -391,6 +392,12 @@ private static DefaultFunctionResolver like() { impl(nullMissingHandling(OperatorUtils::matches3), BOOLEAN, STRING, STRING, BOOLEAN)); } + private static DefaultFunctionResolver ilike() { + return define( + BuiltinFunctionName.ILIKE.getName(), + impl(nullMissingHandling(OperatorUtils::matches2), BOOLEAN, STRING, STRING)); + } + private static DefaultFunctionResolver regexp() { return define( BuiltinFunctionName.REGEXP.getName(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index b7d097b4b88..6ca25b7e9b7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -68,6 +68,7 @@ public ScriptQueryUnSupportedException(String message) { .put(BuiltinFunctionName.LTE.getName(), new RangeQuery(Comparison.LTE)) .put(BuiltinFunctionName.GTE.getName(), new RangeQuery(Comparison.GTE)) .put(BuiltinFunctionName.LIKE.getName(), new LikeQuery()) + .put(BuiltinFunctionName.ILIKE.getName(), new LikeQuery()) 
.put(BuiltinFunctionName.MATCH.getName(), new MatchQuery()) .put(BuiltinFunctionName.MATCH_PHRASE.getName(), new MatchPhraseQuery()) .put(BuiltinFunctionName.MATCHPHRASE.getName(), new MatchPhraseQuery())