From bbacdcd4092a5c6896aca2eca9e2b893abe34311 Mon Sep 17 00:00:00 2001 From: ps48 Date: Wed, 3 Sep 2025 13:07:33 -0700 Subject: [PATCH 01/10] mvjoin support in PPL Caclite Signed-off-by: ps48 --- .../function/BuiltinFunctionName.java | 1 + .../expression/function/PPLFuncImpTable.java | 17 +++ docs/user/ppl/functions/collection.rst | 48 +++++++ .../remote/CalciteArrayFunctionIT.java | 92 +++++++++++++ .../sql/calcite/remote/CalciteExplainIT.java | 11 ++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../calcite/CalcitePPLArrayFunctionTest.java | 129 ++++++++++++++++++ .../calcite/CalcitePPLFunctionTypeTest.java | 29 ++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 20 ++- 10 files changed, 337 insertions(+), 12 deletions(-) create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 257f9fdaa35..16fe2ea151c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -62,6 +62,7 @@ public enum BuiltinFunctionName { /** Collection functions */ ARRAY(FunctionName.of("array")), ARRAY_LENGTH(FunctionName.of("array_length")), + MVJOIN(FunctionName.of("mvjoin")), FORALL(FunctionName.of("forall")), EXISTS(FunctionName.of("exists")), FILTER(FunctionName.of("filter")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index c2a22497417..dc2bdf8145c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -144,6 +144,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVJOIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOTEQUAL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOW; @@ -816,6 +817,22 @@ void populate() { registerOperator(WEEKOFYEAR, PPLBuiltinOperators.WEEK); registerOperator(INTERNAL_PATTERN_PARSER, PPLBuiltinOperators.PATTERN_PARSER); + + // Register MVJOIN with two different implementations + // For single string values - just return the string (register this first so it's checked + // first) + register( + MVJOIN, + (FunctionImp2) (builder, value, delimiter) -> value, + PPLTypeChecker.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER)); + // For arrays - use Calcite's ARRAY_JOIN + register( + MVJOIN, + (FunctionImp2) + (builder, array, delimiter) -> + builder.makeCall(SqlLibraryOperators.ARRAY_JOIN, array, delimiter), + PPLTypeChecker.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER)); + registerOperator(ARRAY, PPLBuiltinOperators.ARRAY); registerOperator(ARRAY_LENGTH, SqlLibraryOperators.ARRAY_LENGTH); registerOperator(FORALL, PPLBuiltinOperators.FORALL); diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst index 05b3c1834bf..daa86c614e9 100644 --- a/docs/user/ppl/functions/collection.rst +++ b/docs/user/ppl/functions/collection.rst @@ -40,6 +40,54 @@ Example:: | ["1", "demo"] | +----------------------------------+ +MVJOIN +------ + +Description +>>>>>>>>>>> + +Version: 3.2.0 + +Usage: mvjoin(array, delimiter) joins string array elements into a single string, separated by the specified delimiter. NULL elements are excluded from the output. Only string arrays are supported. When given a single string value instead of an array, the function returns it unchanged (the delimiter parameter is ignored). + +Argument type: array: ARRAY of STRING or STRING, delimiter: STRING + +Return type: STRING + +Example:: + + PPL> source=people | eval result = mvjoin(array('a', 'b', 'c'), ',') | fields result | head 1 + fetched rows / total rows = 1/1 + +----------------------------------+ + | result | + |----------------------------------| + | a,b,c | + +----------------------------------+ + + PPL> source=people | eval result = mvjoin(array('1', '2', '3'), ' | ') | fields result | head 1 + fetched rows / total rows = 1/1 + +----------------------------------+ + | result | + |----------------------------------| + | 1 | 2 | 3 | + +----------------------------------+ + + PPL> source=people | eval result = mvjoin(array('a', null, 'c'), ',') | fields result | head 1 + fetched rows / total rows = 1/1 + +----------------------------------+ + | result | + |----------------------------------| + | a,c | + +----------------------------------+ + + PPL> source=people | eval result = mvjoin('hello', ',') | fields result | head 1 + fetched rows / total rows = 1/1 + +----------------------------------+ + | result | + |----------------------------------| + | hello | + +----------------------------------+ + ARRAY_LENGTH ------------ diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java index 905b46cc36b..2408d1dc75b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java @@ -241,4 +241,96 @@ public void testReduceWithUDF() throws IOException { verifyDataRows(actual, rows(60)); } + + @Test + public void testMvjoinWithStringArray() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = mvjoin(array('a', 'b', 'c'), ',') | fields result | head" + + " 1", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("a,b,c")); + } + + @Test + public void testMvjoinWithStringifiedNumbers() throws IOException { + // Note: mvjoin only supports string arrays + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = mvjoin(array('1', '2', '3'), ' | ') | fields result |" + + " head 1", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("1 | 2 | 3")); + } + + @Test + public void testMvjoinWithMixedStringValues() throws IOException { + // mvjoin only supports string arrays + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = mvjoin(array('1', 'text', '2.5'), ';') | fields result |" + + " head 1", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("1;text;2.5")); + } + + @Test + public void testMvjoinWithEmptyArray() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = mvjoin(array(), '-') | fields result | head 1", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("")); + } + + @Test + public void testMvjoinWithSingleStringValue() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = mvjoin('hello', ',') | fields result | head 1", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("hello")); + } + + @Test + public void testMvjoinWithStringBooleans() throws IOException { + // mvjoin only supports string arrays + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = mvjoin(array('true', 'false', 'true'), '|') | fields" + + " result | head 1", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("true|false|true")); + } + + @Test + public void testMvjoinWithSpecialDelimiters() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = mvjoin(array('apple', 'banana', 'cherry'), ' AND ') |" + + " fields result | head 1", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("apple AND banana AND cherry")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index a1d5574763f..088ee8f4cf6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -5,6 +5,8 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestUtils.*; +import static org.junit.Assert.assertTrue; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; @@ -557,6 +559,15 @@ public void testExplainAppendCommand() throws IOException { TEST_INDEX_BANK, TEST_INDEX_BANK))); } + @Test + public void testMvjoinExplain() throws IOException { + String query = + "source=opensearch-sql_test_index_account | eval result = mvjoin(array('a', 'b', 'c'), ',')" + + " | fields result | head 1"; + var result = explainQueryToString(query); + assertTrue(result.contains("ARRAY_JOIN")); + assertTrue(result.contains("ARRAY")); + } @Test public void testPushdownLimitIntoAggregation() throws IOException { diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index c86f12dd764..4c37be2f318 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -421,6 +421,7 @@ ISBLANK: 'ISBLANK'; // COLLECTION FUNCTIONS ARRAY: 'ARRAY'; ARRAY_LENGTH: 'ARRAY_LENGTH'; +MVJOIN: 'MVJOIN'; FORALL: 'FORALL'; FILTER: 'FILTER'; TRANSFORM: 'TRANSFORM'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 87230eedd63..6f1499affcd 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -930,6 +930,7 @@ geoipFunctionName collectionFunctionName : ARRAY | ARRAY_LENGTH + | MVJOIN | FORALL | EXISTS | FILTER diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java new file mode 100644 index 00000000000..981ca112b21 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java @@ -0,0 +1,129 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLArrayFunctionTest extends CalcitePPLAbstractTest { + + public CalcitePPLArrayFunctionTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testMvjoinWithStringArray() { + String ppl = + "source=EMP | eval joined = mvjoin(array('a', 'b', 'c'), ',') | head 1 | fields joined"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(joined=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], joined=[ARRAY_JOIN(array('a', 'b', 'c'), ',')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "joined=a,b,c\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT ARRAY_JOIN(`array`('a', 'b', 'c'), ',') `joined`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvjoinWithSingleStringValue() { + String ppl = "source=EMP | eval joined = mvjoin('hello', ',') | head 1 | fields joined"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(joined=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], joined=['hello':VARCHAR])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "joined=hello\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT 'hello' `joined`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvjoinWithDifferentDelimiter() { + String ppl = + "source=EMP | eval joined = mvjoin(array('apple', 'banana', 'cherry'), ' | ') | head 1 |" + + " fields joined"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(joined=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], joined=[ARRAY_JOIN(array('apple':VARCHAR," + + " 'banana':VARCHAR, 'cherry':VARCHAR), ' | ':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "joined=apple | banana | cherry\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT ARRAY_JOIN(`array`('apple', 'banana', 'cherry'), ' | ') `joined`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvjoinWithEmptyArray() { + String ppl = "source=EMP | eval joined = mvjoin(array(), ',') | head 1 | fields joined"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(joined=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], joined=[ARRAY_JOIN(array(), ',')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "joined=\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT ARRAY_JOIN(`array`(), ',') `joined`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvjoinWithFieldReference() { + String ppl = + "source=EMP | eval joined = mvjoin(array(ENAME, JOB), '-') | head 1 | fields joined"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(joined=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], joined=[ARRAY_JOIN(array($1, $2), '-')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT ARRAY_JOIN(`array`(`ENAME`, `JOB`), '-') `joined`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java index 500d6873f89..44db59b314c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java @@ -175,4 +175,33 @@ public void testLog2WithWrongArgShouldThrow() { "source=EMP | eval log2 = log2(ENAME, JOB) | fields log2", "LOG2 function expects {[INTEGER]|[DOUBLE]}, but got [STRING,STRING]"); } + + @Test + public void testMvjoinRejectsNonStringValues() { + // mvjoin should reject non-string single values + Exception e = + Assert.assertThrows( + ExpressionEvaluationException.class, + () -> + getRelNode("source=EMP | eval result = mvjoin(42, ',') | fields result | head 1")); + + verifyErrorMessageContains( + e, "MVJOIN function expects {[STRING,STRING],[ARRAY,STRING]}, but got [INTEGER,STRING]"); + } + + @Test + public void testMvjoinRejectsNumericArrays() { + // mvjoin should reject non-string arrays at runtime + // Note: Type checking doesn't happen at PPL level for array element types + Exception e = + Assert.assertThrows( + RuntimeException.class, + () -> + getRelNode( + "source=EMP | eval result = mvjoin(array(1, 2, 3), ',') | fields result | head" + + " 1")); + + // The actual error comes from Calcite's ARRAY_JOIN operator + verifyErrorMessageContains(e, "arrayToString supports only String or ByteString"); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index c805e5a5dfb..58f0455c379 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -582,20 +582,16 @@ public void testRegex() { } @Test - public void testRexCommand() { - when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); - - assertEquals( - "source=t | rex field=message mode=extract \"(?[A-Z]+)\" max_match=1", - anonymize("source=t | rex field=message \"(?[A-Z]+)\"")); + public void testMvjoin() { + // Test mvjoin with array of strings assertEquals( - "source=t | rex field=lastname mode=extract \"(?^[A-Z])\" max_match=1 | fields +" - + " lastname,initial", - anonymize( - "source=t | rex field=lastname \"(?^[A-Z])\" | fields lastname, initial")); + "source=t | eval result=mvjoin(array(***,***,***),***) | fields + result", + anonymize("source=t | eval result=mvjoin(array('a', 'b', 'c'), ',') | fields result")); + + // Test mvjoin with single string value assertEquals( - "source=t | rex field=name mode=extract \"(?[A-Z])\" max_match=3", - anonymize("source=t | rex field=name \"(?[A-Z])\" max_match=3")); + "source=t | eval result=mvjoin(***,***) | fields + result", + anonymize("source=t | eval result=mvjoin('hello', ',') | fields result")); } @Test From 5259ac5370073b3f2dd34544f6db5c74517cf3bc Mon Sep 17 00:00:00 2001 From: ps48 Date: Wed, 3 Sep 2025 14:48:33 -0700 Subject: [PATCH 02/10] fix texts Signed-off-by: ps48 --- .../ppl/calcite/CalcitePPLFunctionTypeTest.java | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java index 44db59b314c..eaa59f5b786 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java @@ -188,20 +188,4 @@ public void testMvjoinRejectsNonStringValues() { verifyErrorMessageContains( e, "MVJOIN function expects {[STRING,STRING],[ARRAY,STRING]}, but got [INTEGER,STRING]"); } - - @Test - public void testMvjoinRejectsNumericArrays() { - // mvjoin should reject non-string arrays at runtime - // Note: Type checking doesn't happen at PPL level for array element types - Exception e = - Assert.assertThrows( - RuntimeException.class, - () -> - getRelNode( - "source=EMP | eval result = mvjoin(array(1, 2, 3), ',') | fields result | head" - + " 1")); - - // The actual error comes from Calcite's ARRAY_JOIN operator - verifyErrorMessageContains(e, "arrayToString supports only String or ByteString"); - } } From bf80878ec1008c003dcfc0247eeefc65da55091d Mon Sep 17 00:00:00 2001 From: ps48 Date: Wed, 3 Sep 2025 14:53:22 -0700 Subject: [PATCH 03/10] update docs Signed-off-by: ps48 --- docs/user/ppl/functions/collection.rst | 98 +++++++++++++------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst index daa86c614e9..8c2e2c1b8a5 100644 --- a/docs/user/ppl/functions/collection.rst +++ b/docs/user/ppl/functions/collection.rst @@ -40,54 +40,6 @@ Example:: | ["1", "demo"] | +----------------------------------+ -MVJOIN ------- - -Description ->>>>>>>>>>> - -Version: 3.2.0 - -Usage: mvjoin(array, delimiter) joins string array elements into a single string, separated by the specified delimiter. NULL elements are excluded from the output. Only string arrays are supported. When given a single string value instead of an array, the function returns it unchanged (the delimiter parameter is ignored). - -Argument type: array: ARRAY of STRING or STRING, delimiter: STRING - -Return type: STRING - -Example:: - - PPL> source=people | eval result = mvjoin(array('a', 'b', 'c'), ',') | fields result | head 1 - fetched rows / total rows = 1/1 - +----------------------------------+ - | result | - |----------------------------------| - | a,b,c | - +----------------------------------+ - - PPL> source=people | eval result = mvjoin(array('1', '2', '3'), ' | ') | fields result | head 1 - fetched rows / total rows = 1/1 - +----------------------------------+ - | result | - |----------------------------------| - | 1 | 2 | 3 | - +----------------------------------+ - - PPL> source=people | eval result = mvjoin(array('a', null, 'c'), ',') | fields result | head 1 - fetched rows / total rows = 1/1 - +----------------------------------+ - | result | - |----------------------------------| - | a,c | - +----------------------------------+ - - PPL> source=people | eval result = mvjoin('hello', ',') | fields result | head 1 - fetched rows / total rows = 1/1 - +----------------------------------+ - | result | - |----------------------------------| - | hello | - +----------------------------------+ - ARRAY_LENGTH ------------ @@ -246,4 +198,52 @@ Example:: | result | |------------| | 80 | - +------------+ \ No newline at end of file + +------------+ + +MVJOIN +------ + +Description +>>>>>>>>>>> + +Version: 3.3.0 + +Usage: mvjoin(array, delimiter) joins string array elements into a single string, separated by the specified delimiter. NULL elements are excluded from the output. Only string arrays are supported. When given a single string value instead of an array, the function returns it unchanged (the delimiter parameter is ignored). + +Argument type: array: ARRAY of STRING or STRING, delimiter: STRING + +Return type: STRING + +Example:: + + PPL> source=people | eval result = mvjoin(array('a', 'b', 'c'), ',') | fields result | head 1 + fetched rows / total rows = 1/1 + +----------------------------------+ + | result | + |----------------------------------| + | a,b,c | + +----------------------------------+ + + PPL> source=people | eval result = mvjoin(array('1', '2', '3'), ' | ') | fields result | head 1 + fetched rows / total rows = 1/1 + +----------------------------------+ + | result | + |----------------------------------| + | 1 | 2 | 3 | + +----------------------------------+ + + PPL> source=people | eval result = mvjoin(array('a', null, 'c'), ',') | fields result | head 1 + fetched rows / total rows = 1/1 + +----------------------------------+ + | result | + |----------------------------------| + | a,c | + +----------------------------------+ + + PPL> source=people | eval result = mvjoin('hello', ',') | fields result | head 1 + fetched rows / total rows = 1/1 + +----------------------------------+ + | result | + |----------------------------------| + | hello | + +----------------------------------+ \ No newline at end of file From 8b8a1c152839d4034b64489c79c2224de2e274d4 Mon Sep 17 00:00:00 2001 From: ps48 Date: Wed, 3 Sep 2025 14:54:52 -0700 Subject: [PATCH 04/10] update doc examples Signed-off-by: ps48 --- docs/user/ppl/functions/collection.rst | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst index 8c2e2c1b8a5..b1cf5171b62 100644 --- a/docs/user/ppl/functions/collection.rst +++ b/docs/user/ppl/functions/collection.rst @@ -223,22 +223,6 @@ Example:: |----------------------------------| | a,b,c | +----------------------------------+ - - PPL> source=people | eval result = mvjoin(array('1', '2', '3'), ' | ') | fields result | head 1 - fetched rows / total rows = 1/1 - +----------------------------------+ - | result | - |----------------------------------| - | 1 | 2 | 3 | - +----------------------------------+ - - PPL> source=people | eval result = mvjoin(array('a', null, 'c'), ',') | fields result | head 1 - fetched rows / total rows = 1/1 - +----------------------------------+ - | result | - |----------------------------------| - | a,c | - +----------------------------------+ PPL> source=people | eval result = mvjoin('hello', ',') | fields result | head 1 fetched rows / total rows = 1/1 From 4875c8a7de28b193b1ca736f9b067b0595ebb2cb Mon Sep 17 00:00:00 2001 From: ps48 Date: Thu, 4 Sep 2025 10:09:03 -0700 Subject: [PATCH 05/10] rebase main, update test Signed-off-by: ps48 --- .../org/opensearch/sql/calcite/remote/CalciteExplainIT.java | 4 ++-- .../resources/expectedOutput/calcite/explain_mvjoin.json | 6 ++++++ .../expectedOutput/calcite_no_pushdown/explain_mvjoin.json | 6 ++++++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_mvjoin.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvjoin.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 088ee8f4cf6..bf14089a8ea 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -565,8 +565,8 @@ public void testMvjoinExplain() throws IOException { "source=opensearch-sql_test_index_account | eval result = mvjoin(array('a', 'b', 'c'), ',')" + " | fields result | head 1"; var result = explainQueryToString(query); - assertTrue(result.contains("ARRAY_JOIN")); - assertTrue(result.contains("ARRAY")); + String expected = loadExpectedPlan("explain_mvjoin.json"); + assertJsonEqualsIgnoreId(expected, result); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvjoin.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvjoin.json new file mode 100644 index 00000000000..a539122e998 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvjoin.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(fetch=[1])\n LogicalProject(result=[ARRAY_JOIN(array('a', 'b', 'c'), ',')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..16=[{inputs}], expr#17=['a'], expr#18=['b'], expr#19=['c'], expr#20=[array($t17, $t18, $t19)], expr#21=[','], expr#22=[ARRAY_JOIN($t20, $t21)], result=[$t22])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->1, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":1,\"timeout\":\"1m\"}, requestedTotalSize=1, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvjoin.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvjoin.json new file mode 100644 index 00000000000..f8c0bc1c908 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvjoin.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(fetch=[1])\n LogicalProject(result=[ARRAY_JOIN(array('a', 'b', 'c'), ',')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=['a'], expr#18=['b'], expr#19=['c'], expr#20=[array($t17, $t18, $t19)], expr#21=[','], expr#22=[ARRAY_JOIN($t20, $t21)], result=[$t22])\n EnumerableLimit(fetch=[1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file From b74c8ee2efaa6e7ce3331157bb2f2d6f44678448 Mon Sep 17 00:00:00 2001 From: ps48 Date: Wed, 10 Sep 2025 12:23:57 -0700 Subject: [PATCH 06/10] update test with real array fields Signed-off-by: ps48 --- .../remote/CalciteArrayFunctionIT.java | 49 +++++++++++++++++++ .../sql/calcite/remote/CalciteExplainIT.java | 3 +- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java index 2408d1dc75b..8fe05f57380 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java @@ -10,6 +10,7 @@ import java.io.IOException; import java.util.List; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.client.ResponseException; @@ -21,6 +22,7 @@ public void init() throws Exception { super.init(); enableCalcite(); loadIndex(Index.BANK); + loadIndex(Index.ARRAY); } @Test @@ -333,4 +335,51 @@ public void testMvjoinWithSpecialDelimiters() throws IOException { verifySchema(actual, schema("result", "string")); verifyDataRows(actual, rows("apple AND banana AND cherry")); } + + @Test + public void testMvjoinWithArrayFromRealFields() throws IOException { + // Test mvjoin on arrays created from real fields using array() function + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval names_array = array(firstname, lastname) | eval result =" + + " mvjoin(names_array, ',') | fields firstname, lastname, result | head 1", + TEST_INDEX_BANK)); + + verifySchema( + actual, + schema("firstname", "string"), + schema("lastname", "string"), + schema("result", "string")); + // Verify that mvjoin correctly joins the firstname and lastname fields + JSONArray dataRows = actual.getJSONArray("datarows"); + assertTrue(dataRows.length() > 0); + JSONArray firstRow = dataRows.getJSONArray(0); + assertEquals(firstRow.getString(0) + "," + firstRow.getString(1), firstRow.getString(2)); + } + + @Test + public void testMvjoinWithMultipleRealFields() throws IOException { + // Test mvjoin with arrays created from multiple real fields + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval info_array = array(city, state, employer) | eval result =" + + " mvjoin(info_array, ' | ') | fields city, state, employer, result | head 1", + TEST_INDEX_BANK)); + + verifySchema( + actual, + schema("city", "string"), + schema("state", "string"), + schema("employer", "string"), + schema("result", "string")); + // Verify that mvjoin correctly joins the city, state, and employer fields + JSONArray dataRows = actual.getJSONArray("datarows"); + assertTrue(dataRows.length() > 0); + JSONArray firstRow = dataRows.getJSONArray(0); + assertEquals( + firstRow.getString(0) + " | " + firstRow.getString(1) + " | " + firstRow.getString(2), + firstRow.getString(3)); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index bf14089a8ea..f2a2fdc53ad 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -5,8 +5,8 @@ package org.opensearch.sql.calcite.remote; -import static org.opensearch.sql.legacy.TestUtils.*; import static org.junit.Assert.assertTrue; +import static org.opensearch.sql.legacy.TestUtils.*; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; @@ -559,6 +559,7 @@ public void testExplainAppendCommand() throws IOException { TEST_INDEX_BANK, TEST_INDEX_BANK))); } + @Test public void testMvjoinExplain() throws IOException { String query = From 7937bb6d3db4e50859ef7048ceedc084bb5ab579 Mon Sep 17 00:00:00 2001 From: ps48 Date: Fri, 12 Sep 2025 10:35:49 -0700 Subject: [PATCH 07/10] use verifyQueryThrowsException in CalcitePPLFunctionTypeTest Signed-off-by: ps48 --- .../ppl/calcite/CalcitePPLFunctionTypeTest.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java index eaa59f5b786..97721f948e0 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java @@ -176,16 +176,12 @@ public void testLog2WithWrongArgShouldThrow() { "LOG2 function expects {[INTEGER]|[DOUBLE]}, but got [STRING,STRING]"); } + // mvjoin should reject non-string single values @Test public void testMvjoinRejectsNonStringValues() { - // mvjoin should reject non-string single values - Exception e = - Assert.assertThrows( - ExpressionEvaluationException.class, - () -> - getRelNode("source=EMP | eval result = mvjoin(42, ',') | fields result | head 1")); - - verifyErrorMessageContains( - e, "MVJOIN function expects {[STRING,STRING],[ARRAY,STRING]}, but got [INTEGER,STRING]"); + verifyQueryThrowsException( + "source=EMP | eval result = mvjoin(42, ',') | fields result | head 1", + "MVJOIN function expects {[STRING,STRING],[ARRAY,STRING]}, but got [INTEGER,STRING]" + ); } } From 9e8b850dbf8a141f2d2a886a9a8d112736edf5fe Mon Sep 17 00:00:00 2001 From: ps48 Date: Fri, 12 Sep 2025 10:38:57 -0700 Subject: [PATCH 08/10] spotless check fix Signed-off-by: ps48 --- .../sql/ppl/calcite/CalcitePPLFunctionTypeTest.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java index 97721f948e0..8a11a973ecb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java @@ -180,8 +180,7 @@ public void testLog2WithWrongArgShouldThrow() { @Test public void testMvjoinRejectsNonStringValues() { verifyQueryThrowsException( - "source=EMP | eval result = mvjoin(42, ',') | fields result | head 1", - "MVJOIN function expects {[STRING,STRING],[ARRAY,STRING]}, but got [INTEGER,STRING]" - ); + "source=EMP | eval result = mvjoin(42, ',') | fields result | head 1", + "MVJOIN function expects {[STRING,STRING],[ARRAY,STRING]}, but got [INTEGER,STRING]"); } } From 94034bca98b94997071d7921c4026012fdbd2495 Mon Sep 17 00:00:00 2001 From: ps48 Date: Mon, 15 Sep 2025 11:29:25 -0700 Subject: [PATCH 09/10] remove string,string registration for mvjoin Signed-off-by: ps48 --- .../expression/function/PPLFuncImpTable.java | 9 +---- docs/user/ppl/functions/collection.rst | 29 ++++++++-------- .../remote/CalciteArrayFunctionIT.java | 12 ------- .../calcite/CalcitePPLArrayFunctionTest.java | 20 ----------- .../calcite/CalcitePPLFunctionTypeTest.java | 2 +- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 33 ++++++++++++++----- 6 files changed, 42 insertions(+), 63 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index dc2bdf8145c..59ff2866bee 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -818,14 +818,7 @@ void populate() { registerOperator(INTERNAL_PATTERN_PARSER, PPLBuiltinOperators.PATTERN_PARSER); - // Register MVJOIN with two different implementations - // For single string values - just return the string (register this first so it's checked - // first) - register( - MVJOIN, - (FunctionImp2) (builder, value, delimiter) -> value, - PPLTypeChecker.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER)); - // For arrays - use Calcite's ARRAY_JOIN + // Register MVJOIN to use Calcite's ARRAY_JOIN register( MVJOIN, (FunctionImp2) diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst index b1cf5171b62..95d55fa7e2d 100644 --- a/docs/user/ppl/functions/collection.rst +++ b/docs/user/ppl/functions/collection.rst @@ -208,9 +208,9 @@ Description Version: 3.3.0 -Usage: mvjoin(array, delimiter) joins string array elements into a single string, separated by the specified delimiter. NULL elements are excluded from the output. Only string arrays are supported. When given a single string value instead of an array, the function returns it unchanged (the delimiter parameter is ignored). +Usage: mvjoin(array, delimiter) joins string array elements into a single string, separated by the specified delimiter. NULL elements are excluded from the output. Only string arrays are supported. -Argument type: array: ARRAY of STRING or STRING, delimiter: STRING +Argument type: array: ARRAY of STRING, delimiter: STRING Return type: STRING @@ -218,16 +218,17 @@ Example:: PPL> source=people | eval result = mvjoin(array('a', 'b', 'c'), ',') | fields result | head 1 fetched rows / total rows = 1/1 - +----------------------------------+ - | result | - |----------------------------------| - | a,b,c | - +----------------------------------+ - - PPL> source=people | eval result = mvjoin('hello', ',') | fields result | head 1 + +------------------------------------+ + | result | + |------------------------------------| + | "a,b,c" | + +------------------------------------+ + + PPL> source=accounts | eval names_array = array(firstname, lastname) | eval result = mvjoin(names_array, ', ') | fields result | head 1 fetched rows / total rows = 1/1 - +----------------------------------+ - | result | - |----------------------------------| - | hello | - +----------------------------------+ \ No newline at end of file + +------------------------------------------+ + | result | + |------------------------------------------| + | "Amber, Duke" | + +------------------------------------------+ + diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java index 8fe05f57380..7aa448bf45a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java @@ -297,18 +297,6 @@ public void testMvjoinWithEmptyArray() throws IOException { verifyDataRows(actual, rows("")); } - @Test - public void testMvjoinWithSingleStringValue() throws IOException { - JSONObject actual = - executeQuery( - String.format( - "source=%s | eval result = mvjoin('hello', ',') | fields result | head 1", - TEST_INDEX_BANK)); - - verifySchema(actual, schema("result", "string")); - verifyDataRows(actual, rows("hello")); - } - @Test public void testMvjoinWithStringBooleans() throws IOException { // mvjoin only supports string arrays diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java index 981ca112b21..cd98e18e4be 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java @@ -39,26 +39,6 @@ public void testMvjoinWithStringArray() { verifyPPLToSparkSQL(root, expectedSparkSql); } - @Test - public void testMvjoinWithSingleStringValue() { - String ppl = "source=EMP | eval joined = mvjoin('hello', ',') | head 1 | fields joined"; - RelNode root = getRelNode(ppl); - - String expectedLogical = - "LogicalProject(joined=[$8])\n" - + " LogicalSort(fetch=[1])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], joined=['hello':VARCHAR])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - - String expectedResult = "joined=hello\n"; - verifyResult(root, expectedResult); - - String expectedSparkSql = "SELECT 'hello' `joined`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; - verifyPPLToSparkSQL(root, expectedSparkSql); - } - @Test public void testMvjoinWithDifferentDelimiter() { String ppl = diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java index 8a11a973ecb..8da3818cc04 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java @@ -181,6 +181,6 @@ public void testLog2WithWrongArgShouldThrow() { public void testMvjoinRejectsNonStringValues() { verifyQueryThrowsException( "source=EMP | eval result = mvjoin(42, ',') | fields result | head 1", - "MVJOIN function expects {[STRING,STRING],[ARRAY,STRING]}, but got [INTEGER,STRING]"); + "MVJOIN function expects {[ARRAY,STRING]}, but got [INTEGER,STRING]"); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 58f0455c379..191d8a0d129 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -582,16 +582,20 @@ public void testRegex() { } @Test - public void testMvjoin() { - // Test mvjoin with array of strings - assertEquals( - "source=t | eval result=mvjoin(array(***,***,***),***) | fields + result", - anonymize("source=t | eval result=mvjoin(array('a', 'b', 'c'), ',') | fields result")); + public void testRexCommand() { + when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); - // Test mvjoin with single string value assertEquals( - "source=t | eval result=mvjoin(***,***) | fields + result", - anonymize("source=t | eval result=mvjoin('hello', ',') | fields result")); + "source=t | rex field=message mode=extract \"(?[A-Z]+)\" max_match=1", + anonymize("source=t | rex field=message \"(?[A-Z]+)\"")); + assertEquals( + "source=t | rex field=lastname mode=extract \"(?^[A-Z])\" max_match=1 | fields +" + + " lastname,initial", + anonymize( + "source=t | rex field=lastname \"(?^[A-Z])\" | fields lastname, initial")); + assertEquals( + "source=t | rex field=name mode=extract \"(?[A-Z])\" max_match=3", + anonymize("source=t | rex field=name \"(?[A-Z])\" max_match=3")); } @Test @@ -606,6 +610,19 @@ public void testRexSedMode() { anonymize("source=t | rex field=data mode=sed \"s/sensitive/clean/g\" | fields data")); } + @Test + public void testMvjoin() { + // Test mvjoin with array of strings + assertEquals( + "source=t | eval result=mvjoin(array(***,***,***),***) | fields + result", + anonymize("source=t | eval result=mvjoin(array('a', 'b', 'c'), ',') | fields result")); + + // Test mvjoin with single string value + assertEquals( + "source=t | eval result=mvjoin(***,***) | fields + result", + anonymize("source=t | eval result=mvjoin('hello', ',') | fields result")); + } + @Test public void testRexWithOffsetField() { when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); From 1823f0c5e438c7347da4ba24f03bf5782759ed8b Mon Sep 17 00:00:00 2001 From: ps48 Date: Mon, 15 Sep 2025 11:36:59 -0700 Subject: [PATCH 10/10] remove string,string test Signed-off-by: ps48 --- .../opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 191d8a0d129..7e9c1000d00 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -616,11 +616,6 @@ public void testMvjoin() { assertEquals( "source=t | eval result=mvjoin(array(***,***,***),***) | fields + result", anonymize("source=t | eval result=mvjoin(array('a', 'b', 'c'), ',') | fields result")); - - // Test mvjoin with single string value - assertEquals( - "source=t | eval result=mvjoin(***,***) | fields + result", - anonymize("source=t | eval result=mvjoin('hello', ',') | fields result")); } @Test