diff --git a/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java index c81f56ef634..4d26c6190e9 100644 --- a/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java @@ -63,8 +63,11 @@ public static String unquoteText(String text) { for (int chIndex = 1; chIndex < text.length() - 1; chIndex++) { currentChar = text.charAt(chIndex); nextChar = text.charAt(chIndex + 1); - if (currentChar == enclosingQuote && nextChar == currentChar) { + + if ((currentChar == '\\' && (nextChar == '"' || nextChar == '\\' || nextChar == '\'')) + || (currentChar == nextChar && currentChar == enclosingQuote)) { chIndex++; + currentChar = nextChar; } textSB.append(currentChar); } diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 7e7b2cc25d5..54f156e9a32 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -85,6 +85,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -800,6 +801,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context)); } + @Override + public LogicalPlan visitReplace(Replace node, AnalysisContext context) { + throw getOnlyForCalciteException("Replace"); + } + @Override public LogicalPlan visitJoin(Join node, AnalysisContext context) { throw getOnlyForCalciteException("Join"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 85e7d9318e4..f478d89eff7 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -73,6 +73,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -246,6 +247,10 @@ public T visitRename(Rename node, C context) { return visitChildren(node, context); } + public T visitReplace(Replace node, C context) { + return visitChildren(node, context); + } + public T visitEval(Eval node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java new file mode 100644 index 00000000000..8b2c18cd56c --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -0,0 +1,58 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import java.util.Set; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; +import org.jetbrains.annotations.Nullable; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Field; + +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +public class Replace extends UnresolvedPlan { + private final List replacePairs; + private final Set fieldList; + @Nullable private UnresolvedPlan child; + + /** + * Constructor with multiple pattern/replacement pairs. + * + * @param replacePairs List of pattern/replacement pairs + * @param fieldList Set of fields to apply replacements to + */ + public Replace(List replacePairs, Set fieldList) { + this.replacePairs = replacePairs; + this.fieldList = fieldList; + } + + @Override + public Replace attach(UnresolvedPlan child) { + if (null == this.child) { + this.child = child; + } else { + this.child.attach(child); + } + return this; + } + + @Override + public List getChild() { + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitReplace(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/ReplacePair.java b/core/src/main/java/org/opensearch/sql/ast/tree/ReplacePair.java new file mode 100644 index 00000000000..e3f3897fdf1 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/ReplacePair.java @@ -0,0 +1,22 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.expression.Literal; + +/** A pair of pattern and replacement literals for the Replace command. */ +@Getter +@AllArgsConstructor +@EqualsAndHashCode +@ToString +public class ReplacePair { + private final Literal pattern; + private final Literal replacement; +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index b3d4f859b17..d9c0772845f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -121,6 +121,8 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; +import org.opensearch.sql.ast.tree.ReplacePair; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; import org.opensearch.sql.ast.tree.Search; @@ -2414,6 +2416,51 @@ public RelNode visitValues(Values values, CalcitePlanContext context) { } } + @Override + public RelNode visitReplace(Replace node, CalcitePlanContext context) { + visitChildren(node, context); + + List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + + // Create a set of field names to replace for quick lookup + Set fieldsToReplace = + node.getFieldList().stream().map(f -> f.getField().toString()).collect(Collectors.toSet()); + + // Validate that all fields to replace exist by calling field() on each + // This leverages relBuilder.field()'s built-in validation which throws + // IllegalArgumentException if any field doesn't exist + for (String fieldToReplace : fieldsToReplace) { + context.relBuilder.field(fieldToReplace); + } + + List projectList = new ArrayList<>(); + + // Project all fields, replacing specified ones in-place + for (String fieldName : fieldNames) { + if (fieldsToReplace.contains(fieldName)) { + // Replace this field in-place with all pattern/replacement pairs applied sequentially + RexNode fieldRef = context.relBuilder.field(fieldName); + + // Apply all replacement pairs sequentially (nested REPLACE calls) + for (ReplacePair pair : node.getReplacePairs()) { + RexNode patternNode = rexVisitor.analyze(pair.getPattern(), context); + RexNode replacementNode = rexVisitor.analyze(pair.getReplacement(), context); + fieldRef = + context.relBuilder.call( + SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode); + } + + projectList.add(fieldRef); + } else { + // Keep original field unchanged + projectList.add(context.relBuilder.field(fieldName)); + } + } + + context.relBuilder.project(projectList, fieldNames); + return context.relBuilder.peek(); + } + private void buildParseRelNode(Parse node, CalcitePlanContext context) { RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context); ParseMethod parseMethod = node.getParseMethod(); diff --git a/docs/category.json b/docs/category.json index cd24a9e1213..67c6b901000 100644 --- a/docs/category.json +++ b/docs/category.json @@ -40,6 +40,7 @@ "user/ppl/cmd/rare.rst", "user/ppl/cmd/regex.rst", "user/ppl/cmd/rename.rst", + "user/ppl/cmd/replace.rst", "user/ppl/cmd/rex.rst", "user/ppl/cmd/search.rst", "user/ppl/cmd/showdatasources.rst", @@ -68,4 +69,4 @@ "bash_settings": [ "user/ppl/admin/settings.rst" ] -} +} \ No newline at end of file diff --git a/docs/user/dql/expressions.rst b/docs/user/dql/expressions.rst index 4816121be2c..460fa47c181 100644 --- a/docs/user/dql/expressions.rst +++ b/docs/user/dql/expressions.rst @@ -46,7 +46,7 @@ Here is an example for different type of literals:: +---------+---------+---------+---------+--------+---------+---------+-----------+----------+ | "Hello" | 'Hello' | "It""s" | 'It''s' | "It's" | '"Its"' | 'It\'s' | 'It\\\'s' | "\I\t\s" | |---------+---------+---------+---------+--------+---------+---------+-----------+----------| - | Hello | Hello | It"s | It's | It's | "Its" | It\'s | It\\\'s | \I\t\s | + | Hello | Hello | It"s | It's | It's | "Its" | It's | It\'s | \I\t\s | +---------+---------+---------+---------+--------+---------+---------+-----------+----------+ diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst new file mode 100644 index 00000000000..bcb0d57e677 --- /dev/null +++ b/docs/user/ppl/cmd/replace.rst @@ -0,0 +1,127 @@ +============= +replace +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +Using ``replace`` command to replace text in one or more fields in the search result. + +Note: This command is only available when Calcite engine is enabled. + + +Syntax +============ +replace '' WITH '' [, '' WITH '']... IN [, ]... + + +Parameters +========== +* **pattern**: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions). +* **replacement**: mandatory. The text you want to replace with. +* **field-name**: mandatory. One or more field names where the replacement should occur. + + +Examples +======== + +Example 1: Replace text in one field +------------------------------------ + +The example shows replacing text in one field. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state; + fetched rows / total rows = 4/4 + +----------+ + | state | + |----------| + | Illinois | + | TN | + | VA | + | MD | + +----------+ + + +Example 2: Replace text in multiple fields +------------------------------------ + +The example shows replacing text in multiple fields. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address; + fetched rows / total rows = 4/4 + +----------+----------------------+ + | state | address | + |----------+----------------------| + | Illinois | 880 Holmes Lane | + | TN | 671 Bristol Street | + | VA | 789 Madison Street | + | MD | 467 Hutchinson Court | + +----------+----------------------+ + + +Example 3: Replace with other commands in a pipeline +------------------------------------ + +The example shows using replace with other commands in a query pipeline. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age; + fetched rows / total rows = 3/3 + +----------+-----+ + | state | age | + |----------+-----| + | Illinois | 32 | + | TN | 36 | + | MD | 33 | + +----------+-----+ + +Example 4: Replace with multiple pattern/replacement pairs +------------------------------------ + +The example shows using multiple pattern/replacement pairs in a single replace command. The replacements are applied sequentially. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois", "TN" WITH "Tennessee" IN state | fields state; + fetched rows / total rows = 4/4 + +-----------+ + | state | + |-----------| + | Illinois | + | Tennessee | + | VA | + | MD | + +-----------+ + +Example 5: Pattern matching with LIKE and replace +------------------------------------ + +Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. + +PPL query:: + + os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city; + fetched rows / total rows = 1/1 + +-----------------+-------+--------+-----+--------+ + | address | state | gender | age | city | + |-----------------+-------+--------+-----+--------| + | 880 HOLMES Lane | IL | M | 32 | Brogan | + +-----------------+-------+--------+-----+--------+ + + +Limitations +=========== +* Only supports plain text literals for pattern matching. Wildcards and regular expressions are not supported. +* Pattern and replacement values must be string literals. +* The replace command modifies the specified fields in-place. \ No newline at end of file diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index 0e5ce44b730..91007ed329a 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -215,6 +215,14 @@ Argument type: STRING, STRING (regex pattern), STRING (replacement) Return type: STRING +**Important - Regex Special Characters**: The pattern is interpreted as a regular expression. Characters like ``.``, ``*``, ``+``, ``[``, ``]``, ``(``, ``)``, ``{``, ``}``, ``^``, ``$``, ``|``, ``?``, and ``\`` have special meaning in regex. To match them literally, escape with backslashes: + +* To match ``example.com``: use ``'example\\.com'`` (escape the dots) +* To match ``value*``: use ``'value\\*'`` (escape the asterisk) +* To match ``price+tax``: use ``'price\\+tax'`` (escape the plus) + +For strings with many special characters, use ``\\Q...\\E`` to quote the entire literal string (e.g., ``'\\Qhttps://example.com/path?id=123\\E'`` matches that exact URL). + Literal String Replacement Examples:: os> source=people | eval `REPLACE('helloworld', 'world', 'universe')` = REPLACE('helloworld', 'world', 'universe'), `REPLACE('helloworld', 'invalid', 'universe')` = REPLACE('helloworld', 'invalid', 'universe') | fields `REPLACE('helloworld', 'world', 'universe')`, `REPLACE('helloworld', 'invalid', 'universe')` @@ -225,6 +233,16 @@ Literal String Replacement Examples:: | hellouniverse | helloworld | +--------------------------------------------+----------------------------------------------+ +Escaping Special Characters Examples:: + + os> source=people | eval `Replace domain` = REPLACE('api.example.com', 'example\\.com', 'newsite.org'), `Replace with quote` = REPLACE('https://api.example.com/v1', '\\Qhttps://api.example.com\\E', 'http://localhost:8080') | fields `Replace domain`, `Replace with quote` + fetched rows / total rows = 1/1 + +-----------------+--------------------------+ + | Replace domain | Replace with quote | + |-----------------+--------------------------| + | api.newsite.org | http://localhost:8080/v1 | + +-----------------+--------------------------+ + Regex Pattern Examples:: os> source=people | eval `Remove digits` = REPLACE('test123', '\d+', ''), `Collapse spaces` = REPLACE('hello world', ' +', ' '), `Remove special` = REPLACE('hello@world!', '[^a-zA-Z]', '') | fields `Remove digits`, `Collapse spaces`, `Remove special` diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 2329dc1d0dd..98b6c00b8f9 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -122,6 +122,8 @@ The query start with search command and then flowing a set of command delimited - `trendline command `_ + - `replace command `_ + - `where command `_ * **Functions** diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 3a512ca635f..69507c71aa5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -89,6 +89,7 @@ CalciteRegexCommandIT.class, CalciteRexCommandIT.class, CalciteRenameCommandIT.class, + CalciteReplaceCommandIT.class, CalciteResourceMonitorIT.class, CalciteSearchCommandIT.class, CalciteSettingsIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index cc8a7a950a6..a3f903e97c9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1121,4 +1121,15 @@ public void testPushDownMinOrMaxAggOnDerivedField() throws IOException { + "| stats MIN(balance2), MAX(balance2)", TEST_INDEX_ACCOUNT))); } + + @Test + public void testReplaceCommandExplain() throws IOException { + String expected = loadExpectedPlan("explain_replace_command.yaml"); + assertYamlEqualsJsonIgnoreId( + expected, + explainQueryToString( + String.format( + "source=%s | replace 'IL' WITH 'Illinois' IN state | fields state", + TEST_INDEX_ACCOUNT))); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java new file mode 100644 index 00000000000..1ec2bf86d2d --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java @@ -0,0 +1,292 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.MatcherUtils.*; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteReplaceCommandIT extends PPLIntegTestCase { + + public void init() throws Exception { + super.init(); + enableCalcite(); + disallowCalciteFallback(); + loadIndex(Index.STATE_COUNTRY); + } + + @Test + public void testReplaceWithFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country | fields name, age," + + " country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, schema("name", "string"), schema("age", "int"), schema("country", "string")); + + verifyDataRows( + result, + rows("Jake", 70, "United States"), + rows("Hello", 30, "United States"), + rows("John", 25, "Canada"), + rows("Jane", 20, "Canada")); + } + + @Test + public void testMultipleReplace() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country | replace 'Jane' WITH" + + " 'Joseph' IN name | fields name, country, state, month, year, age", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int")); + + verifyDataRows( + result, + rows("Jake", "United States", "California", 4, 2023, 70), + rows("Hello", "United States", "New York", 4, 2023, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25), + rows("Joseph", "Canada", "Quebec", 4, 2023, 20)); + } + + @Test + public void testReplaceWithSort() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'US' WITH 'United States' IN country | sort country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int")); + } + + @Test + public void testReplaceWithWhereClause() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | where country = 'US' | replace 'US' WITH 'United States' IN country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int")); + } + + @Test + public void testEmptyStringReplacement() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH '' IN country | fields name, country, state," + + " month, year, age", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int")); + + verifyDataRows( + result, + rows("Jake", "", "California", 4, 2023, 70), + rows("Hello", "", "New York", 4, 2023, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20)); + } + + @Test + public void testMultipleFieldsInClause() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country,state | fields name," + + " country, state, month, year, age", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int")); + + verifyDataRows( + result, + rows("Jake", "United States", "California", 4, 2023, 70), + rows("Hello", "United States", "New York", 4, 2023, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20)); + } + + @Test + public void testReplaceNonExistentField() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN non_existent_field", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "field [non_existent_field] not found"); + } + + @Test + public void testReplaceAfterFieldRemoved() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | fields name, age | replace 'USA' WITH 'United States' IN" + + " country", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "field [country] not found; input fields are: [name, age]"); + } + + @Test + public void testMissingInClause() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States'", + TEST_INDEX_STATE_COUNTRY))); + + verifyErrorMessageContains(e, "[]"); + verifyErrorMessageContains(e, "Expecting tokens in {'IN', ','}"); + } + + @Test + public void testDuplicateFieldsInReplace() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country, state," + + " country", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "Duplicate fields [country] in Replace command"); + } + + @Test + public void testNonStringLiteralPattern() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 23 WITH 'test' IN field1", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "[23]"); + verifyErrorMessageContains(e, "Expecting tokens in {DQUOTA_STRING, SQUOTA_STRING}"); + } + + @Test + public void testNonStringLiteralReplacement() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'test' WITH 45 IN field1", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "[45]"); + verifyErrorMessageContains(e, "Expecting tokens in {DQUOTA_STRING, SQUOTA_STRING}"); + } + + @Test + public void testMultiplePairsInSingleCommand() throws IOException { + // Test replacing multiple patterns in a single command + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States', 'Canada' WITH 'CA' IN country" + + " | fields name, country, state, month, year, age", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int")); + + verifyDataRows( + result, + rows("Jake", "United States", "California", 4, 2023, 70), + rows("Hello", "United States", "New York", 4, 2023, 30), + rows("John", "CA", "Ontario", 4, 2023, 25), + rows("Jane", "CA", "Quebec", 4, 2023, 20)); + } + + @Test + public void testMultiplePairsSequentialApplication() throws IOException { + // Test that replacements are applied sequentially (order matters) + // If we have "Ontario" WITH "ON", "ON" WITH "Ontario Province" + // then "Ontario" becomes "ON" first, then that "ON" becomes "Ontario Province" + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'Ontario' WITH 'ON', 'ON' WITH 'Ontario Province' IN state" + + " | fields name, state", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema(result, schema("name", "string"), schema("state", "string")); + + verifyDataRows( + result, + rows("Jake", "California"), + rows("Hello", "New York"), + rows("John", "Ontario Province"), + rows("Jane", "Quebec")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java index 4620ca586c0..b251e5bc694 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java @@ -523,7 +523,7 @@ public void testWildcardEscaping() throws IOException { executeQuery( String.format( "search source=%s" - + " `attributes.error.type`=\\\"C:\\\\\\\\Users\\\\\\\\admin\\\"" + + " `attributes.error.type`=\\\"C:\\\\\\\\\\\\\\\\Users\\\\\\\\\\\\\\\\admin\\\"" + " | sort time | fields attributes.error.type", TEST_INDEX_OTEL_LOGS)); verifyDataRows(backslashSearch, rows("C:\\Users\\admin")); diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/WildcardQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/WildcardQueryIT.java index c6e43010d90..8123f887f23 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/WildcardQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/WildcardQueryIT.java @@ -191,7 +191,7 @@ public void test_backslash_wildcard() throws IOException { String query = "SELECT KeywordBody FROM " + TEST_INDEX_WILDCARD - + " WHERE wildcard_query(KeywordBody, '*\\\\\\\\\\\\_')"; + + " WHERE wildcard_query(KeywordBody, '*\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\_')"; JSONObject result = executeJdbcRequest(query); verifyDataRows(result, rows("test backslash wildcard \\_")); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml new file mode 100644 index 00000000000..a867c569168 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[REPLACE($7, 'IL':VARCHAR, 'Illinois':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0=[{inputs}], expr#1=['IL':VARCHAR], expr#2=['Illinois':VARCHAR], expr#3=[REPLACE($t0, $t1, $t2)], $f0=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml new file mode 100644 index 00000000000..bbebbf37f80 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[REPLACE($7, 'IL':VARCHAR, 'Illinois':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['IL':VARCHAR], expr#18=['Illinois':VARCHAR], expr#19=[REPLACE($t7, $t17, $t18)], state=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 62e2a21204b..93bb7dc7c45 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -79,6 +79,7 @@ commands | regexCommand | timechartCommand | rexCommand + | replaceCommand ; commandName @@ -121,6 +122,7 @@ commandName | APPEND | MULTISEARCH | REX + | REPLACE ; searchCommand @@ -208,6 +210,14 @@ renameCommand : RENAME renameClasue (COMMA? renameClasue)* ; +replaceCommand + : REPLACE replacePair (COMMA replacePair)* IN fieldList + ; + +replacePair + : pattern=stringLiteral WITH replacement=stringLiteral + ; + statsCommand : STATS statsArgs statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (dedupSplitArg)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 6d4f5382f5a..dd165726a70 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -76,6 +76,8 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; +import org.opensearch.sql.ast.tree.ReplacePair; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -382,6 +384,25 @@ public UnresolvedPlan visitRenameCommand(RenameCommandContext ctx) { .collect(Collectors.toList())); } + /** Replace command. */ + @Override + public UnresolvedPlan visitReplaceCommand(OpenSearchPPLParser.ReplaceCommandContext ctx) { + // Parse all replacement pairs + List replacePairs = + ctx.replacePair().stream().map(this::buildReplacePair).collect(Collectors.toList()); + + Set fieldList = getUniqueFieldSet(ctx.fieldList()); + + return new Replace(replacePairs, fieldList); + } + + /** Build a ReplacePair from parse context. */ + private ReplacePair buildReplacePair(OpenSearchPPLParser.ReplacePairContext ctx) { + Literal pattern = (Literal) internalVisitExpression(ctx.pattern); + Literal replacement = (Literal) internalVisitExpression(ctx.replacement); + return new ReplacePair(pattern, replacement); + } + /** Stats command. */ @Override public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { @@ -670,6 +691,30 @@ private List getFieldList(FieldListContext ctx) { .collect(Collectors.toList()); } + private Set getUniqueFieldSet(FieldListContext ctx) { + List fields = + ctx.fieldExpression().stream() + .map(field -> (Field) internalVisitExpression(field)) + .collect(Collectors.toList()); + + Set uniqueFields = new java.util.LinkedHashSet<>(fields); + + if (uniqueFields.size() < fields.size()) { + // Find duplicates for error message + Set seen = new HashSet<>(); + Set duplicates = + fields.stream() + .map(f -> f.getField().toString()) + .filter(name -> !seen.add(name)) + .collect(Collectors.toSet()); + + throw new IllegalArgumentException( + String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates))); + } + + return uniqueFields; + } + /** Rare command. */ @Override public UnresolvedPlan visitRareCommand(OpenSearchPPLParser.RareCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 1efdedab7ee..64f5dfe3be0 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -78,6 +78,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -281,6 +282,30 @@ public String visitRename(Rename node, String context) { return StringUtils.format("%s | rename %s", child, renames); } + @Override + public String visitReplace(Replace node, String context) { + // Get the child query string + String child = node.getChild().get(0).accept(this, context); + + // Build pattern/replacement pairs string + String pairs = + node.getReplacePairs().stream() + .map( + pair -> + StringUtils.format( + "%s WITH %s", + visitExpression(pair.getPattern()), visitExpression(pair.getReplacement()))) + .collect(Collectors.joining(", ")); + + // Get field list + String fieldListStr = + " IN " + + node.getFieldList().stream().map(Field::toString).collect(Collectors.joining(", ")); + + // Build the replace command string + return StringUtils.format("%s | replace %s%s", child, pairs, fieldListStr); + } + /** Build {@link LogicalAggregation}. */ @Override public String visitAggregation(Aggregation node, String context) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java new file mode 100644 index 00000000000..abde8b3a5bb --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -0,0 +1,328 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; +import org.opensearch.sql.common.antlr.SyntaxCheckException; + +public class CalcitePPLReplaceTest extends CalcitePPLAbstractTest { + + public CalcitePPLReplaceTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testBasicReplace() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=EMPLOYEE; MGR=7902; HIREDATE=1980-12-17; SAL=800.00;" + + " COMM=null; DEPTNO=20\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00;" + + " COMM=null; DEPTNO=20\n" + + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00;" + + " COMM=1400.00; DEPTNO=30\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00;" + + " COMM=null; DEPTNO=30\n" + + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00;" + + " COMM=null; DEPTNO=10\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00;" + + " COMM=null; DEPTNO=20\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00;" + + " COMM=null; DEPTNO=10\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00;" + + " COMM=0.00; DEPTNO=30\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=EMPLOYEE; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;" + + " COMM=null; DEPTNO=20\n" + + "EMPNO=7900; ENAME=JAMES; JOB=EMPLOYEE; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;" + + " COMM=null; DEPTNO=30\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00;" + + " COMM=null; DEPTNO=20\n" + + "EMPNO=7934; ENAME=MILLER; JOB=EMPLOYEE; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;" + + " COMM=null; DEPTNO=10\n"; + + verifyResult(root, expectedResult); + } + + @Test + public void testMultipleFieldsReplace() { + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB | replace \"20\" WITH \"RESEARCH\"" + + " IN DEPTNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6]," + + " DEPTNO=[REPLACE($7, '20':VARCHAR, 'RESEARCH':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, REPLACE(`DEPTNO`, '20', 'RESEARCH') `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceSameValueInMultipleFields() { + // In EMP table, both JOB and MGR fields contain numeric values + String ppl = "source=EMP | replace \"7839\" WITH \"CEO\" IN MGR, EMPNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[REPLACE($0, '7839':VARCHAR, 'CEO':VARCHAR)], ENAME=[$1], JOB=[$2]," + + " MGR=[REPLACE($3, '7839':VARCHAR, 'CEO':VARCHAR)], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT REPLACE(`EMPNO`, '7839', 'CEO') `EMPNO`, `ENAME`, `JOB`," + + " REPLACE(`MGR`, '7839', 'CEO') `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithPipeline() { + String ppl = + "source=EMP | where JOB = 'CLERK' | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB | sort SAL"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($2, 'CLERK':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `JOB` = 'CLERK'\n" + + "ORDER BY `SAL`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithoutWithKeywordShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" \"EMPLOYEE\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithoutInKeywordShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" JOB"; + getRelNode(ppl); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithExpressionShouldFail() { + String ppl = "source=EMP | replace EMPNO + 1 WITH \"EMPLOYEE\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithInvalidFieldShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN INVALID_FIELD"; + getRelNode(ppl); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithMultipleInKeywordsShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB IN ENAME"; + getRelNode(ppl); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithMissingQuotesShouldFail() { + String ppl = "source=EMP | replace CLERK WITH EMPLOYEE IN JOB"; + getRelNode(ppl); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithMissingReplacementValueShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH IN JOB"; + getRelNode(ppl); + } + + @Test + public void testReplaceWithEvalAndReplaceOnSameField() { + // Test verifies that in-place replacement works correctly when there are additional fields + // created by eval. The eval creates new_JOB, and replace modifies JOB in-place. + String ppl = + "source=EMP | eval new_JOB = 'existing' | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB"; + RelNode root = getRelNode(ppl); + + // With in-place replacement, JOB is modified and new_JOB remains as created by eval + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=['existing':VARCHAR])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, `DEPTNO`, 'existing' `new_JOB`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithMultiplePairs() { + // Test with multiple pattern/replacement pairs in a single command + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\" IN JOB"; + RelNode root = getRelNode(ppl); + + // Should generate nested REPLACE calls: REPLACE(REPLACE(JOB, 'CLERK', 'EMPLOYEE'), 'MANAGER', + // 'SUPERVISOR') + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE(REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR), 'MANAGER':VARCHAR, 'SUPERVISOR':VARCHAR)], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, REPLACE(REPLACE(`JOB`, 'CLERK', 'EMPLOYEE'), 'MANAGER'," + + " 'SUPERVISOR') `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithThreePairs() { + // Test with three pattern/replacement pairs + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\"," + + " \"ANALYST\" WITH \"RESEARCHER\" IN JOB"; + RelNode root = getRelNode(ppl); + + // Should generate triple nested REPLACE calls + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE(REPLACE(REPLACE($2," + + " 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR), 'MANAGER':VARCHAR, 'SUPERVISOR':VARCHAR)," + + " 'ANALYST':VARCHAR, 'RESEARCHER':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, REPLACE(REPLACE(REPLACE(`JOB`, 'CLERK', 'EMPLOYEE')," + + " 'MANAGER', 'SUPERVISOR'), 'ANALYST', 'RESEARCHER') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithMultiplePairsOnMultipleFields() { + // Test with multiple pattern/replacement pairs applied to multiple fields + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\" IN JOB," + + " ENAME"; + RelNode root = getRelNode(ppl); + + // Should apply the same nested REPLACE calls to both JOB and ENAME fields + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[REPLACE(REPLACE($1, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR), 'MANAGER':VARCHAR, 'SUPERVISOR':VARCHAR)]," + + " JOB=[REPLACE(REPLACE($2, 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR)," + + " 'MANAGER':VARCHAR, 'SUPERVISOR':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, REPLACE(REPLACE(`ENAME`, 'CLERK', 'EMPLOYEE'), 'MANAGER'," + + " 'SUPERVISOR') `ENAME`, REPLACE(REPLACE(`JOB`, 'CLERK', 'EMPLOYEE'), 'MANAGER'," + + " 'SUPERVISOR') `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithMultiplePairsSequentialApplication() { + // Test that replacements are applied sequentially + // This test demonstrates the order matters: if we have "20" WITH "30", "30" WITH "40" + // then "20" will become "30" first, then that "30" becomes "40", resulting in "40" + String ppl = "source=EMP | replace \"20\" WITH \"30\", \"30\" WITH \"40\" IN DEPTNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[REPLACE(REPLACE($7, '20':VARCHAR, '30':VARCHAR)," + + " '30':VARCHAR, '40':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " REPLACE(REPLACE(`DEPTNO`, '20', '30'), '30', '40') `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithMultiplePairsMissingWithKeywordShouldFail() { + // Missing WITH keyword between pairs + String ppl = + "source=EMP | replace \"CLERK\" \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithMultiplePairsTrailingCommaShouldFail() { + // Trailing comma after last pair + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", IN JOB"; + getRelNode(ppl); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java index d1c11aec0af..1e97052dea0 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java @@ -285,12 +285,12 @@ public void testReplaceWithRegexPattern() { String ppl = "source=EMP | eval no_digits = replace(JOB, '\\\\d+', '') | fields JOB, no_digits"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(JOB=[$2], no_digits=[REGEXP_REPLACE($2, '\\\\d+':VARCHAR, '':VARCHAR)])\n" + "LogicalProject(JOB=[$2], no_digits=[REGEXP_REPLACE($2, '\\d+':VARCHAR, '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `JOB`, REGEXP_REPLACE(`JOB`, '\\\\d+', '') `no_digits`\n" + "FROM `scott`.`EMP`"; + "SELECT `JOB`, REGEXP_REPLACE(`JOB`, '\\d+', '') `no_digits`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -303,12 +303,12 @@ public void testReplaceWithRegexCaptureGroups() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(ENAME=[$1], swapped=[REGEXP_REPLACE($1, '^(.)(.)':VARCHAR," - + " '\\$2\\$1')])\n" + + " '$2$1')])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, REGEXP_REPLACE(`ENAME`, '^(.)(.)', '\\$2\\$1') `swapped`\n" + "SELECT `ENAME`, REGEXP_REPLACE(`ENAME`, '^(.)(.)', '$2$1') `swapped`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 54438dda946..205867c9269 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1255,4 +1255,16 @@ public void testMultisearchSingleSubsearchThrowsException() { "Multisearch command requires at least two subsearches. Provided: 1", exception.getMessage()); } + + @Test + public void testReplaceCommand() { + // Test basic single pattern replacement + plan("source=t | replace 'old' WITH 'new' IN field"); + } + + @Test + public void testReplaceCommandWithMultiplePairs() { + // Test multiple pattern/replacement pairs + plan("source=t | replace 'a' WITH 'A', 'b' WITH 'B' IN field"); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 51b345a83e4..fc897594e9b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -617,6 +617,49 @@ public void testGrok() { anonymize("source=t | grok email '.+@%{HOSTNAME:host}' | fields email, host")); } + @Test + public void testReplaceCommandSingleField() { + assertEquals( + "source=table | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname")); + } + + @Test + public void testReplaceCommandMultipleFields() { + assertEquals( + "source=table | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + + " Field(field=fieldname2, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname1, fieldname2")); + } + + @Test(expected = Exception.class) + public void testReplaceCommandWithoutInShouldFail() { + anonymize("source=EMP | replace \"value\" WITH \"newvalue\""); + } + + @Test + public void testReplaceCommandSpecialCharactersInFields() { + assertEquals( + "source=table | replace *** WITH *** IN Field(field=user.name, fieldArgs=[])," + + " Field(field=user.email, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN user.name, user.email")); + } + + @Test + public void testReplaceCommandWithWildcards() { + assertEquals( + "source=table | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + anonymize("source=EMP | replace \"CLERK*\" WITH \"EMPLOYEE*\" IN fieldname")); + } + + @Test + public void testReplaceCommandWithMultipleWildcards() { + assertEquals( + "source=table | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + + " Field(field=fieldname2, fieldArgs=[])", + anonymize("source=EMP | replace \"*TEST*\" WITH \"*NEW*\" IN fieldname1, fieldname2")); + } + @Test public void testPatterns() { when(settings.getSettingValue(Key.PATTERN_METHOD)).thenReturn("SIMPLE_PATTERN");