From 6128a7e5b8d6cefcee0e64e5a01391200817e3c1 Mon Sep 17 00:00:00 2001 From: Manasvini B S Date: Wed, 3 Sep 2025 15:21:52 -0700 Subject: [PATCH 01/19] Add replace command with Calcite Signed-off-by: Manasvini B S # Conflicts: # core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java # docs/category.json # Conflicts: # docs/category.json --- .../org/opensearch/sql/analysis/Analyzer.java | 6 + .../sql/ast/AbstractNodeVisitor.java | 5 + .../org/opensearch/sql/ast/tree/Replace.java | 97 +++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 36 +++ docs/category.json | 28 ++ docs/user/ppl/cmd/replace.rst | 107 ++++++++ docs/user/ppl/index.rst | 2 + .../sql/calcite/CalciteNoPushdownIT.java | 1 + .../remote/CalciteReplaceCommandIT.java | 250 ++++++++++++++++++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 + .../opensearch/sql/ppl/parser/AstBuilder.java | 20 ++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 20 ++ .../ppl/calcite/CalcitePPLReplaceTest.java | 241 +++++++++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 43 +++ 14 files changed, 862 insertions(+) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Replace.java create mode 100644 docs/user/ppl/cmd/replace.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 537b67d73d2..9f78b245942 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -85,6 +85,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -801,6 +802,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context)); } + @Override + public LogicalPlan visitReplace(Replace node, AnalysisContext context) { + throw getOnlyForCalciteException("Replace"); + } + @Override public LogicalPlan visitJoin(Join node, AnalysisContext context) { throw getOnlyForCalciteException("Join"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index e444b040763..f5d2a1623b3 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -73,6 +73,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -245,6 +246,10 @@ public T visitRename(Rename node, C context) { return visitChildren(node, context); } + public T visitReplace(Replace node, C context) { + return visitChildren(node, context); + } + public T visitEval(Eval node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java new file mode 100644 index 00000000000..0bc43854078 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -0,0 +1,97 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +public class Replace extends UnresolvedPlan { + private final UnresolvedExpression pattern; + private final UnresolvedExpression replacement; + private final List fieldList; + private UnresolvedPlan child; + + public Replace( + UnresolvedExpression pattern, UnresolvedExpression replacement, List fieldList) { + this.pattern = pattern; + this.replacement = replacement; + this.fieldList = fieldList; + validate(); + } + + public void validate() { + if (pattern == null) { + throw new IllegalArgumentException("Pattern expression cannot be null in Replace command"); + } + if (replacement == null) { + throw new IllegalArgumentException( + "Replacement expression cannot be null in Replace command"); + } + + // Validate pattern is a string literal + if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) { + throw new IllegalArgumentException("Pattern must be a string literal in Replace command"); + } + + // Validate replacement is a string literal + if (!(replacement instanceof Literal && ((Literal) replacement).getType() == DataType.STRING)) { + throw new IllegalArgumentException("Replacement must be a string literal in Replace command"); + } + + if (fieldList == null || fieldList.isEmpty()) { + throw new IllegalArgumentException( + "Field list cannot be empty in Replace command. Use IN clause to specify the field."); + } + + Set uniqueFields = new HashSet<>(); + List duplicates = + fieldList.stream() + .map(field -> field.getField().toString()) + .filter(fieldName -> !uniqueFields.add(fieldName)) + .collect(Collectors.toList()); + + if (!duplicates.isEmpty()) { + throw new IllegalArgumentException( + String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates))); + } + } + + @Override + public Replace attach(UnresolvedPlan child) { + if (null == this.child) { + this.child = child; + } else { + this.child.attach(child); + } + return this; + } + + @Override + public List getChild() { + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitReplace(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index f161b9fc4ab..a74db231075 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -121,6 +121,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; import org.opensearch.sql.ast.tree.Search; @@ -157,6 +158,7 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + RexNode patternNode = rexVisitor.analyze(node.getPattern(), context); + RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context); + + List projectList = new ArrayList<>(); + List newFieldNames = new ArrayList<>(); + + // First add all original fields + for (String fieldName : fieldNames) { + RexNode fieldRef = context.relBuilder.field(fieldName); + projectList.add(fieldRef); + newFieldNames.add(fieldName); + } + + // Then add new fields with replaced content using new_ prefix + for (Field field : node.getFieldList()) { + String fieldName = field.getField().toString(); + RexNode fieldRef = context.relBuilder.field(fieldName); + + RexNode replaceCall = + context.relBuilder.call( + SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode); + projectList.add(replaceCall); + newFieldNames.add(NEW_FIELD_PREFIX + fieldName); + } + + context.relBuilder.project(projectList, newFieldNames); + return context.relBuilder.peek(); + } + private void buildParseRelNode(Parse node, CalcitePlanContext context) { RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context); ParseMethod parseMethod = node.getParseMethod(); diff --git a/docs/category.json b/docs/category.json index cd24a9e1213..95e3a004195 100644 --- a/docs/category.json +++ b/docs/category.json @@ -2,6 +2,7 @@ "bash": [ "user/ppl/interfaces/endpoint.rst", "user/ppl/interfaces/protocol.rst", + "user/ppl/admin/settings.rst", "user/optimization/optimization.rst", "user/admin/settings.rst" ], @@ -61,6 +62,33 @@ "user/ppl/functions/json.rst", "user/ppl/functions/math.rst", "user/ppl/functions/relevance.rst", + "user/ppl/functions/string.rst" + ], + "sql_cli": [ + "user/dql/expressions.rst", + "user/general/comments.rst", + "user/general/datatypes.rst", + "user/general/identifiers.rst", + "user/general/values.rst", + "user/dql/basics.rst", + "user/dql/functions.rst", + "user/dql/window.rst", + "user/beyond/partiql.rst", + "user/dql/aggregations.rst", + "user/dql/complex.rst", + "user/dql/metadata.rst" + ], + "ppl_cli_calcite": [ + "user/ppl/cmd/append.rst", + "user/ppl/cmd/eventstats.rst", + "user/ppl/cmd/fields.rst", + "user/ppl/cmd/regex.rst", + "user/ppl/cmd/rename.rst", + "user/ppl/cmd/rex.rst", + "user/ppl/cmd/stats.rst", + "user/ppl/cmd/timechart.rst", + "user/ppl/cmd/search.rst", + "user/ppl/cmd/replace.rst" "user/ppl/functions/string.rst", "user/ppl/general/datatypes.rst", "user/ppl/general/identifiers.rst" diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst new file mode 100644 index 00000000000..f2a193e54ec --- /dev/null +++ b/docs/user/ppl/cmd/replace.rst @@ -0,0 +1,107 @@ +============= +replace +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Using ``replace`` command to replace text in one or more fields in the search result. +* The command creates new fields with *new_* prefix for replaced content (e.g., replacing text in 'country' creates 'new_country') +* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0') + + +Version +======= +3.3.0 + + +Syntax +============ +replace '' WITH '' IN [, ]... + +Note: This command is only available when Calcite engine is enabled. + +* pattern: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions). +* replacement: mandatory. The text you want to replace with. +* field list: mandatory. One or more field names where the replacement should occur. + + +Example 1: Replace text in one field +==================================== + +The example shows replacing text in one field. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state, new_state; + fetched rows / total rows = 4/4 + +-------+-----------+ + | state | new_state | + |-------+-----------| + | IL | Illinois | + | TN | TN | + | VA | VA | + | MD | MD | + +-------+-----------+ + + +Example 2: Replace text in multiple fields +========================================== + +The example shows replacing text in multiple fields. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address, new_state, new_address; + fetched rows / total rows = 4/4 + +-------+----------------------+-----------+----------------------+ + | state | address | new_state | new_address | + |-------+----------------------+-----------+----------------------| + | IL | 880 Holmes Lane | Illinois | 880 Holmes Lane | + | TN | 671 Bristol Street | TN | 671 Bristol Street | + | VA | 789 Madison Street | VA | 789 Madison Street | + | MD | 467 Hutchinson Court | MD | 467 Hutchinson Court | + +-------+----------------------+-----------+----------------------+ + + +Example 3: Replace with IN clause and other commands +==================================================== + +The example shows using replace with other commands. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age, new_state; + fetched rows / total rows = 3/3 + +-------+-----+-----------+ + | state | age | new_state | + |-------+-----+-----------| + | IL | 32 | Illinois | + | TN | 36 | TN | + | MD | 33 | MD | + +-------+-----+-----------+ + +Example 4: Pattern matching with LIKE and replace +================================================= + +Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. + +PPL query:: + + os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city, new_address; + fetched rows / total rows = 1/1 + +-----------------+-------+--------+-----+--------+-----------------+ + | address | state | gender | age | city | new_address | + |-----------------+-------+--------+-----+--------+-----------------| + | 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane | + +-----------------+-------+--------+-----+--------+-----------------+ + +Note +==== +* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged. \ No newline at end of file diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 36065997a42..17b4797df39 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -124,6 +124,8 @@ The query start with search command and then flowing a set of command delimited - `trendline command `_ + - `replace command `_ + - `where command `_ * **Functions** diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 3a512ca635f..69507c71aa5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -89,6 +89,7 @@ CalciteRegexCommandIT.class, CalciteRexCommandIT.class, CalciteRenameCommandIT.class, + CalciteReplaceCommandIT.class, CalciteResourceMonitorIT.class, CalciteSearchCommandIT.class, CalciteSettingsIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java new file mode 100644 index 00000000000..34eb31d97d8 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java @@ -0,0 +1,250 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.MatcherUtils.*; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteReplaceCommandIT extends PPLIntegTestCase { + + public void init() throws Exception { + super.init(); + enableCalcite(); + disallowCalciteFallback(); + loadIndex(Index.STATE_COUNTRY); + } + + @Test + public void testReplaceWithFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country | fields name, age," + + " new_country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, schema("name", "string"), schema("age", "int"), schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", 70, "United States"), + rows("Hello", 30, "United States"), + rows("John", 25, "Canada"), + rows("Jane", 20, "Canada")); + } + + @Test + public void testMultipleReplace() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country | replace 'Jane' WITH" + + " 'Joseph' IN name", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_name", "string"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "United States", "Jake"), + rows("Hello", "USA", "New York", 4, 2023, 30, "United States", "Hello"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada", "John"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada", "Joseph")); + } + + @Test + public void testReplaceWithSort() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'US' WITH 'United States' IN country | sort new_country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + } + + @Test + public void testReplaceWithWhereClause() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | where country = 'US' | replace 'US' WITH 'United States' IN country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + } + + @Test + public void testEmptyStringReplacement() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH '' IN country", TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, ""), + rows("Hello", "USA", "New York", 4, 2023, 30, ""), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada")); + } + + @Test + public void testMultipleFieldsInClause() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country,state", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_state", "string"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "United States", "California"), + rows("Hello", "USA", "New York", 4, 2023, 30, "United States", "New York"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada", "Ontario"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada", "Quebec")); + } + + @Test + public void testReplaceNonExistentField() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN non_existent_field", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains( + e, + "field [non_existent_field] not found; input fields are: [name, country, state, month," + + " year, age, _id, _index, _score, _maxscore, _sort, _routing]"); + } + + @Test + public void testReplaceAfterFieldRemoved() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | fields name, age | replace 'USA' WITH 'United States' IN" + + " country", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "field [country] not found; input fields are: [name, age]"); + } + + @Test + public void testMissingInClause() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States'", + TEST_INDEX_STATE_COUNTRY))); + + verifyErrorMessageContains(e, "[] is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: 'IN'"); + } + + @Test + public void testDuplicateFieldsInReplace() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country, state," + + " country", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "Duplicate fields [country] in Replace command"); + } + + @Test + public void testNonStringLiteralPattern() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 23 WITH 'test' IN field1", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: DQUOTA_STRING, SQUOTA_STRING"); + } + + @Test + public void testNonStringLiteralReplacement() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'test' WITH 45 IN field1", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: DQUOTA_STRING, SQUOTA_STRING"); + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index e13447b68e9..d3c3cf63210 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -79,6 +79,7 @@ commands | regexCommand | timechartCommand | rexCommand + | replaceCommand ; commandName @@ -117,6 +118,7 @@ commandName | APPEND | MULTISEARCH | REX + | REPLACE ; searchCommand @@ -204,6 +206,10 @@ renameCommand : RENAME renameClasue (COMMA? renameClasue)* ; +replaceCommand + : REPLACE pattern=stringLiteral WITH replacement=stringLiteral IN fieldList + ; + statsCommand : STATS statsArgs statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (dedupSplitArg)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index e84e85a9e8e..8b530772741 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -76,6 +76,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -381,6 +382,25 @@ public UnresolvedPlan visitRenameCommand(RenameCommandContext ctx) { .collect(Collectors.toList())); } + /** Replace command. */ + @Override + public UnresolvedPlan visitReplaceCommand(OpenSearchPPLParser.ReplaceCommandContext ctx) { + UnresolvedExpression pattern = internalVisitExpression(ctx.pattern); + UnresolvedExpression replacement = internalVisitExpression(ctx.replacement); + + List fieldList = + ctx.fieldList().fieldExpression().stream() + .map(field -> (Field) internalVisitExpression(field)) + .collect(Collectors.toList()); + + return new Replace(pattern, replacement, fieldList); + } + + private String removeQuotes(String text) { + // Remove both single and double quotes + return text.replaceAll("^[\"']|[\"']$", ""); + } + /** Stats command. */ @Override public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 871f8dc4713..ac15076ed1d 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -78,6 +78,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -278,6 +279,25 @@ public String visitRename(Rename node, String context) { return StringUtils.format("%s | rename %s", child, renames); } + @Override + public String visitReplace(Replace node, String context) { + // Get the child query string + String child = node.getChild().get(0).accept(this, context); + + // Get pattern and replacement expressions + String pattern = visitExpression(node.getPattern()); + String replacement = visitExpression(node.getReplacement()); + + // Get field list + String fieldListStr = + " IN " + + node.getFieldList().stream().map(Field::toString).collect(Collectors.joining(", ")); + + // Build the replace command string + return StringUtils.format( + "%s | replace %s WITH %s%s", child, pattern, replacement, fieldListStr); + } + /** Build {@link LogicalAggregation}. */ @Override public String visitAggregation(Aggregation node, String context) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java new file mode 100644 index 00000000000..95d37e9dcf7 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -0,0 +1,241 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import java.util.Collections; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.tree.Replace; + +public class CalcitePPLReplaceTest extends CalcitePPLAbstractTest { + + public CalcitePPLReplaceTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testBasicReplace() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + + " DEPTNO=20; new_JOB=EMPLOYEE\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00;" + + " COMM=null; DEPTNO=20; new_JOB=MANAGER\n" + + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00;" + + " COMM=1400.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00;" + + " COMM=null; DEPTNO=30; new_JOB=MANAGER\n" + + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00;" + + " COMM=null; DEPTNO=10; new_JOB=MANAGER\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00;" + + " COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00;" + + " COMM=null; DEPTNO=10; new_JOB=PRESIDENT\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00;" + + " COMM=0.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;" + + " COMM=null; DEPTNO=20; new_JOB=EMPLOYEE\n" + + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;" + + " COMM=null; DEPTNO=30; new_JOB=EMPLOYEE\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00;" + + " COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;" + + " COMM=null; DEPTNO=10; new_JOB=EMPLOYEE\n"; + + verifyResult(root, expectedResult); + } + + @Test + public void testMultipleFieldsReplace() { + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB | replace \"20\" WITH \"RESEARCH\"" + + " IN DEPTNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR)]," + + " new_DEPTNO=[REPLACE($7, '20':VARCHAR, 'RESEARCH':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`, " + + "REPLACE(`DEPTNO`, '20', 'RESEARCH') `new_DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceSameValueInMultipleFields() { + // In EMP table, both JOB and MGR fields contain numeric values + String ppl = "source=EMP | replace \"7839\" WITH \"CEO\" IN MGR, EMPNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_MGR=[REPLACE($3, '7839':VARCHAR, 'CEO':VARCHAR)]," + + " new_EMPNO=[REPLACE($0, '7839':VARCHAR, 'CEO':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`MGR`, '7839', 'CEO') `new_MGR`, " + + "REPLACE(`EMPNO`, '7839', 'CEO') `new_EMPNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithPipeline() { + String ppl = + "source=EMP | where JOB = 'CLERK' | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB | sort SAL"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)])\n" + + " LogicalFilter(condition=[=($2, 'CLERK':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `JOB` = 'CLERK'\n" + + "ORDER BY `SAL`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test(expected = Exception.class) + public void testReplaceWithoutWithKeywordShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" \"EMPLOYEE\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithoutInKeywordShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" JOB"; + getRelNode(ppl); + } + + @Test(expected = RuntimeException.class) + public void testReplaceWithExpressionShouldFail() { + String ppl = "source=EMP | replace EMPNO + 1 WITH \"EMPLOYEE\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithInvalidFieldShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN INVALID_FIELD"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMultipleInKeywordsShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB IN ENAME"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMissingQuotesShouldFail() { + String ppl = "source=EMP | replace CLERK WITH EMPLOYEE IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMissingReplacementValueShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH IN JOB"; + getRelNode(ppl); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNullPatternShouldFail() { + Replace replace = + new Replace(null, new Literal("EMPLOYEE", DataType.STRING), Collections.emptyList()); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNullReplacementShouldFail() { + Replace replace = + new Replace(new Literal("CLERK", DataType.STRING), null, Collections.emptyList()); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNonStringPatternShouldFail() { + Replace replace = + new Replace( + new Literal(123, DataType.INTEGER), + new Literal("EMPLOYEE", DataType.STRING), + Collections.emptyList()); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNonStringReplacementShouldFail() { + Replace replace = + new Replace( + new Literal("CLERK", DataType.STRING), + new Literal(456, DataType.INTEGER), + Collections.emptyList()); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithNullFieldListShouldFail() { + Replace replace = + new Replace( + new Literal("CLERK", DataType.STRING), new Literal("EMPLOYEE", DataType.STRING), null); + replace.validate(); + } + + @Test(expected = IllegalArgumentException.class) + public void testReplaceWithEmptyFieldListShouldFail() { + Replace replace = + new Replace( + new Literal("CLERK", DataType.STRING), + new Literal("EMPLOYEE", DataType.STRING), + Collections.emptyList()); + replace.validate(); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index aa5987a4472..5d9f94dbc4e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -617,6 +617,49 @@ public void testGrok() { anonymize("source=t | grok email '.+@%{HOSTNAME:host}' | fields email, host")); } + @Test + public void testReplaceCommandSingleField() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname")); + } + + @Test + public void testReplaceCommandMultipleFields() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + + " Field(field=fieldname2, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname1, fieldname2")); + } + + @Test(expected = Exception.class) + public void testReplaceCommandWithoutInShouldFail() { + anonymize("source=EMP | replace \"value\" WITH \"newvalue\""); + } + + @Test + public void testReplaceCommandSpecialCharactersInFields() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=user.name, fieldArgs=[])," + + " Field(field=user.email, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN user.name, user.email")); + } + + @Test + public void testReplaceCommandWithWildcards() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + anonymize("source=EMP | replace \"CLERK*\" WITH \"EMPLOYEE*\" IN fieldname")); + } + + @Test + public void testReplaceCommandWithMultipleWildcards() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + + " Field(field=fieldname2, fieldArgs=[])", + anonymize("source=EMP | replace \"*TEST*\" WITH \"*NEW*\" IN fieldname1, fieldname2")); + } + @Test public void testPatterns() { when(settings.getSettingValue(Key.PATTERN_METHOD)).thenReturn("SIMPLE_PATTERN"); From 00eee9ecfe899e54e9bc574cd4b473d1eddbfeb1 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 7 Oct 2025 14:49:11 -0700 Subject: [PATCH 02/19] fix anonymizer test and add explainIT Signed-off-by: Kai Huang # Conflicts: # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java --- .../sql/calcite/remote/CalciteExplainIT.java | 11 ++++++++ .../calcite/explain_replace_command.yaml | 8 ++++++ .../explain_replace_command.yaml | 9 +++++++ .../ppl/calcite/CalcitePPLReplaceTest.java | 25 +++++++++++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 10 ++++---- 5 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index bf12f466aeb..b69c9224d83 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1134,4 +1134,15 @@ public void testJoinWithPushdownSortIntoAgg() throws IOException { TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT))); resetJoinSubsearchMaxOut(); } + + @Test + public void testReplaceCommandExplain() throws IOException { + String expected = loadExpectedPlan("explain_replace_command.yaml"); + assertYamlEqualsJsonIgnoreId( + expected, + explainQueryToString( + String.format( + "source=%s | replace 'IL' WITH 'Illinois' IN state | fields state, new_state", + TEST_INDEX_ACCOUNT))); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml new file mode 100644 index 00000000000..85737d90c15 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[$7], new_state=[REPLACE($7, 'IL':VARCHAR, 'Illinois':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0=[{inputs}], expr#1=['IL':VARCHAR], expr#2=['Illinois':VARCHAR], expr#3=[REPLACE($t0, $t1, $t2)], state=[$t0], $f1=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml new file mode 100644 index 00000000000..6a40f67465d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[$7], new_state=[REPLACE($7, 'IL':VARCHAR, 'Illinois':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['IL':VARCHAR], expr#18=['Illinois':VARCHAR], expr#19=[REPLACE($t7, $t17, $t18)], state=[$t7], new_state=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java index 95d37e9dcf7..f3f7ae9da95 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -238,4 +238,29 @@ public void testReplaceWithEmptyFieldListShouldFail() { Collections.emptyList()); replace.validate(); } + + @Test + public void testReplaceWithExistingNewFieldConflict() { + // Test verifies that Calcite's RelBuilder automatically handles duplicate field names + // by appending numbers (e.g., new_JOB becomes new_JOB0 when new_JOB already exists) + String ppl = + "source=EMP | eval new_JOB = 'existing' | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB"; + RelNode root = getRelNode(ppl); + + // Verify that Calcite automatically deduplicates field names + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=['existing':VARCHAR], new_JOB0=[REPLACE($2," + + " 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "'existing' `new_JOB`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB0`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 5d9f94dbc4e..6de9acacfe1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -620,14 +620,14 @@ public void testGrok() { @Test public void testReplaceCommandSingleField() { assertEquals( - "source=EMP | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + "source=table | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname")); } @Test public void testReplaceCommandMultipleFields() { assertEquals( - "source=EMP | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + "source=table | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + " Field(field=fieldname2, fieldArgs=[])", anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname1, fieldname2")); } @@ -640,7 +640,7 @@ public void testReplaceCommandWithoutInShouldFail() { @Test public void testReplaceCommandSpecialCharactersInFields() { assertEquals( - "source=EMP | replace *** WITH *** IN Field(field=user.name, fieldArgs=[])," + "source=table | replace *** WITH *** IN Field(field=user.name, fieldArgs=[])," + " Field(field=user.email, fieldArgs=[])", anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN user.name, user.email")); } @@ -648,14 +648,14 @@ public void testReplaceCommandSpecialCharactersInFields() { @Test public void testReplaceCommandWithWildcards() { assertEquals( - "source=EMP | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + "source=table | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", anonymize("source=EMP | replace \"CLERK*\" WITH \"EMPLOYEE*\" IN fieldname")); } @Test public void testReplaceCommandWithMultipleWildcards() { assertEquals( - "source=EMP | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + "source=table | replace *** WITH *** IN Field(field=fieldname1, fieldArgs=[])," + " Field(field=fieldname2, fieldArgs=[])", anonymize("source=EMP | replace \"*TEST*\" WITH \"*NEW*\" IN fieldname1, fieldname2")); } From 1288c4433a7c560fd6d4c55f50673bc30777607a Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 7 Oct 2025 14:55:43 -0700 Subject: [PATCH 03/19] fix category.json Signed-off-by: Kai Huang --- docs/category.json | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/category.json b/docs/category.json index 95e3a004195..1f6593fe42a 100644 --- a/docs/category.json +++ b/docs/category.json @@ -41,6 +41,7 @@ "user/ppl/cmd/rare.rst", "user/ppl/cmd/regex.rst", "user/ppl/cmd/rename.rst", + "user/ppl/cmd/replace.rst", "user/ppl/cmd/rex.rst", "user/ppl/cmd/search.rst", "user/ppl/cmd/showdatasources.rst", From 3e70cbdc08a2089ad016ae0a91f371814bb23167 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 7 Oct 2025 14:56:35 -0700 Subject: [PATCH 04/19] fix category.json Signed-off-by: Kai Huang --- docs/category.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/category.json b/docs/category.json index 1f6593fe42a..33c3301cbe0 100644 --- a/docs/category.json +++ b/docs/category.json @@ -63,6 +63,9 @@ "user/ppl/functions/json.rst", "user/ppl/functions/math.rst", "user/ppl/functions/relevance.rst", + "user/ppl/functions/string.rst", + "user/ppl/general/datatypes.rst", + "user/ppl/general/identifiers.rst" "user/ppl/functions/string.rst" ], "sql_cli": [ From 1790fd8ab85ce417e8072b704d54d1ab6e991773 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 7 Oct 2025 15:07:36 -0700 Subject: [PATCH 05/19] update doc Signed-off-by: Kai Huang --- docs/user/ppl/cmd/replace.rst | 39 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst index f2a193e54ec..dfd5c49e27c 100644 --- a/docs/user/ppl/cmd/replace.rst +++ b/docs/user/ppl/cmd/replace.rst @@ -11,29 +11,28 @@ replace Description ============ -| Using ``replace`` command to replace text in one or more fields in the search result. -* The command creates new fields with *new_* prefix for replaced content (e.g., replacing text in 'country' creates 'new_country') -* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0') +Using ``replace`` command to replace text in one or more fields in the search result. - -Version -======= -3.3.0 +Note: This command is only available when Calcite engine is enabled. Syntax ============ replace '' WITH '' IN [, ]... -Note: This command is only available when Calcite engine is enabled. -* pattern: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions). -* replacement: mandatory. The text you want to replace with. -* field list: mandatory. One or more field names where the replacement should occur. +Parameters +========== +* **pattern**: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions). +* **replacement**: mandatory. The text you want to replace with. +* **field-name**: mandatory. One or more field names where the replacement should occur. +Examples +======== + Example 1: Replace text in one field -==================================== +------------------------------------ The example shows replacing text in one field. @@ -52,7 +51,7 @@ PPL query:: Example 2: Replace text in multiple fields -========================================== +------------------------------------ The example shows replacing text in multiple fields. @@ -71,7 +70,7 @@ PPL query:: Example 3: Replace with IN clause and other commands -==================================================== +------------------------------------ The example shows using replace with other commands. @@ -88,7 +87,7 @@ PPL query:: +-------+-----+-----------+ Example 4: Pattern matching with LIKE and replace -================================================= +------------------------------------ Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. @@ -102,6 +101,10 @@ PPL query:: | 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane | +-----------------+-------+--------+-----+--------+-----------------+ -Note -==== -* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged. \ No newline at end of file + +Limitations +=========== +* Only supports plain text literals for pattern matching. Wildcards and regular expressions are not supported. +* Pattern and replacement values must be string literals. +* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged. +* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0'). \ No newline at end of file From f92545adc8314ee2ccde4d5b2cfb6d84ef23d10b Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 8 Oct 2025 09:24:02 -0700 Subject: [PATCH 06/19] add nullable Signed-off-by: Kai Huang --- core/src/main/java/org/opensearch/sql/ast/tree/Replace.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java index 0bc43854078..b329c9a853c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -14,6 +14,7 @@ import lombok.Getter; import lombok.Setter; import lombok.ToString; +import org.jetbrains.annotations.Nullable; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Field; @@ -28,7 +29,7 @@ public class Replace extends UnresolvedPlan { private final UnresolvedExpression pattern; private final UnresolvedExpression replacement; private final List fieldList; - private UnresolvedPlan child; + @Nullable private UnresolvedPlan child; public Replace( UnresolvedExpression pattern, UnresolvedExpression replacement, List fieldList) { From a2df2fc2a0a8dedf067dd836ca00b4ffc3fdb468 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 8 Oct 2025 09:43:29 -0700 Subject: [PATCH 07/19] fixes Signed-off-by: Kai Huang --- .../org/opensearch/sql/ast/tree/Replace.java | 18 ++-------- .../opensearch/sql/ppl/parser/AstBuilder.java | 34 ++++++++++++++----- .../ppl/calcite/CalcitePPLReplaceTest.java | 25 +++++++------- 3 files changed, 40 insertions(+), 37 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java index b329c9a853c..195b7774ad1 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -6,10 +6,8 @@ package org.opensearch.sql.ast.tree; import com.google.common.collect.ImmutableList; -import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Setter; @@ -28,11 +26,11 @@ public class Replace extends UnresolvedPlan { private final UnresolvedExpression pattern; private final UnresolvedExpression replacement; - private final List fieldList; + private final Set fieldList; @Nullable private UnresolvedPlan child; public Replace( - UnresolvedExpression pattern, UnresolvedExpression replacement, List fieldList) { + UnresolvedExpression pattern, UnresolvedExpression replacement, Set fieldList) { this.pattern = pattern; this.replacement = replacement; this.fieldList = fieldList; @@ -62,18 +60,6 @@ public void validate() { throw new IllegalArgumentException( "Field list cannot be empty in Replace command. Use IN clause to specify the field."); } - - Set uniqueFields = new HashSet<>(); - List duplicates = - fieldList.stream() - .map(field -> field.getField().toString()) - .filter(fieldName -> !uniqueFields.add(fieldName)) - .collect(Collectors.toList()); - - if (!duplicates.isEmpty()) { - throw new IllegalArgumentException( - String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates))); - } } @Override diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 8b530772741..c8c0891b278 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -388,19 +388,11 @@ public UnresolvedPlan visitReplaceCommand(OpenSearchPPLParser.ReplaceCommandCont UnresolvedExpression pattern = internalVisitExpression(ctx.pattern); UnresolvedExpression replacement = internalVisitExpression(ctx.replacement); - List fieldList = - ctx.fieldList().fieldExpression().stream() - .map(field -> (Field) internalVisitExpression(field)) - .collect(Collectors.toList()); + Set fieldList = getUniqueFieldSet(ctx.fieldList()); return new Replace(pattern, replacement, fieldList); } - private String removeQuotes(String text) { - // Remove both single and double quotes - return text.replaceAll("^[\"']|[\"']$", ""); - } - /** Stats command. */ @Override public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { @@ -689,6 +681,30 @@ private List getFieldList(FieldListContext ctx) { .collect(Collectors.toList()); } + private Set getUniqueFieldSet(FieldListContext ctx) { + List fields = + ctx.fieldExpression().stream() + .map(field -> (Field) internalVisitExpression(field)) + .toList(); + + Set uniqueFields = new java.util.LinkedHashSet<>(fields); + + if (uniqueFields.size() < fields.size()) { + // Find duplicates for error message + Set seen = new HashSet<>(); + Set duplicates = + fields.stream() + .map(f -> f.getField().toString()) + .filter(name -> !seen.add(name)) + .collect(Collectors.toSet()); + + throw new IllegalArgumentException( + String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates))); + } + + return uniqueFields; + } + /** Rare command. */ @Override public UnresolvedPlan visitRareCommand(OpenSearchPPLParser.RareCommandContext ctx) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java index f3f7ae9da95..a1406fe9da1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -12,6 +12,7 @@ import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Replace; +import org.opensearch.sql.common.antlr.SyntaxCheckException; public class CalcitePPLReplaceTest extends CalcitePPLAbstractTest { @@ -145,43 +146,43 @@ public void testReplaceWithPipeline() { verifyPPLToSparkSQL(root, expectedSparkSql); } - @Test(expected = Exception.class) + @Test(expected = SyntaxCheckException.class) public void testReplaceWithoutWithKeywordShouldFail() { String ppl = "source=EMP | replace \"CLERK\" \"EMPLOYEE\" IN JOB"; getRelNode(ppl); } - @Test(expected = Exception.class) + @Test(expected = SyntaxCheckException.class) public void testReplaceWithoutInKeywordShouldFail() { String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" JOB"; getRelNode(ppl); } - @Test(expected = RuntimeException.class) + @Test(expected = SyntaxCheckException.class) public void testReplaceWithExpressionShouldFail() { String ppl = "source=EMP | replace EMPNO + 1 WITH \"EMPLOYEE\" IN JOB"; getRelNode(ppl); } - @Test(expected = Exception.class) + @Test(expected = IllegalArgumentException.class) public void testReplaceWithInvalidFieldShouldFail() { String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN INVALID_FIELD"; getRelNode(ppl); } - @Test(expected = Exception.class) + @Test(expected = IllegalArgumentException.class) public void testReplaceWithMultipleInKeywordsShouldFail() { String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB IN ENAME"; getRelNode(ppl); } - @Test(expected = Exception.class) + @Test(expected = SyntaxCheckException.class) public void testReplaceWithMissingQuotesShouldFail() { String ppl = "source=EMP | replace CLERK WITH EMPLOYEE IN JOB"; getRelNode(ppl); } - @Test(expected = Exception.class) + @Test(expected = SyntaxCheckException.class) public void testReplaceWithMissingReplacementValueShouldFail() { String ppl = "source=EMP | replace \"CLERK\" WITH IN JOB"; getRelNode(ppl); @@ -190,14 +191,14 @@ public void testReplaceWithMissingReplacementValueShouldFail() { @Test(expected = IllegalArgumentException.class) public void testReplaceWithNullPatternShouldFail() { Replace replace = - new Replace(null, new Literal("EMPLOYEE", DataType.STRING), Collections.emptyList()); + new Replace(null, new Literal("EMPLOYEE", DataType.STRING), Collections.emptySet()); replace.validate(); } @Test(expected = IllegalArgumentException.class) public void testReplaceWithNullReplacementShouldFail() { Replace replace = - new Replace(new Literal("CLERK", DataType.STRING), null, Collections.emptyList()); + new Replace(new Literal("CLERK", DataType.STRING), null, Collections.emptySet()); replace.validate(); } @@ -207,7 +208,7 @@ public void testReplaceWithNonStringPatternShouldFail() { new Replace( new Literal(123, DataType.INTEGER), new Literal("EMPLOYEE", DataType.STRING), - Collections.emptyList()); + Collections.emptySet()); replace.validate(); } @@ -217,7 +218,7 @@ public void testReplaceWithNonStringReplacementShouldFail() { new Replace( new Literal("CLERK", DataType.STRING), new Literal(456, DataType.INTEGER), - Collections.emptyList()); + Collections.emptySet()); replace.validate(); } @@ -235,7 +236,7 @@ public void testReplaceWithEmptyFieldListShouldFail() { new Replace( new Literal("CLERK", DataType.STRING), new Literal("EMPLOYEE", DataType.STRING), - Collections.emptyList()); + Collections.emptySet()); replace.validate(); } From af95e74c8a89615187f0ad81b27d7a9f9db2338a Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 9 Oct 2025 11:03:25 -0700 Subject: [PATCH 08/19] fix Signed-off-by: Kai Huang --- docs/category.json | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/docs/category.json b/docs/category.json index 33c3301cbe0..67c6b901000 100644 --- a/docs/category.json +++ b/docs/category.json @@ -2,7 +2,6 @@ "bash": [ "user/ppl/interfaces/endpoint.rst", "user/ppl/interfaces/protocol.rst", - "user/ppl/admin/settings.rst", "user/optimization/optimization.rst", "user/admin/settings.rst" ], @@ -66,38 +65,8 @@ "user/ppl/functions/string.rst", "user/ppl/general/datatypes.rst", "user/ppl/general/identifiers.rst" - "user/ppl/functions/string.rst" - ], - "sql_cli": [ - "user/dql/expressions.rst", - "user/general/comments.rst", - "user/general/datatypes.rst", - "user/general/identifiers.rst", - "user/general/values.rst", - "user/dql/basics.rst", - "user/dql/functions.rst", - "user/dql/window.rst", - "user/beyond/partiql.rst", - "user/dql/aggregations.rst", - "user/dql/complex.rst", - "user/dql/metadata.rst" - ], - "ppl_cli_calcite": [ - "user/ppl/cmd/append.rst", - "user/ppl/cmd/eventstats.rst", - "user/ppl/cmd/fields.rst", - "user/ppl/cmd/regex.rst", - "user/ppl/cmd/rename.rst", - "user/ppl/cmd/rex.rst", - "user/ppl/cmd/stats.rst", - "user/ppl/cmd/timechart.rst", - "user/ppl/cmd/search.rst", - "user/ppl/cmd/replace.rst" - "user/ppl/functions/string.rst", - "user/ppl/general/datatypes.rst", - "user/ppl/general/identifiers.rst" ], "bash_settings": [ "user/ppl/admin/settings.rst" ] -} +} \ No newline at end of file From 638ae3445dad5dcdfd1bbde77ee48bc97e1de261 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 9 Oct 2025 13:44:01 -0700 Subject: [PATCH 09/19] change new_ handling to in-place replacement Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 43 +++++++++----- .../remote/CalciteReplaceCommandIT.java | 59 ++++++++----------- 2 files changed, 53 insertions(+), 49 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index a74db231075..e00c063db5a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2421,26 +2421,37 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) { RexNode patternNode = rexVisitor.analyze(node.getPattern(), context); RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context); + // Create a set of field names to replace for quick lookup + Set fieldsToReplace = + node.getFieldList().stream().map(f -> f.getField().toString()).collect(Collectors.toSet()); + + // Validate that all fields to replace exist in the current schema + Set availableFields = new HashSet<>(fieldNames); + for (String fieldToReplace : fieldsToReplace) { + if (!availableFields.contains(fieldToReplace)) { + throw new IllegalArgumentException( + String.format( + "field [%s] not found; input fields are: %s", fieldToReplace, fieldNames)); + } + } + List projectList = new ArrayList<>(); List newFieldNames = new ArrayList<>(); - // First add all original fields + // Project all fields, replacing specified ones in-place for (String fieldName : fieldNames) { - RexNode fieldRef = context.relBuilder.field(fieldName); - projectList.add(fieldRef); - newFieldNames.add(fieldName); - } - - // Then add new fields with replaced content using new_ prefix - for (Field field : node.getFieldList()) { - String fieldName = field.getField().toString(); - RexNode fieldRef = context.relBuilder.field(fieldName); - - RexNode replaceCall = - context.relBuilder.call( - SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode); - projectList.add(replaceCall); - newFieldNames.add(NEW_FIELD_PREFIX + fieldName); + if (fieldsToReplace.contains(fieldName)) { + // Replace this field in-place + RexNode fieldRef = context.relBuilder.field(fieldName); + RexNode replaceCall = + context.relBuilder.call( + SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode); + projectList.add(replaceCall); + } else { + // Keep original field unchanged + projectList.add(context.relBuilder.field(fieldName)); + } + newFieldNames.add(fieldName); // Same field name (in-place replacement) } context.relBuilder.project(projectList, newFieldNames); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java index 34eb31d97d8..ed07166a9b1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java @@ -29,11 +29,11 @@ public void testReplaceWithFields() throws IOException { executeQuery( String.format( "source = %s | replace 'USA' WITH 'United States' IN country | fields name, age," - + " new_country", + + " country", TEST_INDEX_STATE_COUNTRY)); verifySchema( - result, schema("name", "string"), schema("age", "int"), schema("new_country", "string")); + result, schema("name", "string"), schema("age", "int"), schema("country", "string")); verifyDataRows( result, @@ -55,20 +55,18 @@ public void testMultipleReplace() throws IOException { verifySchema( result, schema("name", "string"), - schema("age", "int"), - schema("state", "string"), schema("country", "string"), - schema("year", "int"), + schema("state", "string"), schema("month", "int"), - schema("new_name", "string"), - schema("new_country", "string")); + schema("year", "int"), + schema("age", "int")); verifyDataRows( result, - rows("Jake", "USA", "California", 4, 2023, 70, "United States", "Jake"), - rows("Hello", "USA", "New York", 4, 2023, 30, "United States", "Hello"), - rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada", "John"), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada", "Joseph")); + rows("Jake", "United States", "California", 4, 2023, 70), + rows("Hello", "United States", "New York", 4, 2023, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25), + rows("Joseph", "Canada", "Quebec", 4, 2023, 20)); } @Test @@ -76,7 +74,7 @@ public void testReplaceWithSort() throws IOException { JSONObject result = executeQuery( String.format( - "source = %s | replace 'US' WITH 'United States' IN country | sort new_country", + "source = %s | replace 'US' WITH 'United States' IN country | sort country", TEST_INDEX_STATE_COUNTRY)); verifySchema( @@ -86,8 +84,7 @@ public void testReplaceWithSort() throws IOException { schema("state", "string"), schema("country", "string"), schema("year", "int"), - schema("month", "int"), - schema("new_country", "string")); + schema("month", "int")); } @Test @@ -105,8 +102,7 @@ public void testReplaceWithWhereClause() throws IOException { schema("state", "string"), schema("country", "string"), schema("year", "int"), - schema("month", "int"), - schema("new_country", "string")); + schema("month", "int")); } @Test @@ -119,19 +115,18 @@ public void testEmptyStringReplacement() throws IOException { verifySchema( result, schema("name", "string"), - schema("age", "int"), - schema("state", "string"), schema("country", "string"), - schema("year", "int"), + schema("state", "string"), schema("month", "int"), - schema("new_country", "string")); + schema("year", "int"), + schema("age", "int")); verifyDataRows( result, - rows("Jake", "USA", "California", 4, 2023, 70, ""), - rows("Hello", "USA", "New York", 4, 2023, 30, ""), - rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada"), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada")); + rows("Jake", "", "California", 4, 2023, 70), + rows("Hello", "", "New York", 4, 2023, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20)); } @Test @@ -145,20 +140,18 @@ public void testMultipleFieldsInClause() throws IOException { verifySchema( result, schema("name", "string"), - schema("age", "int"), - schema("state", "string"), schema("country", "string"), - schema("year", "int"), + schema("state", "string"), schema("month", "int"), - schema("new_state", "string"), - schema("new_country", "string")); + schema("year", "int"), + schema("age", "int")); verifyDataRows( result, - rows("Jake", "USA", "California", 4, 2023, 70, "United States", "California"), - rows("Hello", "USA", "New York", 4, 2023, 30, "United States", "New York"), - rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada", "Ontario"), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada", "Quebec")); + rows("Jake", "United States", "California", 4, 2023, 70), + rows("Hello", "United States", "New York", 4, 2023, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20)); } @Test From d099aed1d2d788c8a5c03bbdfb76ab0ddc001b53 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 9 Oct 2025 13:56:41 -0700 Subject: [PATCH 10/19] update doctest Signed-off-by: Kai Huang --- docs/user/ppl/cmd/replace.rst | 67 +++++++++++++++++------------------ 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst index dfd5c49e27c..b1cd960164d 100644 --- a/docs/user/ppl/cmd/replace.rst +++ b/docs/user/ppl/cmd/replace.rst @@ -38,16 +38,16 @@ The example shows replacing text in one field. PPL query:: - os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state, new_state; + os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state; fetched rows / total rows = 4/4 - +-------+-----------+ - | state | new_state | - |-------+-----------| - | IL | Illinois | - | TN | TN | - | VA | VA | - | MD | MD | - +-------+-----------+ + +----------+ + | state | + |----------| + | Illinois | + | TN | + | VA | + | MD | + +----------+ Example 2: Replace text in multiple fields @@ -57,16 +57,16 @@ The example shows replacing text in multiple fields. PPL query:: - os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address, new_state, new_address; + os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address; fetched rows / total rows = 4/4 - +-------+----------------------+-----------+----------------------+ - | state | address | new_state | new_address | - |-------+----------------------+-----------+----------------------| - | IL | 880 Holmes Lane | Illinois | 880 Holmes Lane | - | TN | 671 Bristol Street | TN | 671 Bristol Street | - | VA | 789 Madison Street | VA | 789 Madison Street | - | MD | 467 Hutchinson Court | MD | 467 Hutchinson Court | - +-------+----------------------+-----------+----------------------+ + +----------+----------------------+ + | state | address | + |----------+----------------------| + | Illinois | 880 Holmes Lane | + | TN | 671 Bristol Street | + | VA | 789 Madison Street | + | MD | 467 Hutchinson Court | + +----------+----------------------+ Example 3: Replace with IN clause and other commands @@ -76,15 +76,15 @@ The example shows using replace with other commands. PPL query:: - os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age, new_state; + os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age; fetched rows / total rows = 3/3 - +-------+-----+-----------+ - | state | age | new_state | - |-------+-----+-----------| - | IL | 32 | Illinois | - | TN | 36 | TN | - | MD | 33 | MD | - +-------+-----+-----------+ + +----------+-----+ + | state | age | + |----------+-----| + | Illinois | 32 | + | TN | 36 | + | MD | 33 | + +----------+-----+ Example 4: Pattern matching with LIKE and replace ------------------------------------ @@ -93,18 +93,17 @@ Since replace command only supports plain string literals, you can use LIKE comm PPL query:: - os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city, new_address; + os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city; fetched rows / total rows = 1/1 - +-----------------+-------+--------+-----+--------+-----------------+ - | address | state | gender | age | city | new_address | - |-----------------+-------+--------+-----+--------+-----------------| - | 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane | - +-----------------+-------+--------+-----+--------+-----------------+ + +-----------------+-------+--------+-----+--------+ + | address | state | gender | age | city | + |-----------------+-------+--------+-----+--------| + | 880 HOLMES Lane | IL | M | 32 | Brogan | + +-----------------+-------+--------+-----+--------+ Limitations =========== * Only supports plain text literals for pattern matching. Wildcards and regular expressions are not supported. * Pattern and replacement values must be string literals. -* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged. -* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0'). \ No newline at end of file +* The replace command modifies the specified fields in-place. \ No newline at end of file From 72defe9541d6815c40239fa582933a31a28afa65 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 9 Oct 2025 14:05:54 -0700 Subject: [PATCH 11/19] update explain Signed-off-by: Kai Huang --- .../org/opensearch/sql/calcite/remote/CalciteExplainIT.java | 2 +- .../expectedOutput/calcite/explain_replace_command.yaml | 4 ++-- .../calcite_no_pushdown/explain_replace_command.yaml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index b69c9224d83..86f585e4547 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1142,7 +1142,7 @@ public void testReplaceCommandExplain() throws IOException { expected, explainQueryToString( String.format( - "source=%s | replace 'IL' WITH 'Illinois' IN state | fields state, new_state", + "source=%s | replace 'IL' WITH 'Illinois' IN state | fields state", TEST_INDEX_ACCOUNT))); } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml index 85737d90c15..a867c569168 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml @@ -1,8 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(state=[$7], new_state=[REPLACE($7, 'IL':VARCHAR, 'Illinois':VARCHAR)]) + LogicalProject(state=[REPLACE($7, 'IL':VARCHAR, 'Illinois':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0=[{inputs}], expr#1=['IL':VARCHAR], expr#2=['Illinois':VARCHAR], expr#3=[REPLACE($t0, $t1, $t2)], state=[$t0], $f1=[$t3]) + EnumerableCalc(expr#0=[{inputs}], expr#1=['IL':VARCHAR], expr#2=['Illinois':VARCHAR], expr#3=[REPLACE($t0, $t1, $t2)], $f0=[$t3]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml index 6a40f67465d..bbebbf37f80 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_replace_command.yaml @@ -1,9 +1,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(state=[$7], new_state=[REPLACE($7, 'IL':VARCHAR, 'Illinois':VARCHAR)]) + LogicalProject(state=[REPLACE($7, 'IL':VARCHAR, 'Illinois':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..16=[{inputs}], expr#17=['IL':VARCHAR], expr#18=['Illinois':VARCHAR], expr#19=[REPLACE($t7, $t17, $t18)], state=[$t7], new_state=[$t19]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['IL':VARCHAR], expr#18=['Illinois':VARCHAR], expr#19=[REPLACE($t7, $t17, $t18)], state=[$t19]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file From 9d1a2fcc5d28bff044d2f40c4cf0cf87ca639932 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 9 Oct 2025 14:29:35 -0700 Subject: [PATCH 12/19] update test Signed-off-by: Kai Huang --- .../ppl/calcite/CalcitePPLReplaceTest.java | 96 +++++++++---------- 1 file changed, 47 insertions(+), 49 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java index a1406fe9da1..d7841001e0b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -26,49 +26,49 @@ public void testBasicReplace() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR," - + " 'EMPLOYEE':VARCHAR)])\n" + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " - + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`\n" + "SELECT `EMPNO`, `ENAME`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, `DEPTNO`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); String expectedResult = - "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" - + " DEPTNO=20; new_JOB=EMPLOYEE\n" + "EMPNO=7369; ENAME=SMITH; JOB=EMPLOYEE; MGR=7902; HIREDATE=1980-12-17; SAL=800.00;" + + " COMM=null; DEPTNO=20\n" + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" - + " COMM=300.00; DEPTNO=30; new_JOB=SALESMAN\n" + + " COMM=300.00; DEPTNO=30\n" + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" - + " COMM=500.00; DEPTNO=30; new_JOB=SALESMAN\n" + + " COMM=500.00; DEPTNO=30\n" + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00;" - + " COMM=null; DEPTNO=20; new_JOB=MANAGER\n" + + " COMM=null; DEPTNO=20\n" + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00;" - + " COMM=1400.00; DEPTNO=30; new_JOB=SALESMAN\n" + + " COMM=1400.00; DEPTNO=30\n" + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00;" - + " COMM=null; DEPTNO=30; new_JOB=MANAGER\n" + + " COMM=null; DEPTNO=30\n" + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00;" - + " COMM=null; DEPTNO=10; new_JOB=MANAGER\n" + + " COMM=null; DEPTNO=10\n" + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00;" - + " COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + " COMM=null; DEPTNO=20\n" + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00;" - + " COMM=null; DEPTNO=10; new_JOB=PRESIDENT\n" + + " COMM=null; DEPTNO=10\n" + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00;" - + " COMM=0.00; DEPTNO=30; new_JOB=SALESMAN\n" - + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;" - + " COMM=null; DEPTNO=20; new_JOB=EMPLOYEE\n" - + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;" - + " COMM=null; DEPTNO=30; new_JOB=EMPLOYEE\n" + + " COMM=0.00; DEPTNO=30\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=EMPLOYEE; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;" + + " COMM=null; DEPTNO=20\n" + + "EMPNO=7900; ENAME=JAMES; JOB=EMPLOYEE; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;" + + " COMM=null; DEPTNO=30\n" + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00;" - + " COMM=null; DEPTNO=20; new_JOB=ANALYST\n" - + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;" - + " COMM=null; DEPTNO=10; new_JOB=EMPLOYEE\n"; + + " COMM=null; DEPTNO=20\n" + + "EMPNO=7934; ENAME=MILLER; JOB=EMPLOYEE; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;" + + " COMM=null; DEPTNO=10\n"; verifyResult(root, expectedResult); } @@ -81,17 +81,16 @@ public void testMultipleFieldsReplace() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR)]," - + " new_DEPTNO=[REPLACE($7, '20':VARCHAR, 'RESEARCH':VARCHAR)])\n" + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6]," + + " DEPTNO=[REPLACE($7, '20':VARCHAR, 'RESEARCH':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " - + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`, " - + "REPLACE(`DEPTNO`, '20', 'RESEARCH') `new_DEPTNO`\n" + "SELECT `EMPNO`, `ENAME`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, REPLACE(`DEPTNO`, '20', 'RESEARCH') `DEPTNO`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -104,17 +103,16 @@ public void testReplaceSameValueInMultipleFields() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], new_MGR=[REPLACE($3, '7839':VARCHAR, 'CEO':VARCHAR)]," - + " new_EMPNO=[REPLACE($0, '7839':VARCHAR, 'CEO':VARCHAR)])\n" + "LogicalProject(EMPNO=[REPLACE($0, '7839':VARCHAR, 'CEO':VARCHAR)], ENAME=[$1], JOB=[$2]," + + " MGR=[REPLACE($3, '7839':VARCHAR, 'CEO':VARCHAR)], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " - + "REPLACE(`MGR`, '7839', 'CEO') `new_MGR`, " - + "REPLACE(`EMPNO`, '7839', 'CEO') `new_EMPNO`\n" + "SELECT REPLACE(`EMPNO`, '7839', 'CEO') `EMPNO`, `ENAME`, `JOB`," + + " REPLACE(`MGR`, '7839', 'CEO') `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -128,17 +126,17 @@ public void testReplaceWithPipeline() { String expectedLogical = "LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK':VARCHAR," - + " 'EMPLOYEE':VARCHAR)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + " LogicalFilter(condition=[=($2, 'CLERK':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " - + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`\n" + "SELECT `EMPNO`, `ENAME`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" + "WHERE `JOB` = 'CLERK'\n" + "ORDER BY `SAL`"; @@ -241,25 +239,25 @@ public void testReplaceWithEmptyFieldListShouldFail() { } @Test - public void testReplaceWithExistingNewFieldConflict() { - // Test verifies that Calcite's RelBuilder automatically handles duplicate field names - // by appending numbers (e.g., new_JOB becomes new_JOB0 when new_JOB already exists) + public void testReplaceWithEvalAndReplaceOnSameField() { + // Test verifies that in-place replacement works correctly when there are additional fields + // created by eval. The eval creates new_JOB, and replace modifies JOB in-place. String ppl = "source=EMP | eval new_JOB = 'existing' | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB"; RelNode root = getRelNode(ppl); - // Verify that Calcite automatically deduplicates field names + // With in-place replacement, JOB is modified and new_JOB remains as created by eval String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], new_JOB=['existing':VARCHAR], new_JOB0=[REPLACE($2," - + " 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR)])\n" + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=['existing':VARCHAR])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " - + "'existing' `new_JOB`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB0`\n" + "SELECT `EMPNO`, `ENAME`, REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, `DEPTNO`, 'existing' `new_JOB`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); From b6fe45224e1c4e659ffb0bcfc01942edb58d75cd Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 13 Oct 2025 13:28:13 -0700 Subject: [PATCH 13/19] fixes Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index e00c063db5a..d1402eb1980 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -158,7 +158,6 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor f.getField().toString()).collect(Collectors.toSet()); // Validate that all fields to replace exist in the current schema - Set availableFields = new HashSet<>(fieldNames); - for (String fieldToReplace : fieldsToReplace) { - if (!availableFields.contains(fieldToReplace)) { - throw new IllegalArgumentException( - String.format( - "field [%s] not found; input fields are: %s", fieldToReplace, fieldNames)); - } - } + validateFieldsExist(fieldsToReplace, fieldNames); List projectList = new ArrayList<>(); - List newFieldNames = new ArrayList<>(); // Project all fields, replacing specified ones in-place for (String fieldName : fieldNames) { @@ -2451,13 +2442,22 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) { // Keep original field unchanged projectList.add(context.relBuilder.field(fieldName)); } - newFieldNames.add(fieldName); // Same field name (in-place replacement) } - context.relBuilder.project(projectList, newFieldNames); + context.relBuilder.project(projectList, fieldNames); return context.relBuilder.peek(); } + private void validateFieldsExist(Set fieldsToValidate, List availableFields) { + Set availableFieldsSet = new HashSet<>(availableFields); + for (String field : fieldsToValidate) { + if (!availableFieldsSet.contains(field)) { + throw new IllegalArgumentException( + String.format("field [%s] not found; input fields are: %s", field, availableFields)); + } + } + } + private void buildParseRelNode(Parse node, CalcitePlanContext context) { RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context); ParseMethod parseMethod = node.getParseMethod(); From d5beab71f7f5f438e3893beb0b32a5a17eb9edb5 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 13 Oct 2025 15:25:19 -0700 Subject: [PATCH 14/19] doc update Signed-off-by: Kai Huang --- docs/user/ppl/functions/string.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index 24efa1434f5..eb82a06a055 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -215,6 +215,14 @@ Argument type: STRING, STRING (regex pattern), STRING (replacement) Return type: STRING +**Important - Regex Special Characters**: The pattern is interpreted as a regular expression. Characters like ``.``, ``*``, ``+``, ``[``, ``]``, ``(``, ``)``, ``{``, ``}``, ``^``, ``$``, ``|``, ``?``, and ``\`` have special meaning in regex. To match them literally, escape with backslashes: + +* To match ``example.com``: use ``'example\\.com'`` (escape the dots) +* To match ``value*``: use ``'value\\*'`` (escape the asterisk) +* To match ``price+tax``: use ``'price\\+tax'`` (escape the plus) + +For strings with many special characters, use ``\\Q...\\E`` to quote the entire literal string (e.g., ``'\\Qhttps://example.com/path?id=123\\E'`` matches that exact URL). + Literal String Replacement Examples:: os> source=people | eval `REPLACE('helloworld', 'world', 'universe')` = REPLACE('helloworld', 'world', 'universe'), `REPLACE('helloworld', 'invalid', 'universe')` = REPLACE('helloworld', 'invalid', 'universe') | fields `REPLACE('helloworld', 'world', 'universe')`, `REPLACE('helloworld', 'invalid', 'universe')` @@ -225,6 +233,16 @@ Literal String Replacement Examples:: | hellouniverse | helloworld | +--------------------------------------------+----------------------------------------------+ +Escaping Special Characters Examples:: + + os> source=people | eval `Replace domain` = REPLACE('api.example.com', 'example\\.com', 'newsite.org'), `Replace with quote` = REPLACE('https://api.example.com/v1', '\\Qhttps://api.example.com\\E', 'http://localhost:8080') | fields `Replace domain`, `Replace with quote` + fetched rows / total rows = 1/1 + +-----------------+--------------------------+ + | Replace domain | Replace with quote | + |-----------------+--------------------------| + | api.newsite.org | http://localhost:8080/v1 | + +-----------------+--------------------------+ + Regex Pattern Examples:: os> source=people | eval `Remove digits` = REPLACE('test123', '\\d+', ''), `Collapse spaces` = REPLACE('hello world', ' +', ' '), `Remove special` = REPLACE('hello@world!', '[^a-zA-Z]', '') | fields `Remove digits`, `Collapse spaces`, `Remove special` From 38eeb2f3c9ff8fea2cebdfef5a85217e4b7b95c1 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 13 Oct 2025 16:30:19 -0700 Subject: [PATCH 15/19] update comma support Signed-off-by: Kai Huang --- .../org/opensearch/sql/ast/tree/Replace.java | 92 +++++++++++--- .../opensearch/sql/ast/tree/ReplacePair.java | 22 ++++ .../sql/calcite/CalciteRelNodeVisitor.java | 20 +-- .../remote/CalciteReplaceCommandIT.java | 48 +++++++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 15 ++- .../ppl/calcite/CalcitePPLReplaceTest.java | 119 ++++++++++++++++++ .../sql/ppl/parser/AstBuilderTest.java | 12 ++ 8 files changed, 307 insertions(+), 27 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/ReplacePair.java diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java index 195b7774ad1..897521b15cb 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -6,6 +6,7 @@ package org.opensearch.sql.ast.tree; import com.google.common.collect.ImmutableList; +import java.util.Collections; import java.util.List; import java.util.Set; import lombok.EqualsAndHashCode; @@ -24,36 +25,95 @@ @ToString @EqualsAndHashCode(callSuper = false) public class Replace extends UnresolvedPlan { - private final UnresolvedExpression pattern; - private final UnresolvedExpression replacement; + private final List replacePairs; private final Set fieldList; @Nullable private UnresolvedPlan child; + /** + * Constructor with multiple pattern/replacement pairs. + * + * @param replacePairs List of pattern/replacement pairs + * @param fieldList Set of fields to apply replacements to + */ + public Replace(List replacePairs, Set fieldList) { + this.replacePairs = replacePairs; + this.fieldList = fieldList; + validate(); + } + + /** + * Backward-compatible constructor with single pattern/replacement pair. + * + * @param pattern Pattern literal + * @param replacement Replacement literal + * @param fieldList Set of fields to apply replacement to + */ public Replace( UnresolvedExpression pattern, UnresolvedExpression replacement, Set fieldList) { - this.pattern = pattern; - this.replacement = replacement; + // Convert single pair to list for internal consistency + if (!(pattern instanceof Literal) || !(replacement instanceof Literal)) { + throw new IllegalArgumentException( + "Pattern and replacement must be literals in Replace command"); + } + this.replacePairs = + Collections.singletonList(new ReplacePair((Literal) pattern, (Literal) replacement)); this.fieldList = fieldList; validate(); } - public void validate() { - if (pattern == null) { - throw new IllegalArgumentException("Pattern expression cannot be null in Replace command"); + /** + * Get the pattern from the first replacement pair (for backward compatibility). + * + * @return Pattern expression + * @deprecated Use {@link #getReplacePairs()} instead + */ + @Deprecated + public UnresolvedExpression getPattern() { + if (replacePairs.isEmpty()) { + throw new IllegalStateException("No replacement pairs available"); } - if (replacement == null) { - throw new IllegalArgumentException( - "Replacement expression cannot be null in Replace command"); + return replacePairs.get(0).getPattern(); + } + + /** + * Get the replacement from the first replacement pair (for backward compatibility). + * + * @return Replacement expression + * @deprecated Use {@link #getReplacePairs()} instead + */ + @Deprecated + public UnresolvedExpression getReplacement() { + if (replacePairs.isEmpty()) { + throw new IllegalStateException("No replacement pairs available"); } + return replacePairs.get(0).getReplacement(); + } - // Validate pattern is a string literal - if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) { - throw new IllegalArgumentException("Pattern must be a string literal in Replace command"); + public void validate() { + if (replacePairs == null || replacePairs.isEmpty()) { + throw new IllegalArgumentException( + "At least one pattern/replacement pair is required in Replace command"); } - // Validate replacement is a string literal - if (!(replacement instanceof Literal && ((Literal) replacement).getType() == DataType.STRING)) { - throw new IllegalArgumentException("Replacement must be a string literal in Replace command"); + // Validate each pair + for (ReplacePair pair : replacePairs) { + if (pair.getPattern() == null) { + throw new IllegalArgumentException("Pattern cannot be null in Replace command"); + } + if (pair.getReplacement() == null) { + throw new IllegalArgumentException("Replacement cannot be null in Replace command"); + } + + // Validate pattern is a string literal + if (pair.getPattern().getType() != DataType.STRING) { + throw new IllegalArgumentException("Pattern must be a string literal in Replace command"); + } + + // Validate replacement is a string literal + if (pair.getReplacement().getType() != DataType.STRING) { + throw new IllegalArgumentException( + "Replacement must be a string literal in Replace command"); + } } if (fieldList == null || fieldList.isEmpty()) { diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/ReplacePair.java b/core/src/main/java/org/opensearch/sql/ast/tree/ReplacePair.java new file mode 100644 index 00000000000..e3f3897fdf1 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/ReplacePair.java @@ -0,0 +1,22 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.expression.Literal; + +/** A pair of pattern and replacement literals for the Replace command. */ +@Getter +@AllArgsConstructor +@EqualsAndHashCode +@ToString +public class ReplacePair { + private final Literal pattern; + private final Literal replacement; +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index d1402eb1980..e26f7e02aa8 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -122,6 +122,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Replace; +import org.opensearch.sql.ast.tree.ReplacePair; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; import org.opensearch.sql.ast.tree.Search; @@ -2417,8 +2418,6 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) { visitChildren(node, context); List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); - RexNode patternNode = rexVisitor.analyze(node.getPattern(), context); - RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context); // Create a set of field names to replace for quick lookup Set fieldsToReplace = @@ -2432,12 +2431,19 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) { // Project all fields, replacing specified ones in-place for (String fieldName : fieldNames) { if (fieldsToReplace.contains(fieldName)) { - // Replace this field in-place + // Replace this field in-place with all pattern/replacement pairs applied sequentially RexNode fieldRef = context.relBuilder.field(fieldName); - RexNode replaceCall = - context.relBuilder.call( - SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode); - projectList.add(replaceCall); + + // Apply all replacement pairs sequentially (nested REPLACE calls) + for (ReplacePair pair : node.getReplacePairs()) { + RexNode patternNode = rexVisitor.analyze(pair.getPattern(), context); + RexNode replacementNode = rexVisitor.analyze(pair.getReplacement(), context); + fieldRef = + context.relBuilder.call( + SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode); + } + + projectList.add(fieldRef); } else { // Keep original field unchanged projectList.add(context.relBuilder.field(fieldName)); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java index ed07166a9b1..9d6304c363b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java @@ -240,4 +240,52 @@ public void testNonStringLiteralReplacement() { verifyErrorMessageContains(e, "is not a valid term at this part of the query"); verifyErrorMessageContains(e, "Expecting tokens: DQUOTA_STRING, SQUOTA_STRING"); } + + @Test + public void testMultiplePairsInSingleCommand() throws IOException { + // Test replacing multiple patterns in a single command + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States', 'Canada' WITH 'CA' IN country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int")); + + verifyDataRows( + result, + rows("Jake", "United States", "California", 4, 2023, 70), + rows("Hello", "United States", "New York", 4, 2023, 30), + rows("John", "CA", "Ontario", 4, 2023, 25), + rows("Jane", "CA", "Quebec", 4, 2023, 20)); + } + + @Test + public void testMultiplePairsSequentialApplication() throws IOException { + // Test that replacements are applied sequentially (order matters) + // If we have "Ontario" WITH "ON", "ON" WITH "Ontario Province" + // then "Ontario" becomes "ON" first, then that "ON" becomes "Ontario Province" + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'Ontario' WITH 'ON', 'ON' WITH 'Ontario Province' IN state" + + " | fields name, state", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema(result, schema("name", "string"), schema("state", "string")); + + verifyDataRows( + result, + rows("Jake", "California"), + rows("Hello", "New York"), + rows("John", "Ontario Province"), + rows("Jane", "Quebec")); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index d3c3cf63210..1bd2ba31c0e 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -207,7 +207,11 @@ renameCommand ; replaceCommand - : REPLACE pattern=stringLiteral WITH replacement=stringLiteral IN fieldList + : REPLACE replacePair (COMMA replacePair)* IN fieldList + ; + +replacePair + : pattern=stringLiteral WITH replacement=stringLiteral ; statsCommand diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index c8c0891b278..33420bd79c5 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -77,6 +77,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Replace; +import org.opensearch.sql.ast.tree.ReplacePair; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -385,12 +386,20 @@ public UnresolvedPlan visitRenameCommand(RenameCommandContext ctx) { /** Replace command. */ @Override public UnresolvedPlan visitReplaceCommand(OpenSearchPPLParser.ReplaceCommandContext ctx) { - UnresolvedExpression pattern = internalVisitExpression(ctx.pattern); - UnresolvedExpression replacement = internalVisitExpression(ctx.replacement); + // Parse all replacement pairs + List replacePairs = + ctx.replacePair().stream().map(this::buildReplacePair).collect(Collectors.toList()); Set fieldList = getUniqueFieldSet(ctx.fieldList()); - return new Replace(pattern, replacement, fieldList); + return new Replace(replacePairs, fieldList); + } + + /** Build a ReplacePair from parse context. */ + private ReplacePair buildReplacePair(OpenSearchPPLParser.ReplacePairContext ctx) { + Literal pattern = (Literal) internalVisitExpression(ctx.pattern); + Literal replacement = (Literal) internalVisitExpression(ctx.replacement); + return new ReplacePair(pattern, replacement); } /** Stats command. */ diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java index d7841001e0b..fe61beefd35 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -262,4 +262,123 @@ public void testReplaceWithEvalAndReplaceOnSameField() { verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testReplaceWithMultiplePairs() { + // Test with multiple pattern/replacement pairs in a single command + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\" IN JOB"; + RelNode root = getRelNode(ppl); + + // Should generate nested REPLACE calls: REPLACE(REPLACE(JOB, 'CLERK', 'EMPLOYEE'), 'MANAGER', + // 'SUPERVISOR') + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE(REPLACE($2, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR), 'MANAGER':VARCHAR, 'SUPERVISOR':VARCHAR)], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, REPLACE(REPLACE(`JOB`, 'CLERK', 'EMPLOYEE'), 'MANAGER'," + + " 'SUPERVISOR') `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithThreePairs() { + // Test with three pattern/replacement pairs + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\"," + + " \"ANALYST\" WITH \"RESEARCHER\" IN JOB"; + RelNode root = getRelNode(ppl); + + // Should generate triple nested REPLACE calls + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[REPLACE(REPLACE(REPLACE($2," + + " 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR), 'MANAGER':VARCHAR, 'SUPERVISOR':VARCHAR)," + + " 'ANALYST':VARCHAR, 'RESEARCHER':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, REPLACE(REPLACE(REPLACE(`JOB`, 'CLERK', 'EMPLOYEE')," + + " 'MANAGER', 'SUPERVISOR'), 'ANALYST', 'RESEARCHER') `JOB`, `MGR`, `HIREDATE`," + + " `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithMultiplePairsOnMultipleFields() { + // Test with multiple pattern/replacement pairs applied to multiple fields + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\" IN JOB," + + " ENAME"; + RelNode root = getRelNode(ppl); + + // Should apply the same nested REPLACE calls to both JOB and ENAME fields + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[REPLACE(REPLACE($1, 'CLERK':VARCHAR," + + " 'EMPLOYEE':VARCHAR), 'MANAGER':VARCHAR, 'SUPERVISOR':VARCHAR)]," + + " JOB=[REPLACE(REPLACE($2, 'CLERK':VARCHAR, 'EMPLOYEE':VARCHAR)," + + " 'MANAGER':VARCHAR, 'SUPERVISOR':VARCHAR)], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, REPLACE(REPLACE(`ENAME`, 'CLERK', 'EMPLOYEE'), 'MANAGER'," + + " 'SUPERVISOR') `ENAME`, REPLACE(REPLACE(`JOB`, 'CLERK', 'EMPLOYEE'), 'MANAGER'," + + " 'SUPERVISOR') `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithMultiplePairsSequentialApplication() { + // Test that replacements are applied sequentially + // This test demonstrates the order matters: if we have "20" WITH "30", "30" WITH "40" + // then "20" will become "30" first, then that "30" becomes "40", resulting in "40" + String ppl = "source=EMP | replace \"20\" WITH \"30\", \"30\" WITH \"40\" IN DEPTNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[REPLACE(REPLACE($7, '20':VARCHAR, '30':VARCHAR)," + + " '30':VARCHAR, '40':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " REPLACE(REPLACE(`DEPTNO`, '20', '30'), '30', '40') `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithMultiplePairsMissingWithKeywordShouldFail() { + // Missing WITH keyword between pairs + String ppl = + "source=EMP | replace \"CLERK\" \"EMPLOYEE\", \"MANAGER\" WITH \"SUPERVISOR\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = SyntaxCheckException.class) + public void testReplaceWithMultiplePairsTrailingCommaShouldFail() { + // Trailing comma after last pair + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\", IN JOB"; + getRelNode(ppl); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index a3d6f686af6..b9948e6abe2 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1255,4 +1255,16 @@ public void testMultisearchSingleSubsearchThrowsException() { "Multisearch command requires at least two subsearches. Provided: 1", exception.getMessage()); } + + @Test + public void testReplaceCommand() { + // Test basic single pattern replacement + plan("source=t | replace 'old' WITH 'new' IN field"); + } + + @Test + public void testReplaceCommandWithMultiplePairs() { + // Test multiple pattern/replacement pairs + plan("source=t | replace 'a' WITH 'A', 'b' WITH 'B' IN field"); + } } From c0d7ffe00d356a3e1e3b67924584c77d65b69f5c Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 14 Oct 2025 15:19:05 -0700 Subject: [PATCH 16/19] remove validation logic since enforced by antlr Signed-off-by: Kai Huang --- .../org/opensearch/sql/ast/tree/Replace.java | 36 ------------ .../ppl/calcite/CalcitePPLReplaceTest.java | 56 ------------------- 2 files changed, 92 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java index 897521b15cb..9435986c01d 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -15,7 +15,6 @@ import lombok.ToString; import org.jetbrains.annotations.Nullable; import org.opensearch.sql.ast.AbstractNodeVisitor; -import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.UnresolvedExpression; @@ -38,7 +37,6 @@ public class Replace extends UnresolvedPlan { public Replace(List replacePairs, Set fieldList) { this.replacePairs = replacePairs; this.fieldList = fieldList; - validate(); } /** @@ -58,7 +56,6 @@ public Replace( this.replacePairs = Collections.singletonList(new ReplacePair((Literal) pattern, (Literal) replacement)); this.fieldList = fieldList; - validate(); } /** @@ -89,39 +86,6 @@ public UnresolvedExpression getReplacement() { return replacePairs.get(0).getReplacement(); } - public void validate() { - if (replacePairs == null || replacePairs.isEmpty()) { - throw new IllegalArgumentException( - "At least one pattern/replacement pair is required in Replace command"); - } - - // Validate each pair - for (ReplacePair pair : replacePairs) { - if (pair.getPattern() == null) { - throw new IllegalArgumentException("Pattern cannot be null in Replace command"); - } - if (pair.getReplacement() == null) { - throw new IllegalArgumentException("Replacement cannot be null in Replace command"); - } - - // Validate pattern is a string literal - if (pair.getPattern().getType() != DataType.STRING) { - throw new IllegalArgumentException("Pattern must be a string literal in Replace command"); - } - - // Validate replacement is a string literal - if (pair.getReplacement().getType() != DataType.STRING) { - throw new IllegalArgumentException( - "Replacement must be a string literal in Replace command"); - } - } - - if (fieldList == null || fieldList.isEmpty()) { - throw new IllegalArgumentException( - "Field list cannot be empty in Replace command. Use IN clause to specify the field."); - } - } - @Override public Replace attach(UnresolvedPlan child) { if (null == this.child) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java index fe61beefd35..abde8b3a5bb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -5,13 +5,9 @@ package org.opensearch.sql.ppl.calcite; -import java.util.Collections; import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; -import org.opensearch.sql.ast.expression.DataType; -import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.common.antlr.SyntaxCheckException; public class CalcitePPLReplaceTest extends CalcitePPLAbstractTest { @@ -186,58 +182,6 @@ public void testReplaceWithMissingReplacementValueShouldFail() { getRelNode(ppl); } - @Test(expected = IllegalArgumentException.class) - public void testReplaceWithNullPatternShouldFail() { - Replace replace = - new Replace(null, new Literal("EMPLOYEE", DataType.STRING), Collections.emptySet()); - replace.validate(); - } - - @Test(expected = IllegalArgumentException.class) - public void testReplaceWithNullReplacementShouldFail() { - Replace replace = - new Replace(new Literal("CLERK", DataType.STRING), null, Collections.emptySet()); - replace.validate(); - } - - @Test(expected = IllegalArgumentException.class) - public void testReplaceWithNonStringPatternShouldFail() { - Replace replace = - new Replace( - new Literal(123, DataType.INTEGER), - new Literal("EMPLOYEE", DataType.STRING), - Collections.emptySet()); - replace.validate(); - } - - @Test(expected = IllegalArgumentException.class) - public void testReplaceWithNonStringReplacementShouldFail() { - Replace replace = - new Replace( - new Literal("CLERK", DataType.STRING), - new Literal(456, DataType.INTEGER), - Collections.emptySet()); - replace.validate(); - } - - @Test(expected = IllegalArgumentException.class) - public void testReplaceWithNullFieldListShouldFail() { - Replace replace = - new Replace( - new Literal("CLERK", DataType.STRING), new Literal("EMPLOYEE", DataType.STRING), null); - replace.validate(); - } - - @Test(expected = IllegalArgumentException.class) - public void testReplaceWithEmptyFieldListShouldFail() { - Replace replace = - new Replace( - new Literal("CLERK", DataType.STRING), - new Literal("EMPLOYEE", DataType.STRING), - Collections.emptySet()); - replace.validate(); - } - @Test public void testReplaceWithEvalAndReplaceOnSameField() { // Test verifies that in-place replacement works correctly when there are additional fields From 4b4217e8ee7e1160a5434720da3b141ae10e3bac Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 14 Oct 2025 15:34:35 -0700 Subject: [PATCH 17/19] update doc Signed-off-by: Kai Huang --- docs/user/ppl/cmd/replace.rst | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst index b1cd960164d..bcb0d57e677 100644 --- a/docs/user/ppl/cmd/replace.rst +++ b/docs/user/ppl/cmd/replace.rst @@ -18,7 +18,7 @@ Note: This command is only available when Calcite engine is enabled. Syntax ============ -replace '' WITH '' IN [, ]... +replace '' WITH '' [, '' WITH '']... IN [, ]... Parameters @@ -69,10 +69,10 @@ PPL query:: +----------+----------------------+ -Example 3: Replace with IN clause and other commands +Example 3: Replace with other commands in a pipeline ------------------------------------ -The example shows using replace with other commands. +The example shows using replace with other commands in a query pipeline. PPL query:: @@ -86,7 +86,25 @@ PPL query:: | MD | 33 | +----------+-----+ -Example 4: Pattern matching with LIKE and replace +Example 4: Replace with multiple pattern/replacement pairs +------------------------------------ + +The example shows using multiple pattern/replacement pairs in a single replace command. The replacements are applied sequentially. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois", "TN" WITH "Tennessee" IN state | fields state; + fetched rows / total rows = 4/4 + +-----------+ + | state | + |-----------| + | Illinois | + | Tennessee | + | VA | + | MD | + +-----------+ + +Example 5: Pattern matching with LIKE and replace ------------------------------------ Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. From 0ff7d979eb6a4996bd4d63edde5710e7ebacefbf Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 15 Oct 2025 12:59:19 -0700 Subject: [PATCH 18/19] update validation logic Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index e26f7e02aa8..06fc02c304d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2423,8 +2423,12 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) { Set fieldsToReplace = node.getFieldList().stream().map(f -> f.getField().toString()).collect(Collectors.toSet()); - // Validate that all fields to replace exist in the current schema - validateFieldsExist(fieldsToReplace, fieldNames); + // Validate that all fields to replace exist by calling field() on each + // This leverages relBuilder.field()'s built-in validation which throws + // IllegalArgumentException if any field doesn't exist + for (String fieldToReplace : fieldsToReplace) { + context.relBuilder.field(fieldToReplace); + } List projectList = new ArrayList<>(); @@ -2454,16 +2458,6 @@ public RelNode visitReplace(Replace node, CalcitePlanContext context) { return context.relBuilder.peek(); } - private void validateFieldsExist(Set fieldsToValidate, List availableFields) { - Set availableFieldsSet = new HashSet<>(availableFields); - for (String field : fieldsToValidate) { - if (!availableFieldsSet.contains(field)) { - throw new IllegalArgumentException( - String.format("field [%s] not found; input fields are: %s", field, availableFields)); - } - } - } - private void buildParseRelNode(Parse node, CalcitePlanContext context) { RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context); ParseMethod parseMethod = node.getParseMethod(); From e49d99ff0cbb62808a3877e0119ab77b1e261474 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 15 Oct 2025 15:48:09 -0700 Subject: [PATCH 19/19] update Signed-off-by: Kai Huang --- .../org/opensearch/sql/ast/tree/Replace.java | 50 ------------------- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 15 ++++-- 2 files changed, 10 insertions(+), 55 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java index 9435986c01d..8b2c18cd56c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -6,7 +6,6 @@ package org.opensearch.sql.ast.tree; import com.google.common.collect.ImmutableList; -import java.util.Collections; import java.util.List; import java.util.Set; import lombok.EqualsAndHashCode; @@ -16,8 +15,6 @@ import org.jetbrains.annotations.Nullable; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Field; -import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.expression.UnresolvedExpression; @Getter @Setter @@ -39,53 +36,6 @@ public Replace(List replacePairs, Set fieldList) { this.fieldList = fieldList; } - /** - * Backward-compatible constructor with single pattern/replacement pair. - * - * @param pattern Pattern literal - * @param replacement Replacement literal - * @param fieldList Set of fields to apply replacement to - */ - public Replace( - UnresolvedExpression pattern, UnresolvedExpression replacement, Set fieldList) { - // Convert single pair to list for internal consistency - if (!(pattern instanceof Literal) || !(replacement instanceof Literal)) { - throw new IllegalArgumentException( - "Pattern and replacement must be literals in Replace command"); - } - this.replacePairs = - Collections.singletonList(new ReplacePair((Literal) pattern, (Literal) replacement)); - this.fieldList = fieldList; - } - - /** - * Get the pattern from the first replacement pair (for backward compatibility). - * - * @return Pattern expression - * @deprecated Use {@link #getReplacePairs()} instead - */ - @Deprecated - public UnresolvedExpression getPattern() { - if (replacePairs.isEmpty()) { - throw new IllegalStateException("No replacement pairs available"); - } - return replacePairs.get(0).getPattern(); - } - - /** - * Get the replacement from the first replacement pair (for backward compatibility). - * - * @return Replacement expression - * @deprecated Use {@link #getReplacePairs()} instead - */ - @Deprecated - public UnresolvedExpression getReplacement() { - if (replacePairs.isEmpty()) { - throw new IllegalStateException("No replacement pairs available"); - } - return replacePairs.get(0).getReplacement(); - } - @Override public Replace attach(UnresolvedPlan child) { if (null == this.child) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index ac15076ed1d..e392a682cef 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -284,9 +284,15 @@ public String visitReplace(Replace node, String context) { // Get the child query string String child = node.getChild().get(0).accept(this, context); - // Get pattern and replacement expressions - String pattern = visitExpression(node.getPattern()); - String replacement = visitExpression(node.getReplacement()); + // Build pattern/replacement pairs string + String pairs = + node.getReplacePairs().stream() + .map( + pair -> + StringUtils.format( + "%s WITH %s", + visitExpression(pair.getPattern()), visitExpression(pair.getReplacement()))) + .collect(Collectors.joining(", ")); // Get field list String fieldListStr = @@ -294,8 +300,7 @@ public String visitReplace(Replace node, String context) { + node.getFieldList().stream().map(Field::toString).collect(Collectors.joining(", ")); // Build the replace command string - return StringUtils.format( - "%s | replace %s WITH %s%s", child, pattern, replacement, fieldListStr); + return StringUtils.format("%s | replace %s%s", child, pairs, fieldListStr); } /** Build {@link LogicalAggregation}. */