diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 93ad06011c0..bf54d2ffd89 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -113,7 +113,7 @@ public UnresolvedPlan describe(String tableName) { } public static UnresolvedPlan search(UnresolvedPlan input, String queryString) { - return new Search(input, queryString); + return new Search(input, queryString, null); } public UnresolvedPlan subqueryAlias(UnresolvedPlan child, String alias) { diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java index fe7be0db13e..bdfbd9fda39 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java @@ -27,6 +27,11 @@ public String toQueryString() { return left.toQueryString() + " AND " + right.toQueryString(); } + @Override + public String toAnonymizedString() { + return left.toAnonymizedString() + " AND " + right.toAnonymizedString(); + } + @Override public List getChild() { return Arrays.asList(left, right); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java index be099059546..c429e6f66cc 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java @@ -5,6 +5,8 @@ package org.opensearch.sql.ast.expression; +import static org.opensearch.sql.utils.QueryStringUtils.maskField; + import java.util.Arrays; import java.util.List; import lombok.EqualsAndHashCode; @@ -63,6 +65,12 @@ public String toQueryString() { } } + @Override + public String toAnonymizedString() { + String fieldName = 
QueryStringUtils.escapeFieldName(field.getField().toString()); + return maskField(fieldName) + " " + operator.symbol + " ***"; + } + @Override public List getChild() { return Arrays.asList(field, value); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java index 23bf806d230..b705909445f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java @@ -17,6 +17,13 @@ public abstract class SearchExpression extends UnresolvedExpression { */ public abstract String toQueryString(); + /** + * Convert the search expression to an anonymized string with fields and values masked. + * + * @return the anonymized string + */ + public abstract String toAnonymizedString(); + @Override public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitChildren(this, context); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java index 41b85f408ca..09197202dc0 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java @@ -26,6 +26,14 @@ public String toQueryString() { return "(" + expression.toQueryString() + ")"; } + @Override + public String toAnonymizedString() { + if (expression instanceof SearchGroup) { + return expression.toAnonymizedString(); + } + return "(" + expression.toAnonymizedString() + ")"; + } + @Override public List getChild() { return Collections.singletonList(expression); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java index bdbdb712a9c..8291d130dff 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java +++ 
b/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java @@ -33,6 +33,13 @@ public String toQueryString() { return fieldName + ":( " + valueList + " )"; } + @Override + public String toAnonymizedString() { + // NOTE(review): the field is always reported as a plain identifier here, whereas + // SearchComparison routes the field name through maskField - confirm this is intended. + return "identifier IN ***"; + } + @Override public List getChild() { List children = new ArrayList<>(); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java index f27dad34bab..460615afa64 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java @@ -55,6 +55,12 @@ public String toQueryString() { return QueryStringUtils.escapeLuceneSpecialCharacters(text); } + @Override + public String toAnonymizedString() { + // A literal never exposes its text; reuse the shared mask instead of repeating "***". + return QueryStringUtils.MASK_LITERAL; + } + @Override public List getChild() { return Collections.singletonList(literal); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java index 3e5083b7678..b9ea7b416b4 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java @@ -26,6 +26,11 @@ public String toQueryString() { return "NOT(" + expression.toQueryString() + ")"; } + @Override + public String toAnonymizedString() { + return "NOT(" + expression.toAnonymizedString() + ")"; + } + @Override public List getChild() { return Collections.singletonList(expression); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java index b5aa72807bd..1a9e95e89a2 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java @@ -27,6 +27,11 @@ public String toQueryString() { return left.toQueryString() + " OR " + 
right.toQueryString(); } + @Override + public String toAnonymizedString() { + return left.toAnonymizedString() + " OR " + right.toAnonymizedString(); + } + @Override public List getChild() { return Arrays.asList(left, right); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Search.java b/core/src/main/java/org/opensearch/sql/ast/tree/Search.java index ebc74192987..0a705f95c41 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Search.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Search.java @@ -7,11 +7,13 @@ import com.google.common.collect.ImmutableList; import java.util.List; +import javax.annotation.Nullable; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.SearchExpression; /** * Logical plan node for Search operation. Represents search expressions that get converted to @@ -19,12 +21,19 @@ */ @Getter @ToString -@EqualsAndHashCode(callSuper = false) +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) @RequiredArgsConstructor public class Search extends UnresolvedPlan { - private final UnresolvedPlan child; - private final String queryString; + @EqualsAndHashCode.Include private final UnresolvedPlan child; + @EqualsAndHashCode.Include private final String queryString; + + // Currently read only by the anonymizer; nullable and excluded from equals/hashCode + private final @Nullable SearchExpression originalExpression; + + public Search(UnresolvedPlan child, String queryString) { + this(child, queryString, null); + } @Override public List getChild() { @@ -38,6 +47,6 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { @Override public UnresolvedPlan attach(UnresolvedPlan child) { - return new Search(child, queryString); + return new Search(child, queryString, originalExpression); } } diff --git a/core/src/main/java/org/opensearch/sql/utils/QueryStringUtils.java 
b/core/src/main/java/org/opensearch/sql/utils/QueryStringUtils.java index fd9c4b7d2e3..94c1bd9db05 100644 --- a/core/src/main/java/org/opensearch/sql/utils/QueryStringUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/QueryStringUtils.java @@ -8,6 +8,34 @@ /** Utility class for query_string syntax operations. */ public class QueryStringUtils { + private static final String INTERNAL_TIMESTAMP = "@timestamp"; + + public static final String MASK_LITERAL = "***"; + + public static final String MASK_COLUMN = "identifier"; + + public static final String MASK_TIMESTAMP_COLUMN = "time_identifier"; + + public static final String MASK_METADATA_COLUMN = "meta_identifier"; + + /** + * Map a field name to the placeholder used in anonymized queries. + * + * @param fieldName the field name to mask; {@code null} is treated as a regular field + * @return {@link #MASK_TIMESTAMP_COLUMN} for {@code @timestamp}, {@link #MASK_METADATA_COLUMN} + * for names starting with an underscore, otherwise {@link #MASK_COLUMN} + */ + public static String maskField(String fieldName) { + // Constant-first equals keeps a null field name from throwing. + if (INTERNAL_TIMESTAMP.equals(fieldName)) { + return MASK_TIMESTAMP_COLUMN; + } + if (fieldName != null && fieldName.startsWith("_")) { + return MASK_METADATA_COLUMN; + } + return MASK_COLUMN; + } + // For field names, we typically don't escape dots as they're used for nested fields // But we escape other special characters public static final String LUCENE_SPECIAL_CHARS = "+-&|!(){}[]^\"~:/"; diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json new file mode 100644 index 00000000000..fb6ef8e36ff --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json @@ -0,0 +1,6 @@ +{ + "calcite":{ + "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", 
"physical":"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"birthdate\":{\"query\":\"\\\"2016\\\\-12\\\\-08 00\\\\:00\\\\:00.000000000\\\"\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index ed66682a981..9eedaeed2d8 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -214,7 +214,7 @@ public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) { // Create Search node with relation and query string Relation relation = (Relation) visitFromClause(ctx.fromClause()); - return new Search(relation, queryString); + return new Search(relation, queryString, combined); } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 7e8dc16f4dd..e277dfacbc1 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ 
b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -7,6 +7,9 @@ import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation; import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild; +import static org.opensearch.sql.utils.QueryStringUtils.MASK_COLUMN; +import static org.opensearch.sql.utils.QueryStringUtils.MASK_LITERAL; +import static org.opensearch.sql.utils.QueryStringUtils.maskField; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; @@ -108,11 +111,7 @@ /** Utility class to mask sensitive information in incoming PPL queries. */ public class PPLQueryDataAnonymizer extends AbstractNodeVisitor { - private static final String MASK_LITERAL = "***"; - - private static final String MASK_COLUMN = "identifier"; - - private static final String MASK_TABLE = "table"; + public static final String MASK_TABLE = "table"; private final AnonymizerExpressionAnalyzer expressionAnalyzer; private final Settings settings; @@ -252,9 +251,13 @@ public String visitTableFunction(TableFunction node, String context) { @Override public String visitSearch(Search node, String context) { String source = node.getChild().get(0).accept(this, context); - String queryString = node.getQueryString(); - String anonymized = queryString.replaceAll(":\\S+", ":" + MASK_LITERAL); + // originalExpression is null for Search nodes built without a parsed expression + // (e.g. AstDSL.search); fall back to masking values in the raw query string. + String anonymized = + node.getOriginalExpression() == null + ? node.getQueryString().replaceAll(":\\S+", ":" + MASK_LITERAL) + : node.getOriginalExpression().toAnonymizedString(); return StringUtils.format("%s %s", source, anonymized); } @Override @@ -918,7 +921,8 @@ public String visitIn(In node, String context) { @Override public String visitField(Field node, String context) { - return MASK_COLUMN; + String fieldName = node.getField().toString(); + return maskField(fieldName); } @Override diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index f205b9fe0cc..549fca03195 100644 --- 
a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -33,7 +33,7 @@ public class PPLQueryDataAnonymizerTest { @Test public void testSearchCommand() { - assertEquals("source=table a:***", anonymize("search source=t a=1")); + assertEquals("source=table identifier = ***", anonymize("search source=t a=1")); } @Test @@ -255,8 +255,8 @@ public void testReverseCommand() { @Test public void testTimechartCommand() { assertEquals( - "source=table | timechart limit=*** useother=*** count() by span(identifier, *** m)" - + " identifier", + "source=table | timechart limit=*** useother=*** count() by span(time_identifier, ***" + + " m) identifier", anonymize("source=t | timechart count() by host")); } @@ -388,6 +388,13 @@ public void testAndExpression() { anonymize("source=t | where a=1 and b=2")); } + @Test + public void testAndExpressionWithMetaData() { + assertEquals( + "source=table | where meta_identifier = *** and identifier = ***", + anonymize("source=t | where _id=1 and b=2")); + } + @Test public void testOrExpression() { assertEquals( @@ -879,10 +886,37 @@ private String anonymizeStatement(String query, boolean isExplain) { @Test public void testSearchWithAbsoluteTimeRange() { assertEquals( - "source=table (@timestamp:*** AND (@timestamp:***", + "source=table (time_identifier >= ***) AND (time_identifier <= ***)", anonymize("search source=t earliest='2012-12-10 15:00:00' latest=now")); } + @Test + public void testSearchWithIn() { + assertEquals("source=table identifier IN ***", anonymize("search source=t balance in (2000)")); + } + + @Test + public void testSearchWithNot() { + assertEquals( + "source=table NOT(identifier = ***)", anonymize("search NOT balance=2000 source=t")); + } + + @Test + public void testSearchWithGroup() { + assertEquals( + "source=table ((identifier = *** OR identifier = ***) AND identifier > ***)", + anonymize( + "search 
(severityText=\"ERROR\" OR severityText=\"WARN\") AND severityNumber>10" + + " source=t")); + } + + @Test + public void testSearchWithOr() { + assertEquals( + "source=table (time_identifier >= *** OR time_identifier <= ***)", + anonymize("search source=t earliest='2012-12-10 15:00:00' or latest=now")); + } + @Test public void testSpath() { assertEquals(