Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public UnresolvedPlan describe(String tableName) {
}

public static UnresolvedPlan search(UnresolvedPlan input, String queryString) {
return new Search(input, queryString);
return new Search(input, queryString, null);
}

public UnresolvedPlan subqueryAlias(UnresolvedPlan child, String alias) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ public String toQueryString() {
return left.toQueryString() + " AND " + right.toQueryString();
}

/** Anonymize both operands and join them with the {@code AND} keyword. */
@Override
public String toAnonymizedString() {
  final String lhs = left.toAnonymizedString();
  final String rhs = right.toAnonymizedString();
  return lhs + " AND " + rhs;
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Arrays.asList(left, right);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ public String toQueryString() {
}
}

/** Render the comparison with fixed placeholders around the operator symbol. */
@Override
public String toAnonymizedString() {
  String anonymized = "identifier ";
  anonymized += operator.symbol;
  anonymized += " ***";
  return anonymized;
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Arrays.asList(field, value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ public abstract class SearchExpression extends UnresolvedExpression {
*/
public abstract String toQueryString();

/**
* Convert the search expression to an anonymized string.
*
* <p>The implementations shown with this change emit fixed placeholders such as {@code
* identifier} and {@code ***} rather than the expression's own text; composite expressions
* combine the anonymized strings of their children.
*
* @return the anonymized string
*/
public abstract String toAnonymizedString();

@Override
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
return nodeVisitor.visitChildren(this, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ public String toQueryString() {
return "(" + expression.toQueryString() + ")";
}

/**
 * Anonymize the grouped expression, wrapping it in parentheses unless the inner expression is
 * itself a group (which already supplies its own parentheses).
 */
@Override
public String toAnonymizedString() {
  final String inner = expression.toAnonymizedString();
  return (expression instanceof SearchGroup) ? inner : "(" + inner + ")";
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Collections.singletonList(expression);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ public String toQueryString() {
return fieldName + ":( " + valueList + " )";
}

/** Return the fixed anonymized form of an IN search; no field name or value is included. */
@Override
public String toAnonymizedString() {
  final String anonymized = "identifier IN ***";
  return anonymized;
}

@Override
public List<? extends UnresolvedExpression> getChild() {
List<UnresolvedExpression> children = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ public String toQueryString() {
return QueryStringUtils.escapeLuceneSpecialCharacters(text);
}

/** Return the fixed mask for a literal; the literal text itself is never included. */
@Override
public String toAnonymizedString() {
  final String masked = "***";
  return masked;
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Collections.singletonList(literal);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ public String toQueryString() {
return "NOT(" + expression.toQueryString() + ")";
}

/** Anonymize the negated expression and wrap it as {@code NOT(...)}. */
@Override
public String toAnonymizedString() {
  final String negated = expression.toAnonymizedString();
  return "NOT(" + negated + ")";
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Collections.singletonList(expression);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ public String toQueryString() {
return left.toQueryString() + " OR " + right.toQueryString();
}

/** Anonymize both operands and join them with the {@code OR} keyword. */
@Override
public String toAnonymizedString() {
  final String lhs = left.toAnonymizedString();
  final String rhs = right.toAnonymizedString();
  return lhs + " OR " + rhs;
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Arrays.asList(left, right);
Expand Down
17 changes: 13 additions & 4 deletions core/src/main/java/org/opensearch/sql/ast/tree/Search.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,33 @@

import com.google.common.collect.ImmutableList;
import java.util.List;
import javax.annotation.Nullable;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.SearchExpression;

/**
* Logical plan node for Search operation. Represents search expressions that get converted to
* query_string function.
*/
@Getter
@ToString
@EqualsAndHashCode(callSuper = false)
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
@RequiredArgsConstructor
public class Search extends UnresolvedPlan {

private final UnresolvedPlan child;
private final String queryString;
@EqualsAndHashCode.Include private final UnresolvedPlan child;
@EqualsAndHashCode.Include private final String queryString;

// Currently used only by the anonymizer
private final @Nullable SearchExpression originalExpression;

/**
 * Create a Search node that carries no original search expression.
 *
 * <p>Delegates to the three-argument constructor, passing {@code null} as the original
 * expression.
 *
 * @param child the child plan
 * @param queryString the query string for this search
 */
public Search(UnresolvedPlan child, String queryString) {
this(child, queryString, null);
}

@Override
public List<UnresolvedPlan> getChild() {
Expand All @@ -38,6 +47,6 @@ public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {

@Override
public UnresolvedPlan attach(UnresolvedPlan child) {
return new Search(child, queryString);
return new Search(child, queryString, originalExpression);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"calcite":{
"logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n",
"physical":"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"birthdate\":{\"query\":\"\\\"2016\\\\-12\\\\-08 00\\\\:00\\\\:00.000000000\\\"\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) {

// Create Search node with relation and query string
Relation relation = (Relation) visitFromClause(ctx.fromClause());
return new Search(relation, queryString);
return new Search(relation, queryString, combined);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,11 @@
/** Utility class to mask sensitive information in incoming PPL queries. */
public class PPLQueryDataAnonymizer extends AbstractNodeVisitor<String, String> {

private static final String MASK_LITERAL = "***";
public static final String MASK_LITERAL = "***";

private static final String MASK_COLUMN = "identifier";
public static final String MASK_COLUMN = "identifier";

private static final String MASK_TABLE = "table";
public static final String MASK_TABLE = "table";

private final AnonymizerExpressionAnalyzer expressionAnalyzer;
private final Settings settings;
Expand Down Expand Up @@ -252,9 +252,7 @@ public String visitTableFunction(TableFunction node, String context) {
/**
 * Anonymize a search command as the anonymized source followed by the masked search expression.
 *
 * <p>The original expression on a {@code Search} node is nullable: the two-argument {@code
 * Search} constructor leaves it {@code null}. In that case this falls back to masking the raw
 * query string (everything after a {@code :} up to the next whitespace) instead of throwing a
 * {@link NullPointerException}.
 *
 * @param node the search node to anonymize
 * @param context the visitor context, passed through to the child plan
 * @return the anonymized source and search expression separated by a single space
 */
@Override
public String visitSearch(Search node, String context) {
  String source = node.getChild().get(0).accept(this, context);
  if (node.getOriginalExpression() == null) {
    // No structured expression available: mask values directly in the query string.
    String anonymized = node.getQueryString().replaceAll(":\\S+", ":" + MASK_LITERAL);
    return StringUtils.format("%s %s", source, anonymized);
  }
  return StringUtils.format("%s %s", source, node.getOriginalExpression().toAnonymizedString());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public class PPLQueryDataAnonymizerTest {

@Test
public void testSearchCommand() {
assertEquals("source=table a:***", anonymize("search source=t a=1"));
assertEquals("source=table identifier = ***", anonymize("search source=t a=1"));
}

@Test
Expand Down Expand Up @@ -879,10 +879,37 @@ private String anonymizeStatement(String query, boolean isExplain) {
@Test
public void testSearchWithAbsoluteTimeRange() {
assertEquals(
"source=table (@timestamp:*** AND (@timestamp:***",
"source=table (identifier >= ***) AND (identifier <= ***)",
anonymize("search source=t earliest='2012-12-10 15:00:00' latest=now"));
}

/** An IN search is anonymized to a masked field followed by a masked value list. */
@Test
public void testSearchWithIn() {
  String anonymizedQuery = anonymize("search source=t balance in (2000)");
  assertEquals("source=table identifier IN ***", anonymizedQuery);
}

/** A negated comparison keeps its NOT wrapper while the field and value are masked. */
@Test
public void testSearchWithNot() {
  String anonymizedQuery = anonymize("search NOT balance=2000 source=t");
  assertEquals("source=table NOT(identifier = ***)", anonymizedQuery);
}

/** Grouped OR/AND comparisons keep their parentheses while fields and values are masked. */
@Test
public void testSearchWithGroup() {
  String query =
      "search (severityText=\"ERROR\" OR severityText=\"WARN\") AND severityNumber>10"
          + " source=t";
  String anonymizedQuery = anonymize(query);
  assertEquals(
      "source=table ((identifier = *** OR identifier = ***) AND identifier > ***)",
      anonymizedQuery);
}

@Test
public void testSearchWithOr() {
assertEquals(
"source=table (identifier >= *** OR identifier <= ***)",
anonymize("search source=t earliest='2012-12-10 15:00:00' or latest=now"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's odd that `=` is anonymized to `>=` and `<=`. IMO, this changes the semantics.

Copy link
Member

@LantaoJin LantaoJin Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about changing it to something like time_identifier?
For meta fields such as _id, _doc, etc., how about anonymizing them to meta_identifier?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've already added time_identifier along with meta_identifier. Please take a look.

}

@Test
public void testSpath() {
assertEquals(
Expand Down
Loading