Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public UnresolvedPlan describe(String tableName) {
}

/**
 * Builds a {@link Search} plan node over the given input plan.
 *
 * @param input the plan the search applies to
 * @param queryString the query string carried by the search node
 * @return a new search node that has no original search expression attached
 */
public static UnresolvedPlan search(UnresolvedPlan input, String queryString) {
  // A plain query string has no parsed expression to hand over, hence the explicit null.
  return new Search(input, queryString, null);
}

public UnresolvedPlan subqueryAlias(UnresolvedPlan child, String alias) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ public String toQueryString() {
return left.toQueryString() + " AND " + right.toQueryString();
}

/**
 * Anonymizes both operands and joins them with the {@code AND} keyword.
 *
 * @return the anonymized left operand, {@code " AND "}, then the anonymized right operand
 */
@Override
public String toAnonymizedString() {
  String maskedLeft = left.toAnonymizedString();
  String maskedRight = right.toAnonymizedString();
  return maskedLeft + " AND " + maskedRight;
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Arrays.asList(left, right);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

package org.opensearch.sql.ast.expression;

import static org.opensearch.sql.utils.QueryStringUtils.maskField;

import java.util.Arrays;
import java.util.List;
import lombok.EqualsAndHashCode;
Expand Down Expand Up @@ -63,6 +65,12 @@ public String toQueryString() {
}
}

/**
 * Anonymizes this comparison: the field is replaced by its mask, the operator symbol is kept, and
 * the compared value is replaced by {@code ***}.
 *
 * @return the anonymized comparison, e.g. a masked field followed by the operator and {@code ***}
 */
@Override
public String toAnonymizedString() {
  String rawName = field.getField().toString();
  String maskedName = maskField(QueryStringUtils.escapeFieldName(rawName));
  return maskedName + " " + operator.symbol + " ***";
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Arrays.asList(field, value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ public abstract class SearchExpression extends UnresolvedExpression {
*/
public abstract String toQueryString();

/**
 * Converts this search expression to an anonymized string.
 *
 * <p>Being abstract, every concrete search expression must provide its own anonymized rendering.
 *
 * @return the anonymized string
 */
public abstract String toAnonymizedString();

@Override
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
return nodeVisitor.visitChildren(this, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ public String toQueryString() {
return "(" + expression.toQueryString() + ")";
}

/**
 * Anonymizes the grouped expression and wraps it in parentheses.
 *
 * <p>When the inner expression is itself a {@code SearchGroup}, its rendering is returned as is
 * so that directly nested groups do not accumulate extra parentheses.
 *
 * @return the anonymized group
 */
@Override
public String toAnonymizedString() {
  String inner = expression.toAnonymizedString();
  return (expression instanceof SearchGroup) ? inner : "(" + inner + ")";
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Collections.singletonList(expression);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ public String toQueryString() {
return fieldName + ":( " + valueList + " )";
}

/**
 * Returns a fixed anonymized rendering for an IN expression; neither the field name nor the
 * value list is inspected.
 *
 * <p>NOTE(review): the comparison expression passes its field name through {@code maskField},
 * which distinguishes the timestamp field and underscore-prefixed fields. This method always
 * emits the generic {@code identifier} mask instead -- confirm whether that difference is
 * intended.
 *
 * @return the constant string {@code "identifier IN ***"}
 */
@Override
public String toAnonymizedString() {
return "identifier IN ***";
}

@Override
public List<? extends UnresolvedExpression> getChild() {
List<UnresolvedExpression> children = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ public String toQueryString() {
return QueryStringUtils.escapeLuceneSpecialCharacters(text);
}

/**
 * Anonymizes this literal by discarding its text entirely.
 *
 * @return the literal mask {@code ***}
 */
@Override
public String toAnonymizedString() {
  // The literal's own text is never consulted, so nothing from it can appear in the output.
  final String mask = "***";
  return mask;
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Collections.singletonList(literal);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ public String toQueryString() {
return "NOT(" + expression.toQueryString() + ")";
}

/**
 * Anonymizes the negated expression and wraps it in {@code NOT(...)}.
 *
 * @return {@code "NOT("}, the anonymized operand, then {@code ")"}
 */
@Override
public String toAnonymizedString() {
  String maskedOperand = expression.toAnonymizedString();
  return "NOT(" + maskedOperand + ")";
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Collections.singletonList(expression);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ public String toQueryString() {
return left.toQueryString() + " OR " + right.toQueryString();
}

/**
 * Anonymizes both operands and joins them with the {@code OR} keyword.
 *
 * @return the anonymized left operand, {@code " OR "}, then the anonymized right operand
 */
@Override
public String toAnonymizedString() {
  String maskedLeft = left.toAnonymizedString();
  String maskedRight = right.toAnonymizedString();
  return maskedLeft + " OR " + maskedRight;
}

@Override
public List<? extends UnresolvedExpression> getChild() {
return Arrays.asList(left, right);
Expand Down
17 changes: 13 additions & 4 deletions core/src/main/java/org/opensearch/sql/ast/tree/Search.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,33 @@

import com.google.common.collect.ImmutableList;
import java.util.List;
import javax.annotation.Nullable;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.SearchExpression;

/**
* Logical plan node for Search operation. Represents search expressions that get converted to
* query_string function.
*/
@Getter
@ToString
@EqualsAndHashCode(callSuper = false)
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
@RequiredArgsConstructor
public class Search extends UnresolvedPlan {

private final UnresolvedPlan child;
private final String queryString;
@EqualsAndHashCode.Include private final UnresolvedPlan child;
@EqualsAndHashCode.Include private final String queryString;

// Currently it's only for anonymizer
private final @Nullable SearchExpression originalExpression;

/**
 * Creates a search node that carries no original search expression.
 *
 * @param child the plan this search applies to
 * @param queryString the query string carried by this node
 */
public Search(UnresolvedPlan child, String queryString) {
// Delegates to the three-argument constructor, leaving originalExpression null.
this(child, queryString, null);
}

@Override
public List<UnresolvedPlan> getChild() {
Expand All @@ -38,6 +47,6 @@ public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {

/**
 * Returns a copy of this search node attached to the given child plan.
 *
 * @param child the plan the returned node applies to
 * @return a new search node with the same query string and original expression
 */
@Override
public UnresolvedPlan attach(UnresolvedPlan child) {
  // Carry originalExpression over so the re-attached node can still be anonymized from it.
  return new Search(child, queryString, originalExpression);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,26 @@
/** Utility class for query_string syntax operations. */
public class QueryStringUtils {

/** Field name that is reported with the dedicated timestamp mask. */
private static final String INTERNAL_TIMESTAMP = "@timestamp";

/** Mask substituted for literal values. */
public static final String MASK_LITERAL = "***";

/** Mask substituted for ordinary field names. */
public static final String MASK_COLUMN = "identifier";

/** Mask substituted for the {@code @timestamp} field. */
public static final String MASK_TIMESTAMP_COLUMN = "time_identifier";

/** Mask substituted for field names that start with an underscore. */
public static final String MASK_METADATA_COLUMN = "meta_identifier";

/**
 * Chooses the mask that replaces a field name in anonymized output.
 *
 * @param fieldName the field name to mask; {@code null} is treated as an ordinary field
 * @return {@link #MASK_TIMESTAMP_COLUMN} for {@code @timestamp}, {@link #MASK_METADATA_COLUMN}
 *     for names starting with {@code _}, otherwise {@link #MASK_COLUMN}
 */
public static String maskField(String fieldName) {
  // Anonymization should never fail on a missing name; fall back to the generic mask.
  if (fieldName == null) {
    return MASK_COLUMN;
  }
  if (INTERNAL_TIMESTAMP.equals(fieldName)) {
    return MASK_TIMESTAMP_COLUMN;
  }
  return fieldName.startsWith("_") ? MASK_METADATA_COLUMN : MASK_COLUMN;
}

// For field names, we typically don't escape dots as they're used for nested fields
// But we escape other special characters
public static final String LUCENE_SPECIAL_CHARS = "+-&|!(){}[]^\"~:/";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"calcite":{
"logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n",
"physical":"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"birthdate\":{\"query\":\"\\\"2016\\\\-12\\\\-08 00\\\\:00\\\\:00.000000000\\\"\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) {

// Create Search node with relation and query string
Relation relation = (Relation) visitFromClause(ctx.fromClause());
return new Search(relation, queryString);
return new Search(relation, queryString, combined);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation;
import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild;
import static org.opensearch.sql.utils.QueryStringUtils.MASK_COLUMN;
import static org.opensearch.sql.utils.QueryStringUtils.MASK_LITERAL;
import static org.opensearch.sql.utils.QueryStringUtils.maskField;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -108,12 +111,7 @@
/** Utility class to mask sensitive information in incoming PPL queries. */
public class PPLQueryDataAnonymizer extends AbstractNodeVisitor<String, String> {

/** Mask substituted for table names in anonymized queries. */
public static final String MASK_TABLE = "table";

private final AnonymizerExpressionAnalyzer expressionAnalyzer;
private final Settings settings;
Expand Down Expand Up @@ -255,9 +253,7 @@ public String visitTableFunction(TableFunction node, String context) {
/**
 * Anonymizes a search node as its anonymized source followed by its anonymized search
 * expression.
 *
 * <p>The expression text comes from the node's original search expression. That field is
 * nullable, so when it is absent the whole expression is replaced by the literal mask rather
 * than echoing any part of the raw query string.
 *
 * @param node the search node to anonymize
 * @param context the visitor context, passed through to the child plan
 * @return the anonymized source and search expression separated by a single space
 */
@Override
public String visitSearch(Search node, String context) {
  String source = node.getChild().get(0).accept(this, context);
  if (node.getOriginalExpression() == null) {
    // No parsed expression to render: mask everything instead of failing or leaking raw terms.
    return StringUtils.format("%s %s", source, MASK_LITERAL);
  }
  return StringUtils.format("%s %s", source, node.getOriginalExpression().toAnonymizedString());
}

@Override
Expand Down Expand Up @@ -726,7 +722,7 @@ public String visitFillNull(FillNull node, String context) {
"%s | fillnull value=%s %s",
child,
MASK_LITERAL,
fieldFills.stream().map(n -> MASK_IDENTIFIER).collect(Collectors.joining(" ")));
fieldFills.stream().map(n -> MASK_COLUMN).collect(Collectors.joining(" ")));
}

// Distinguish between with...in and using based on whether all values are the same
Expand Down Expand Up @@ -919,7 +915,8 @@ public String visitIn(In node, String context) {

/**
 * Anonymizes a field reference by replacing its name with the mask chosen by {@code maskField}.
 *
 * @param node the field to anonymize
 * @param context the visitor context (unused)
 * @return the mask for the field's name
 */
@Override
public String visitField(Field node, String context) {
  String fieldName = node.getField().toString();
  return maskField(fieldName);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public class PPLQueryDataAnonymizerTest {

@Test
public void testSearchCommand() {
  // A search comparison is rendered as "<masked field> <operator> ***".
  assertEquals("source=table identifier = ***", anonymize("search source=t a=1"));
}

@Test
Expand Down Expand Up @@ -255,8 +255,8 @@ public void testReverseCommand() {
@Test
public void testTimechartCommand() {
  // The span field is expected to be reported with the timestamp mask.
  assertEquals(
      "source=table | timechart limit=*** useother=*** count() by span(time_identifier, ***"
          + " m) identifier",
      anonymize("source=t | timechart count() by host"));
}

Expand Down Expand Up @@ -388,6 +388,13 @@ public void testAndExpression() {
anonymize("source=t | where a=1 and b=2"));
}

@Test
public void testAndExpressionWithMetaData() {
  // An underscore-prefixed field is reported with the metadata mask.
  String query = "source=t | where _id=1 and b=2";
  String expected = "source=table | where meta_identifier = *** and identifier = ***";
  assertEquals(expected, anonymize(query));
}

@Test
public void testOrExpression() {
assertEquals(
Expand Down Expand Up @@ -879,8 +886,35 @@ private String anonymizeStatement(String query, boolean isExplain) {
@Test
public void testSearchWithAbsoluteTimeRange() {
assertEquals(
"source=table (@timestamp:*** AND (@timestamp:***",
anonymize("search source=table earliest='2012-12-10 15:00:00' latest=now"));
"source=table (time_identifier >= ***) AND (time_identifier <= ***)",
anonymize("search source=t earliest='2012-12-10 15:00:00' latest=now"));
}

@Test
public void testSearchWithIn() {
  String query = "search source=t balance in (2000)";
  String expected = "source=table identifier IN ***";
  assertEquals(expected, anonymize(query));
}

@Test
public void testSearchWithNot() {
  String query = "search NOT balance=2000 source=t";
  String expected = "source=table NOT(identifier = ***)";
  assertEquals(expected, anonymize(query));
}

@Test
public void testSearchWithGroup() {
  String query =
      "search (severityText=\"ERROR\" OR severityText=\"WARN\") AND severityNumber>10"
          + " source=t";
  String expected =
      "source=table ((identifier = *** OR identifier = ***) AND identifier > ***)";
  assertEquals(expected, anonymize(query));
}

@Test
public void testSearchWithOr() {
  String query = "search source=t earliest='2012-12-10 15:00:00' or latest=now";
  String expected = "source=table (time_identifier >= *** OR time_identifier <= ***)";
  assertEquals(expected, anonymize(query));
}

@Test
Expand Down
Loading