diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index d5c55d10258..eeca282cb9d 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -105,6 +105,10 @@ public class PPLQueryDataAnonymizer extends AbstractNodeVisitor private static final String MASK_LITERAL = "***"; + private static final String MASK_COLUMN = "identifier"; + + private static final String MASK_TABLE = "table"; + private final AnonymizerExpressionAnalyzer expressionAnalyzer; private final Settings settings; @@ -143,12 +147,9 @@ public String visitExplain(Explain node, String context) { @Override public String visitRelation(Relation node, String context) { if (node instanceof DescribeRelation) { - // remove the system table suffix - String systemTable = node.getTableQualifiedName().toString(); - return StringUtils.format( - "describe %s", systemTable.substring(0, systemTable.lastIndexOf('.'))); + return StringUtils.format("describe %s", MASK_TABLE); } - return StringUtils.format("source=%s", node.getTableQualifiedName().toString()); + return StringUtils.format("source=%s", MASK_TABLE); } @Override @@ -183,23 +184,22 @@ public String visitJoin(Join node, String context) { .toList()); return StringUtils.format( "%s | join type=%s overwrite=%s max=%s %s %s", - left, joinType, overwrite, max, fieldList, right); + left, joinType, MASK_LITERAL, MASK_LITERAL, fieldList, right); } else { String joinType = node.getJoinType().name().toLowerCase(Locale.ROOT); - String leftAlias = node.getLeftAlias().map(l -> " left = " + l).orElse(""); - String rightAlias = node.getRightAlias().map(r -> " right = " + r).orElse(""); + String leftAlias = node.getLeftAlias().map(l -> " left = " + MASK_COLUMN).orElse(""); + String rightAlias = node.getRightAlias().map(r -> " right = " + 
MASK_COLUMN).orElse(""); String condition = node.getJoinCondition().map(c -> expressionAnalyzer.analyze(c, context)).orElse("true"); return StringUtils.format( "%s | %s join max=%s%s%s on %s %s", - left, joinType, max, leftAlias, rightAlias, condition, right); + left, joinType, MASK_LITERAL, leftAlias, rightAlias, condition, right); } } @Override public String visitLookup(Lookup node, String context) { String child = node.getChild().get(0).accept(this, context); - String lookupTable = ((Relation) node.getLookupRelation()).getTableQualifiedName().toString(); String mappingFields = formatFieldAlias(node.getMappingAliasMap()); String strategy = node.getOutputAliasMap().isEmpty() @@ -207,7 +207,7 @@ public String visitLookup(Lookup node, String context) { : String.format(" %s ", node.getOutputStrategy().toString().toLowerCase()); String outputFields = formatFieldAlias(node.getOutputAliasMap()); return StringUtils.format( - "%s | lookup %s %s%s%s", child, lookupTable, mappingFields, strategy, outputFields); + "%s | lookup %s %s%s%s", child, MASK_TABLE, mappingFields, strategy, outputFields); } private String formatFieldAlias(java.util.Map fieldMap) { @@ -230,7 +230,7 @@ public String visitSubqueryAlias(SubqueryAlias node, String context) { } // add "[]" only if its child is not a root String format = childNode.getChild().isEmpty() ? 
"%s as %s" : "[ %s ] as %s"; - return StringUtils.format(format, child, node.getAlias()); + return StringUtils.format(format, child, MASK_COLUMN); } @Override @@ -270,8 +270,8 @@ public String visitRename(Rename node, String context) { ((Field) renameMap.getTarget()).getField().toString()); } String renames = - renameMapBuilder.build().entrySet().stream() - .map(entry -> StringUtils.format("%s as %s", entry.getKey(), entry.getValue())) + node.getRenameList().stream() + .map(entry -> StringUtils.format("%s as %s", MASK_COLUMN, MASK_COLUMN)) .collect(Collectors.joining(",")); return StringUtils.format("%s | rename %s", child, renames); } @@ -336,7 +336,7 @@ public String visitBin(Bin node, String context) { } if (node.getAlias() != null) { - binCommand.append(" as ").append(node.getAlias()); + binCommand.append(" as ").append(MASK_COLUMN); } return StringUtils.format("%s%s", child, binCommand.toString()); @@ -406,7 +406,7 @@ public String visitEval(Eval node, String context) { } String expressions = expressionsBuilder.build().stream() - .map(pair -> StringUtils.format("%s" + "=%s", pair.getLeft(), pair.getRight())) + .map(pair -> StringUtils.format("%s" + "=%s", MASK_COLUMN, pair.getRight())) .collect(Collectors.joining(" ")); return StringUtils.format("%s | eval %s", child, expressions); } @@ -496,7 +496,7 @@ public String visitTimechart(Timechart node, String context) { public String visitRex(Rex node, String context) { String child = node.getChild().get(0).accept(this, context); String field = visitExpression(node.getField()); - String pattern = "\"" + node.getPattern().toString() + "\""; + String pattern = "\"" + MASK_LITERAL + "\""; StringBuilder command = new StringBuilder(); command.append( @@ -505,11 +505,11 @@ public String visitRex(Rex node, String context) { child, field, node.getMode().toString().toLowerCase(), pattern)); if (node.getMaxMatch().isPresent()) { - command.append(" max_match=").append(node.getMaxMatch().get()); + command.append(" 
max_match=").append(MASK_LITERAL); } if (node.getOffsetField().isPresent()) { - command.append(" offset_field=").append(node.getOffsetField().get()); + command.append(" offset_field=").append(MASK_COLUMN); } return command.toString(); @@ -535,7 +535,7 @@ public String visitParse(Parse node, String context) { } return ParseMethod.PATTERNS.equals(node.getParseMethod()) && regex.isEmpty() ? StringUtils.format("%s | %s %s", child, commandName, source) - : StringUtils.format("%s | %s %s '%s'", child, commandName, source, regex); + : StringUtils.format("%s | %s %s '%s'", child, commandName, source, MASK_LITERAL); } @Override @@ -646,7 +646,7 @@ public String visitPatterns(Patterns node, String context) { builder.append(" mode=").append(node.getPatternMode().toString()); builder.append(" max_sample_count=").append(visitExpression(node.getPatternMaxSampleCount())); builder.append(" buffer_limit=").append(visitExpression(node.getPatternBufferLimit())); - builder.append(" new_field=").append(node.getAlias()); + builder.append(" new_field=").append(MASK_COLUMN); if (!node.getArguments().isEmpty()) { for (java.util.Map.Entry entry : node.getArguments().entrySet()) { builder.append( @@ -780,7 +780,7 @@ public String visitIn(In node, String context) { @Override public String visitField(Field node, String context) { - return node.getField().toString(); + return MASK_COLUMN; } @Override @@ -802,7 +802,7 @@ public String visitAlias(Alias node, String context) { @Override public String visitTrendlineComputation(Trendline.TrendlineComputation node, String context) { final String dataField = node.getDataField().accept(this, context); - final String aliasClause = " as " + node.getAlias(); + final String aliasClause = " as " + MASK_COLUMN; final String computationType = node.getComputationType().name().toLowerCase(Locale.ROOT); return StringUtils.format( "%s(%d, %s)%s", computationType, node.getNumberOfDataPoints(), dataField, aliasClause); @@ -831,7 +831,7 @@ public String 
visitExistsSubquery(ExistsSubquery node, String context) { @Override public String visitCase(Case node, String context) { StringBuilder builder = new StringBuilder(); - builder.append("cast("); + builder.append("case("); for (When when : node.getWhenClauses()) { builder.append(analyze(when.getCondition(), context)); builder.append(","); @@ -858,7 +858,7 @@ public String visitCast(Cast node, String context) { @Override public String visitQualifiedName( org.opensearch.sql.ast.expression.QualifiedName node, String context) { - return String.join(".", node.getParts()); + return MASK_COLUMN; } } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 5dfc73f5483..44392cd9f57 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -33,7 +33,7 @@ public class PPLQueryDataAnonymizerTest { @Test public void testSearchCommand() { - assertEquals("source=t a:***", anonymize("search source=t a=1")); + assertEquals("source=table a:***", anonymize("search source=t a=1")); } @Test @@ -45,241 +45,261 @@ public void testTableFunctionCommand() { @Test public void testPrometheusPPLCommand() { - assertEquals( - "source=prometheus.http_requests_process", - anonymize("source=prometheus.http_requests_process")); + assertEquals("source=table", anonymize("source=prometheus.http_requests_process")); } @Test public void testWhereCommand() { - assertEquals("source=t | where a = ***", anonymize("search source=t | where a=1")); + assertEquals("source=table | where identifier = ***", anonymize("search source=t | where a=1")); } // Fields and Table Command Tests @Test public void testFieldsCommandWithoutArguments() { - assertEquals("source=t | fields + f,g", anonymize("source=t | fields f,g")); + assertEquals( + "source=table | fields + 
identifier,identifier", anonymize("source=t | fields f,g")); } @Test public void testFieldsCommandWithIncludeArguments() { - assertEquals("source=t | fields + f,g", anonymize("source=t | fields + f,g")); + assertEquals( + "source=table | fields + identifier,identifier", anonymize("source=t | fields + f,g")); } @Test public void testFieldsCommandWithExcludeArguments() { - assertEquals("source=t | fields - f,g", anonymize("source=t | fields - f,g")); + assertEquals( + "source=table | fields - identifier,identifier", anonymize("source=t | fields - f,g")); } @Test public void testFieldsCommandWithWildcards() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(true); - assertEquals("source=t | fields + account*", anonymize("source=t | fields account*")); - assertEquals("source=t | fields + *name", anonymize("source=t | fields *name")); - assertEquals("source=t | fields + *a*", anonymize("source=t | fields *a*")); - assertEquals("source=t | fields - account*", anonymize("source=t | fields - account*")); + assertEquals("source=table | fields + identifier", anonymize("source=t | fields account*")); + assertEquals("source=table | fields + identifier", anonymize("source=t | fields *name")); + assertEquals("source=table | fields + identifier", anonymize("source=t | fields *a*")); + assertEquals("source=table | fields - identifier", anonymize("source=t | fields - account*")); } @Test public void testFieldsCommandWithDelimiters() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(true); assertEquals( - "source=t | fields + firstname,lastname,age", + "source=table | fields + identifier,identifier,identifier", anonymize("source=t | fields firstname lastname age")); assertEquals( - "source=t | fields + firstname,lastname,balance", + "source=table | fields + identifier,identifier,identifier", anonymize("source=t | fields firstname lastname, balance")); assertEquals( - "source=t | fields + account*,*name", anonymize("source=t | fields 
account*, *name")); + "source=table | fields + identifier,identifier", + anonymize("source=t | fields account*, *name")); } @Test public void testTableCommand() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(true); - assertEquals("source=t | fields + f,g", anonymize("source=t | table f,g")); - assertEquals("source=t | fields + f,g", anonymize("source=t | table + f,g")); - assertEquals("source=t | fields - f,g", anonymize("source=t | table - f,g")); - assertEquals("source=t | fields + account*", anonymize("source=t | table account*")); assertEquals( - "source=t | fields + firstname,lastname,age", + "source=table | fields + identifier,identifier", anonymize("source=t | table f,g")); + assertEquals( + "source=table | fields + identifier,identifier", anonymize("source=t | table + f,g")); + assertEquals( + "source=table | fields - identifier,identifier", anonymize("source=t | table - f,g")); + assertEquals("source=table | fields + identifier", anonymize("source=t | table account*")); + assertEquals( + "source=table | fields + identifier,identifier,identifier", anonymize("source=t | table firstname lastname age")); } @Test public void anonymizeFieldsNoArg() { assertEquals( - "source=t | fields + f", + "source=table | fields + identifier", anonymize(projectWithArg(relation("t"), Collections.emptyList(), field("f")))); } @Test public void testRenameCommandWithMultiFields() { assertEquals( - "source=t | rename f as g,h as i,j as k", + "source=table | rename identifier as identifier,identifier as identifier,identifier as" + + " identifier", anonymize("source=t | rename f as g,h as i,j as k")); } @Test public void testRenameCommandWithWildcards() { - assertEquals("source=t | rename f* as g*", anonymize("source=t | rename f* as g*")); + assertEquals( + "source=table | rename identifier as identifier", anonymize("source=t | rename f* as g*")); } @Test public void testStatsCommandWithByClause() { - assertEquals("source=t | stats count(a) by b", 
anonymize("source=t | stats count(a) by b")); + assertEquals( + "source=table | stats count(identifier) by identifier", + anonymize("source=t | stats count(a) by b")); } @Test public void testStatsCommandWithNestedFunctions() { - assertEquals("source=t | stats sum(+(a,b))", anonymize("source=t | stats sum(a+b)")); + assertEquals( + "source=table | stats sum(+(identifier,identifier))", + anonymize("source=t | stats sum(a+b)")); } @Test public void testStatsCommandWithSpanFunction() { assertEquals( - "source=t | stats count(a) by span(b, *** d),c", + "source=table | stats count(identifier) by span(identifier, *** d),identifier", anonymize("source=t | stats count(a) by span(b, 1d), c")); } @Test public void testEventstatsCommandWithByClause() { assertEquals( - "source=t | eventstats count(a) by b", anonymize("source=t | eventstats count(a) by b")); + "source=table | eventstats count(identifier) by identifier", + anonymize("source=t | eventstats count(a) by b")); } @Test public void testEventstatsCommandWithNestedFunctions() { - assertEquals("source=t | eventstats sum(+(a,b))", anonymize("source=t | eventstats sum(a+b)")); + assertEquals( + "source=table | eventstats sum(+(identifier,identifier))", + anonymize("source=t | eventstats sum(a+b)")); } @Test public void testEventstatsCommandWithSpanFunction() { assertEquals( - "source=t | eventstats count(a) by span(b, *** d),c", + "source=table | eventstats count(identifier) by span(identifier, *** d),identifier", anonymize("source=t | eventstats count(a) by span(b, 1d), c")); } @Test public void testBinCommandBasic() { - assertEquals("source=t | bin f span=***", anonymize("source=t | bin f span=10")); + assertEquals("source=table | bin identifier span=***", anonymize("source=t | bin f span=10")); } @Test public void testBinCommandWithAllParameters() { assertEquals( - "source=t | bin f span=*** aligntime=*** as alias", + "source=table | bin identifier span=*** aligntime=*** as identifier", anonymize("source=t | bin f 
span=10 aligntime=earliest as alias")); } @Test public void testBinCommandWithCountParameters() { assertEquals( - "source=t | bin f bins=*** start=*** end=*** as alias", + "source=table | bin identifier bins=*** start=*** end=*** as identifier", anonymize("source=t | bin f bins=10 start=0 end=100 as alias")); } @Test public void testBinCommandWithMinspanParameters() { assertEquals( - "source=t | bin f minspan=*** start=*** end=*** as alias", + "source=table | bin identifier minspan=*** start=*** end=*** as identifier", anonymize("source=t | bin f minspan=5 start=0 end=100 as alias")); } @Test public void testDedupCommand() { assertEquals( - "source=t | dedup f1,f2 1 keepempty=false consecutive=false", + "source=table | dedup identifier,identifier 1 keepempty=false consecutive=false", anonymize("source=t | dedup f1, f2")); } @Test public void testTrendlineCommand() { assertEquals( - "source=t | trendline sma(2, date) as date_alias sma(3, time) as time_alias", + "source=table | trendline sma(2, identifier) as identifier sma(3, identifier) as" + + " identifier", anonymize("source=t | trendline sma(2, date) as date_alias sma(3, time) as time_alias")); } @Test public void testHeadCommandWithNumber() { - assertEquals("source=t | head 3", anonymize("source=t | head 3")); + assertEquals("source=table | head 3", anonymize("source=t | head 3")); } @Test public void testReverseCommand() { - assertEquals("source=t | reverse", anonymize("source=t | reverse")); + assertEquals("source=table | reverse", anonymize("source=t | reverse")); } @Test public void testTimechartCommand() { assertEquals( - "source=t | timechart span=span(@timestamp, *** m) limit=10 useother=true count() by host", + "source=table | timechart span=span(identifier, *** m) limit=10 useother=true count() by" + + " identifier", anonymize("source=t | timechart count() by host")); } // todo, sort order is ignored, it doesn't impact the log analysis. 
@Test public void testSortCommandWithOptions() { - assertEquals("source=t | sort f1,f2", anonymize("source=t | sort - f1, + f2")); + assertEquals( + "source=table | sort identifier,identifier", anonymize("source=t | sort - f1, + f2")); } @Test public void testSortCommandWithCount() { - assertEquals("source=t | sort 5 f1", anonymize("source=t | sort 5 f1")); + assertEquals("source=table | sort 5 identifier", anonymize("source=t | sort 5 f1")); } @Test public void testSortCommandWithDesc() { - assertEquals("source=t | sort f1", anonymize("source=t | sort f1 desc")); + assertEquals("source=table | sort identifier", anonymize("source=t | sort f1 desc")); } @Test public void testEvalCommand() { - assertEquals("source=t | eval r=abs(f)", anonymize("source=t | eval r=abs(f)")); + assertEquals( + "source=table | eval identifier=abs(identifier)", anonymize("source=t | eval r=abs(f)")); } @Test public void testEvalCommandWithStrftime() { assertEquals( - "source=t | eval formatted=strftime(timestamp,***)", + "source=table | eval identifier=strftime(identifier,***)", anonymize("source=t | eval formatted=strftime(timestamp, '%Y-%m-%d %H:%M:%S')")); } @Test public void testFillNullSameValue() { assertEquals( - "source=t | fillnull with *** in f1, f2", + "source=table | fillnull with *** in identifier, identifier", anonymize("source=t | fillnull with 0 in f1, f2")); } @Test public void testFillNullVariousValues() { assertEquals( - "source=t | fillnull using f1 = ***, f2 = ***", + "source=table | fillnull using identifier = ***, identifier = ***", anonymize("source=t | fillnull using f1 = 0, f2 = -1")); } @Test public void testFillNullWithoutFields() { - assertEquals("source=t | fillnull with ***", anonymize("source=t | fillnull with 0")); + assertEquals("source=table | fillnull with ***", anonymize("source=t | fillnull with 0")); } @Test public void testRareCommandWithGroupBy() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(false); - 
assertEquals("source=t | rare 10 a by b", anonymize("source=t | rare a by b")); + assertEquals( + "source=table | rare 10 identifier by identifier", anonymize("source=t | rare a by b")); } @Test public void testTopCommandWithNAndGroupBy() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(false); - assertEquals("source=t | top 1 a by b", anonymize("source=t | top 1 a by b")); + assertEquals( + "source=table | top 1 identifier by identifier", anonymize("source=t | top 1 a by b")); } @Test public void testRareCommandWithGroupByWithCalcite() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(true); assertEquals( - "source=t | rare 10 countield='count' showcount=true a by b", + "source=table | rare 10 countield='count' showcount=true identifier by identifier", anonymize("source=t | rare a by b")); } @@ -287,194 +307,220 @@ public void testRareCommandWithGroupByWithCalcite() { public void testTopCommandWithNAndGroupByWithCalcite() { when(settings.getSettingValue(Key.CALCITE_ENGINE_ENABLED)).thenReturn(true); assertEquals( - "source=t | top 1 countield='count' showcount=true a by b", + "source=table | top 1 countield='count' showcount=true identifier by identifier", anonymize("source=t | top 1 a by b")); } @Test public void testAndExpression() { - assertEquals("source=t | where a = *** and b = ***", anonymize("source=t | where a=1 and b=2")); + assertEquals( + "source=table | where identifier = *** and identifier = ***", + anonymize("source=t | where a=1 and b=2")); } @Test public void testOrExpression() { - assertEquals("source=t | where a = *** or b = ***", anonymize("source=t | where a=1 or b=2")); + assertEquals( + "source=table | where identifier = *** or identifier = ***", + anonymize("source=t | where a=1 or b=2")); } @Test public void testXorExpression() { - assertEquals("source=t | where a = *** xor b = ***", anonymize("source=t | where a=1 xor b=2")); + assertEquals( + "source=table | where identifier = *** xor 
identifier = ***", + anonymize("source=t | where a=1 xor b=2")); } @Test public void testNotExpression() { - assertEquals("source=t | where not a = ***", anonymize("source=t | where not a=1 ")); + assertEquals( + "source=table | where not identifier = ***", anonymize("source=t | where not a=1 ")); } @Test public void testInExpression() { - assertEquals("source=t | where a in (***)", anonymize("source=t | where a in (1, 2, 3) ")); + assertEquals( + "source=table | where identifier in (***)", anonymize("source=t | where a in (1, 2, 3) ")); } @Test public void testQualifiedName() { - assertEquals("source=t | fields + field0", anonymize("source=t | fields field0")); + assertEquals("source=table | fields + identifier", anonymize("source=t | fields field0")); } @Test public void testDateFunction() { assertEquals( - "source=t | eval date=DATE_ADD(DATE(***),INTERVAL *** HOUR)", + "source=table | eval identifier=DATE_ADD(DATE(***),INTERVAL *** HOUR)", anonymize("source=t | eval date=DATE_ADD(DATE('2020-08-26'),INTERVAL 1 HOUR)")); } @Test public void testDescribe() { - assertEquals("describe t", anonymize("describe t")); + assertEquals("describe table", anonymize("describe t")); } @Test public void testExplain() { assertEquals( - "explain standard source=t | fields + a", anonymizeStatement("source=t | fields a", true)); + "explain standard source=table | fields + identifier", + anonymizeStatement("source=t | fields a", true)); } @Test public void testExplainCommand() { assertEquals( - "explain standard source=t | fields + a", + "explain standard source=table | fields + identifier", anonymizeStatement("explain source=t | fields a", false)); assertEquals( - "explain extended source=t | fields + a", + "explain extended source=table | fields + identifier", anonymizeStatement("explain extended source=t | fields a", false)); } @Test public void testQuery() { - assertEquals("source=t | fields + a", anonymizeStatement("source=t | fields a", false)); + assertEquals( + "source=table 
| fields + identifier", anonymizeStatement("source=t | fields a", false)); } @Test public void testBetween() { assertEquals( - "source=t | where id between *** and *** | fields + id", + "source=table | where identifier between *** and *** | fields + identifier", anonymize("source=t | where id between 1 and 2 | fields id")); assertEquals( - "source=t | where not id between *** and *** | fields + id", + "source=table | where not identifier between *** and *** | fields + identifier", anonymize("source=t | where id not between 1 and 2 | fields id")); } @Test public void testAppendcol() { assertEquals( - "source=t | stats count() by b | appendcol override=false [ stats sum(c) by b ]", + "source=table | stats count() by identifier | appendcol override=false [ stats" + + " sum(identifier) by identifier ]", anonymize("source=t | stats count() by b | appendcol [ stats sum(c) by b ]")); assertEquals( - "source=t | stats count() by b | appendcol override=true [ stats sum(c) by b ]", + "source=table | stats count() by identifier | appendcol override=true [ stats" + + " sum(identifier) by identifier ]", anonymize("source=t | stats count() by b | appendcol override=true [ stats sum(c) by b ]")); assertEquals( - "source=t | appendcol override=false [ where a = *** ]", + "source=table | appendcol override=false [ where identifier = *** ]", anonymize("source=t | appendcol override=false [ where a = 1 ]")); } @Test public void testAppend() { assertEquals( - "source=t | stats count() by b | append [ | stats sum(c) by b ]", + "source=table | stats count() by identifier | append [ | stats sum(identifier) by" + + " identifier ]", anonymize("source=t | stats count() by b | append [ | stats sum(c) by b ]")); assertEquals( - "source=t | stats count() by b | append [ | stats sum(c) by b ]", + "source=table | stats count() by identifier | append [ | stats sum(identifier) by" + + " identifier ]", anonymize("source=t | stats count() by b | append [ | stats sum(c) by b ]")); assertEquals( - 
"source=t | append [ | where a = *** ]", anonymize("source=t | append [ | where a = 1 ]")); + "source=table | append [ | where identifier = *** ]", + anonymize("source=t | append [ | where a = 1 ]")); assertEquals( - "source=t | stats count() by b | append [source=a | stats sum(c) by b ]", + "source=table | stats count() by identifier | append [source=table | stats sum(identifier)" + + " by identifier ]", anonymize("source=t | stats count() by b | append [source=a | stats sum(c) by b ]")); assertEquals( - "source=t | append [source=b | where a = *** ]", + "source=table | append [source=table | where identifier = *** ]", anonymize("source=t | append [source=b | where a = 1 ]")); assertEquals( - "source=t | stats count() by b | append [source=a ]", + "source=table | stats count() by identifier | append [source=table ]", anonymize("source=t | stats count() by b | append [ source=a ]")); assertEquals( - "source=t | stats count() by b | append [ ]", + "source=table | stats count() by identifier | append [ ]", anonymize("source=t | stats count() by b | append [ ]")); } @Test + // Same as SQL, select * from a as b -> SELECT * FROM table AS identifier public void testSubqueryAlias() { - assertEquals("source=t as t1", anonymize("source=t as t1")); + assertEquals("source=table as identifier", anonymize("source=t as t1")); } @Test public void testJoin() { assertEquals( - "source=t | cross join max=0 on *** = *** s | fields + id", + "source=table | cross join max=*** on *** = *** table | fields + identifier", anonymize("source=t | cross join on 1=1 s | fields id")); assertEquals( - "source=t | inner join max=0 on id = uid s | fields + id", + "source=table | inner join max=*** on identifier = identifier table | fields + identifier", anonymize("source=t | inner join on id = uid s | fields id")); assertEquals( - "source=t as l | inner join max=0 left = l right = r on id = uid s as r | fields + id", + "source=table as identifier | inner join max=*** left = identifier right = 
identifier on" + + " identifier = identifier table as identifier | fields + identifier", anonymize("source=t | join left = l right = r on id = uid s | fields id")); assertEquals( - "source=t | left join max=0 right = r on id = uid s as r | fields + id", + "source=table | left join max=*** right = identifier on identifier = identifier table as" + + " identifier | fields + identifier", anonymize("source=t | left join right = r on id = uid s | fields id")); assertEquals( - "source=t as t1 | inner join max=0 left = t1 right = t2 on id = uid s as t2 | fields +" - + " t1.id", + "source=table as identifier | inner join max=*** left = identifier right = identifier on" + + " identifier = identifier table as identifier | fields + identifier", anonymize("source=t as t1 | inner join on id = uid s as t2 | fields t1.id")); assertEquals( - "source=t as t1 | right join max=0 left = t1 right = t2 on t1.id = t2.id s as t2 | fields +" - + " t1.id", + "source=table as identifier | right join max=*** left = identifier right = identifier on" + + " identifier = identifier table as identifier | fields + identifier", anonymize("source=t as t1 | right join max=0 on t1.id = t2.id s as t2 | fields t1.id")); assertEquals( - "source=t as t1 | right join max=0 left = t1 right = t2 on t1.id = t2.id [ source=s |" - + " fields + id ] as t2 | fields + t1.id", + "source=table as identifier | right join max=*** left = identifier right = identifier on" + + " identifier = identifier [ source=table | fields + identifier ] as identifier |" + + " fields + identifier", anonymize( "source=t as t1 | right join max=0 on t1.id = t2.id [ source=s | fields id] as t2 |" + " fields t1.id")); assertEquals( - "source=t | inner join max=2 on id = uid s | fields + id", + "source=table | inner join max=*** on identifier = identifier table | fields + identifier", anonymize("source=t | inner join max=2 on id = uid s | fields id")); } @Test public void testJoinWithFieldList() { assertEquals( - "source=t | join type=inner 
overwrite=true max=0 s | fields + id", + "source=table | join type=inner overwrite=*** max=*** table | fields + identifier", anonymize("source=t | join s | fields id")); assertEquals( - "source=t | join type=inner overwrite=true max=0 id s | fields + id", + "source=table | join type=inner overwrite=*** max=*** identifier table | fields +" + + " identifier", anonymize("source=t | join id s | fields id")); assertEquals( - "source=t | join type=left overwrite=false max=0 id1,id2 s | fields + id1", + "source=table | join type=left overwrite=*** max=*** identifier,identifier table | fields +" + + " identifier", anonymize("source=t | join type=left overwrite=false id1,id2 s | fields id1")); assertEquals( - "source=t | join type=left overwrite=false max=0 id1,id2 s | fields + id1", + "source=table | join type=left overwrite=*** max=*** identifier,identifier table | fields +" + + " identifier", anonymize("source=t | join type=outer overwrite=false id1 id2 s | fields id1")); assertEquals( - "source=t | join type=left overwrite=true max=2 id1,id2 s | fields + id1", + "source=table | join type=left overwrite=*** max=*** identifier,identifier table | fields +" + + " identifier", anonymize("source=t | join type=outer max=2 id1 id2 s | fields id1")); } @Test public void testLookup() { assertEquals( - "source=EMP | lookup DEPT DEPTNO replace LOC", + "source=table | lookup table DEPTNO replace LOC", anonymize("source=EMP | lookup DEPT DEPTNO replace LOC")); assertEquals( - "source=EMP | lookup DEPT DEPTNO replace LOC as JOB", + "source=table | lookup table DEPTNO replace LOC as JOB", anonymize("source=EMP | lookup DEPT DEPTNO replace LOC as JOB")); assertEquals( - "source=EMP | lookup DEPT DEPTNO append LOC", + "source=table | lookup table DEPTNO append LOC", anonymize("source=EMP | lookup DEPT DEPTNO append LOC")); assertEquals( - "source=EMP | lookup DEPT DEPTNO append LOC as JOB", + "source=table | lookup table DEPTNO append LOC as JOB", anonymize("source=EMP | lookup DEPT 
DEPTNO append LOC as JOB")); - assertEquals("source=EMP | lookup DEPT DEPTNO", anonymize("source=EMP | lookup DEPT DEPTNO")); assertEquals( - "source=EMP | lookup DEPT DEPTNO as EMPNO, ID append ID, LOC as JOB, COUNTRY as COUNTRY2", + "source=table | lookup table DEPTNO", anonymize("source=EMP | lookup DEPT DEPTNO")); + assertEquals( + "source=table | lookup table DEPTNO as EMPNO, ID append ID, LOC as JOB, COUNTRY as" + + " COUNTRY2", anonymize( "source=EMP | lookup DEPT DEPTNO as EMPNO, ID append ID, LOC as JOB, COUNTRY as" + " COUNTRY2")); @@ -483,27 +529,32 @@ public void testLookup() { @Test public void testInSubquery() { assertEquals( - "source=t | where (id) in [ source=s | fields + uid ] | fields + id", + "source=table | where (identifier) in [ source=table | fields + identifier ] | fields +" + + " identifier", anonymize("source=t | where id in [source=s | fields uid] | fields id")); } @Test public void testExistsSubquery() { assertEquals( - "source=t | where exists [ source=s | where id = uid ] | fields + id", + "source=table | where exists [ source=table | where identifier = identifier ] | fields +" + + " identifier", anonymize("source=t | where exists [source=s | where id = uid ] | fields id")); } @Test public void testScalarSubquery() { assertEquals( - "source=t | where id = [ source=s | stats max(b) ] | fields + id", + "source=table | where identifier = [ source=table | stats max(identifier) ] | fields +" + + " identifier", anonymize("source=t | where id = [ source=s | stats max(b) ] | fields id")); assertEquals( - "source=t | eval id=[ source=s | stats max(b) ] | fields + id", + "source=table | eval identifier=[ source=table | stats max(identifier) ] | fields +" + + " identifier", anonymize("source=t | eval id = [ source=s | stats max(b) ] | fields id")); assertEquals( - "source=t | where id > [ source=s | where id = uid | stats max(b) ] | fields + id", + "source=table | where identifier > [ source=table | where identifier = identifier | stats" + + " 
max(identifier) ] | fields + identifier", anonymize( "source=t | where id > [ source=s | where id = uid | stats max(b) ] | fields id")); } @@ -511,14 +562,14 @@ public void testScalarSubquery() { @Test public void testCaseWhen() { assertEquals( - "source=t | eval level=cast(score >= ***,***,score >= *** and score < ***,*** else ***) |" - + " fields + level", + "source=table | eval identifier=case(identifier >= ***,***,identifier >= *** and identifier" + + " < ***,*** else ***) | fields + identifier", anonymize( "source=t | eval level=CASE(score >= 90, 'A', score >= 80 AND score < 90, 'B' else 'C')" + " | fields level")); assertEquals( - "source=t | eval level=cast(score >= ***,***,score >= *** and score < ***,***) | fields +" - + " level", + "source=table | eval identifier=case(identifier >= ***,***,identifier >= *** and identifier" + + " < ***,***) | fields + identifier", anonymize( "source=t | eval level=CASE(score >= 90, 'A', score >= 80 AND score < 90, 'B')" + " | fields level")); @@ -527,30 +578,30 @@ public void testCaseWhen() { @Test public void testCast() { assertEquals( - "source=t | eval id=cast(a as INTEGER) | fields + id", + "source=table | eval identifier=cast(identifier as INTEGER) | fields + identifier", anonymize("source=t | eval id=CAST(a AS INTEGER) | fields id")); assertEquals( - "source=t | eval id=cast(*** as DOUBLE) | fields + id", + "source=table | eval identifier=cast(*** as DOUBLE) | fields + identifier", anonymize("source=t | eval id=CAST('1' AS DOUBLE) | fields id")); } @Test public void testParse() { assertEquals( - "source=t | parse email '.+@(?.+)'", + "source=table | parse identifier '***'", anonymize("source=t | parse email '.+@(?.+)'")); assertEquals( - "source=t | parse email '.+@(?.+)' | fields + email,host", + "source=table | parse identifier '***' | fields + identifier,identifier", anonymize("source=t | parse email '.+@(?.+)' | fields email, host")); } @Test public void testGrok() { assertEquals( - "source=t | grok email 
'.+@%{HOSTNAME:host}'", + "source=table | grok identifier '***'", anonymize("source=t | grok email '.+@%{HOSTNAME:host}'")); assertEquals( - "source=t | grok email '.+@%{HOSTNAME:host}' | fields + email,host", + "source=table | grok identifier '***' | fields + identifier,identifier", anonymize("source=t | grok email '.+@%{HOSTNAME:host}' | fields email, host")); } @@ -561,17 +612,17 @@ public void testPatterns() { when(settings.getSettingValue(Key.PATTERN_MAX_SAMPLE_COUNT)).thenReturn(10); when(settings.getSettingValue(Key.PATTERN_BUFFER_LIMIT)).thenReturn(100000); assertEquals( - "source=t | patterns email method=SIMPLE_PATTERN mode=LABEL" - + " max_sample_count=*** buffer_limit=*** new_field=patterns_field", + "source=table | patterns identifier method=SIMPLE_PATTERN mode=LABEL" + + " max_sample_count=*** buffer_limit=*** new_field=identifier", anonymize("source=t | patterns email")); assertEquals( - "source=t | patterns email method=SIMPLE_PATTERN mode=LABEL" - + " max_sample_count=*** buffer_limit=*** new_field=patterns_field |" - + " fields + email,patterns_field", - anonymize("source=t | patterns email | fields email, patterns_field")); + "source=table | patterns identifier method=SIMPLE_PATTERN mode=LABEL" + + " max_sample_count=*** buffer_limit=*** new_field=identifier |" + + " fields + identifier,identifier", + anonymize("source=t | patterns email | fields email, patterns_field")); assertEquals( - "source=t | patterns email method=BRAIN mode=AGGREGATION" - + " max_sample_count=*** buffer_limit=*** new_field=patterns_field" + "source=table | patterns identifier method=BRAIN mode=AGGREGATION" + + " max_sample_count=*** buffer_limit=*** new_field=identifier" + " variable_count_threshold=***", anonymize( "source=t | patterns email method=BRAIN mode=AGGREGATION" @@ -581,11 +632,11 @@ public void testPatterns() { @Test public void testRegex() { assertEquals( - "source=t | regex fieldname=***", anonymize("source=t | regex fieldname='pattern'")); + "source=table |
regex identifier=***", anonymize("source=t | regex fieldname='pattern'")); assertEquals( - "source=t | regex fieldname!=***", anonymize("source=t | regex fieldname!='pattern'")); + "source=table | regex identifier!=***", anonymize("source=t | regex fieldname!='pattern'")); assertEquals( - "source=t | regex email=*** | fields + email", + "source=table | regex identifier=*** | fields + identifier", anonymize("source=t | regex email='.*@domain.com' | fields email")); } @@ -594,15 +645,14 @@ public void testRexCommand() { when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); assertEquals( - "source=t | rex field=message mode=extract \"(?[A-Z]+)\" max_match=1", + "source=table | rex field=identifier mode=extract \"***\" max_match=***", anonymize("source=t | rex field=message \"(?[A-Z]+)\"")); assertEquals( - "source=t | rex field=lastname mode=extract \"(?^[A-Z])\" max_match=1 | fields +" - + " lastname,initial", - anonymize( - "source=t | rex field=lastname \"(?^[A-Z])\" | fields lastname, initial")); + "source=table | rex field=identifier mode=extract \"***\" max_match=*** | fields +" + + " identifier,identifier", + anonymize("source=t | rex field=name \"(?<initial>^[A-Z])\" | fields name, initial")); assertEquals( - "source=t | rex field=name mode=extract \"(?[A-Z])\" max_match=3", + "source=table | rex field=identifier mode=extract \"***\" max_match=***", anonymize("source=t | rex field=name \"(?[A-Z])\" max_match=3")); } @@ -611,10 +661,10 @@ public void testRexSedMode() { when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); assertEquals( - "source=t | rex field=lastname mode=sed \"s/^[A-Z]/X/\" max_match=1", + "source=table | rex field=identifier mode=sed \"***\" max_match=***", anonymize("source=t | rex field=lastname mode=sed \"s/^[A-Z]/X/\"")); assertEquals( - "source=t | rex field=data mode=sed \"s/sensitive/clean/g\" max_match=1 | fields + data", + "source=table | rex field=identifier mode=sed \"***\"
max_match=*** | fields + identifier", anonymize("source=t | rex field=data mode=sed \"s/sensitive/clean/g\" | fields data")); } @@ -622,7 +672,7 @@ public void testRexSedMode() { public void testMvjoin() { // Test mvjoin with array of strings assertEquals( - "source=t | eval result=mvjoin(array(***,***,***),***) | fields + result", + "source=table | eval identifier=mvjoin(array(***,***,***),***) | fields + identifier", anonymize("source=t | eval result=mvjoin(array('a', 'b', 'c'), ',') | fields result")); } @@ -631,8 +681,8 @@ public void testRexWithOffsetField() { when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); assertEquals( - "source=t | rex field=message mode=extract \"(?[a-z]+)\" max_match=1" - + " offset_field=pos", + "source=table | rex field=identifier mode=extract \"***\" max_match=***" + + " offset_field=identifier", anonymize("source=t | rex field=message \"(?[a-z]+)\" offset_field=pos")); }