Skip to content

Commit dca8359

Browse files
Create new anonymizer for new engine (opensearch-project#1665) (opensearch-project#1677)
* Create new anonymizer for new engine (#266) * Created anonymizer listener for anonymizing SQL queries through the new engine Signed-off-by: Matthew Wells <[email protected]> * Update for review comments Signed-off-by: Andrew Carbonetto <[email protected]> * added missing file header, change public variable to private Signed-off-by: Matthew Wells <[email protected]> --------- Signed-off-by: Andrew Carbonetto <[email protected]> Signed-off-by: Matthew Wells <[email protected]> Co-authored-by: Andrew Carbonetto <[email protected]> (cherry picked from commit 62120fd) Co-authored-by: Matthew Wells <[email protected]>
1 parent 8d3f762 commit dca8359

File tree

4 files changed

+333
-4
lines changed

4 files changed

+333
-4
lines changed

legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
141141
}
142142
}
143143

144-
LOG.info("[{}] Incoming request {}: {}", QueryContext.getRequestId(), request.uri(),
145-
QueryDataAnonymizer.anonymizeData(sqlRequest.getSql()));
144+
LOG.info("[{}] Incoming request {}", QueryContext.getRequestId(), request.uri());
146145

147146
Format format = SqlRequestParam.getFormat(request.params());
148147

@@ -157,6 +156,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
157156
}
158157
LOG.debug("[{}] Request {} is not supported and falling back to old SQL engine",
159158
QueryContext.getRequestId(), newSqlRequest);
159+
LOG.info("Request Query: {}", QueryDataAnonymizer.anonymizeData(sqlRequest.getSql()));
160160
QueryAction queryAction = explainRequest(client, sqlRequest, format);
161161
executeSqlRequest(request, queryAction, client, restChannel);
162162
} catch (Exception e) {
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
7+
package org.opensearch.sql.sql.antlr;
8+
9+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.BACKTICK_QUOTE_ID;
10+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.BOOLEAN;
11+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.COMMA;
12+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.DECIMAL_LITERAL;
13+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.DOT;
14+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.EQUAL_SYMBOL;
15+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.EXCLAMATION_SYMBOL;
16+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.FALSE;
17+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.FROM;
18+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.GREATER_SYMBOL;
19+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.ID;
20+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.LESS_SYMBOL;
21+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.ONE_DECIMAL;
22+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.REAL_LITERAL;
23+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.STRING_LITERAL;
24+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.TIMESTAMP;
25+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.TRUE;
26+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.TWO_DECIMAL;
27+
import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.ZERO_DECIMAL;
28+
29+
import org.antlr.v4.runtime.ParserRuleContext;
30+
import org.antlr.v4.runtime.tree.ErrorNode;
31+
import org.antlr.v4.runtime.tree.ParseTreeListener;
32+
import org.antlr.v4.runtime.tree.TerminalNode;
33+
34+
/**
35+
* Parse tree listener for anonymizing SQL requests.
36+
*/
37+
public class AnonymizerListener implements ParseTreeListener {
38+
private String anonymizedQueryString = "";
39+
private static final int NO_TYPE = -1;
40+
private int previousType = NO_TYPE;
41+
42+
@Override
43+
public void enterEveryRule(ParserRuleContext ctx) {
44+
}
45+
46+
@Override
47+
public void exitEveryRule(ParserRuleContext ctx) {
48+
}
49+
50+
@Override
51+
public void visitTerminal(TerminalNode node) {
52+
// In these situations don't add a space prior:
53+
// 1. a DOT between two identifiers
54+
// 2. before a comma
55+
// 3. between equal comparison tokens: e.g <=
56+
// 4. between alt not equals: <>
57+
int token = node.getSymbol().getType();
58+
boolean isDotIdentifiers = token == DOT || previousType == DOT;
59+
boolean isComma = token == COMMA;
60+
boolean isEqualComparison = ((token == EQUAL_SYMBOL)
61+
&& (previousType == LESS_SYMBOL
62+
|| previousType == GREATER_SYMBOL
63+
|| previousType == EXCLAMATION_SYMBOL));
64+
boolean isNotEqualComparisonAlternative =
65+
previousType == LESS_SYMBOL && token == GREATER_SYMBOL;
66+
if (!isDotIdentifiers && !isComma && !isEqualComparison && !isNotEqualComparisonAlternative) {
67+
anonymizedQueryString += " ";
68+
}
69+
70+
// anonymize the following tokens
71+
switch (node.getSymbol().getType()) {
72+
case ID:
73+
case TIMESTAMP:
74+
case BACKTICK_QUOTE_ID:
75+
if (previousType == FROM) {
76+
anonymizedQueryString += "table";
77+
} else {
78+
anonymizedQueryString += "identifier";
79+
}
80+
break;
81+
case ZERO_DECIMAL:
82+
case ONE_DECIMAL:
83+
case TWO_DECIMAL:
84+
case DECIMAL_LITERAL:
85+
case REAL_LITERAL:
86+
anonymizedQueryString += "number";
87+
break;
88+
case STRING_LITERAL:
89+
anonymizedQueryString += "'string_literal'";
90+
break;
91+
case BOOLEAN:
92+
case TRUE:
93+
case FALSE:
94+
anonymizedQueryString += "boolean_literal";
95+
break;
96+
case NO_TYPE:
97+
// end of file
98+
break;
99+
default:
100+
anonymizedQueryString += node.getText().toUpperCase();
101+
}
102+
previousType = node.getSymbol().getType();
103+
}
104+
105+
@Override
106+
public void visitErrorNode(ErrorNode node) {
107+
108+
}
109+
110+
public String getAnonymizedQueryString() {
111+
return "(" + anonymizedQueryString + ")";
112+
}
113+
}

sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
import org.antlr.v4.runtime.CommonTokenStream;
1010
import org.antlr.v4.runtime.tree.ParseTree;
11+
import org.apache.logging.log4j.LogManager;
12+
import org.apache.logging.log4j.Logger;
1113
import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream;
1214
import org.opensearch.sql.common.antlr.Parser;
1315
import org.opensearch.sql.common.antlr.SyntaxAnalysisErrorListener;
@@ -18,6 +20,7 @@
1820
* SQL syntax parser which encapsulates an ANTLR parser.
1921
*/
2022
public class SQLSyntaxParser implements Parser {
23+
private static final Logger LOG = LogManager.getLogger(SQLSyntaxParser.class);
2124

2225
/**
2326
* Parse a SQL query by ANTLR parser.
@@ -26,10 +29,16 @@ public class SQLSyntaxParser implements Parser {
2629
*/
2730
@Override
2831
public ParseTree parse(String query) {
32+
AnonymizerListener anonymizer = new AnonymizerListener();
33+
2934
OpenSearchSQLLexer lexer = new OpenSearchSQLLexer(new CaseInsensitiveCharStream(query));
3035
OpenSearchSQLParser parser = new OpenSearchSQLParser(new CommonTokenStream(lexer));
3136
parser.addErrorListener(new SyntaxAnalysisErrorListener());
32-
return parser.root();
33-
}
37+
parser.addParseListener(anonymizer);
3438

39+
ParseTree parseTree = parser.root();
40+
LOG.info("New Engine Request Query: {}", anonymizer.getAnonymizedQueryString());
41+
42+
return parseTree;
43+
}
3544
}
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
7+
package org.opensearch.sql.sql.parser;
8+
9+
import static org.junit.jupiter.api.Assertions.assertEquals;
10+
import static org.mockito.Mockito.mock;
11+
12+
import org.antlr.v4.runtime.CommonTokenStream;
13+
import org.antlr.v4.runtime.tree.ErrorNode;
14+
import org.junit.jupiter.api.Test;
15+
import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream;
16+
import org.opensearch.sql.sql.antlr.AnonymizerListener;
17+
import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer;
18+
import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser;
19+
20+
public class AnonymizerListenerTest {
21+
22+
private final AnonymizerListener anonymizerListener = new AnonymizerListener();
23+
24+
/**
25+
* Helper function to parse SQl queries for testing purposes.
26+
* @param query SQL query to be anonymized.
27+
*/
28+
private void parse(String query) {
29+
OpenSearchSQLLexer lexer = new OpenSearchSQLLexer(new CaseInsensitiveCharStream(query));
30+
OpenSearchSQLParser parser = new OpenSearchSQLParser(new CommonTokenStream(lexer));
31+
parser.addParseListener(anonymizerListener);
32+
33+
parser.root();
34+
}
35+
36+
@Test
37+
public void queriesShouldHaveAnonymousFieldAndIndex() {
38+
String query = "SELECT ABS(balance) FROM accounts WHERE age > 30 GROUP BY ABS(balance)";
39+
String expectedQuery = "( SELECT ABS ( identifier ) FROM table "
40+
+ "WHERE identifier > number GROUP BY ABS ( identifier ) )";
41+
parse(query);
42+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
43+
}
44+
45+
@Test
46+
public void queriesShouldAnonymousNumbers() {
47+
String query = "SELECT ABS(20), LOG(20.20) FROM accounts";
48+
String expectedQuery = "( SELECT ABS ( number ), LOG ( number ) FROM table )";
49+
parse(query);
50+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
51+
}
52+
53+
@Test
54+
public void queriesShouldHaveAnonymousBooleanLiterals() {
55+
String query = "SELECT TRUE FROM accounts";
56+
String expectedQuery = "( SELECT boolean_literal FROM table )";
57+
parse(query);
58+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
59+
}
60+
61+
@Test
62+
public void queriesShouldHaveAnonymousInputStrings() {
63+
String query = "SELECT * FROM accounts WHERE name = 'Oliver'";
64+
String expectedQuery = "( SELECT * FROM table WHERE identifier = 'string_literal' )";
65+
parse(query);
66+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
67+
}
68+
69+
@Test
70+
public void queriesWithAliasesShouldAnonymizeSensitiveData() {
71+
String query = "SELECT balance AS b FROM accounts AS a";
72+
String expectedQuery = "( SELECT identifier AS identifier FROM table AS identifier )";
73+
parse(query);
74+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
75+
}
76+
77+
@Test
78+
public void queriesWithFunctionsShouldAnonymizeSensitiveData() {
79+
String query = "SELECT LTRIM(firstname) FROM accounts";
80+
String expectedQuery = "( SELECT LTRIM ( identifier ) FROM table )";
81+
parse(query);
82+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
83+
}
84+
85+
@Test
86+
public void queriesWithAggregatesShouldAnonymizeSensitiveData() {
87+
String query = "SELECT MAX(price) - MIN(price) from tickets";
88+
String expectedQuery = "( SELECT MAX ( identifier ) - MIN ( identifier ) FROM table )";
89+
parse(query);
90+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
91+
}
92+
93+
@Test
94+
public void queriesWithSubqueriesShouldAnonymizeSensitiveData() {
95+
String query = "SELECT a.f, a.l, a.a FROM "
96+
+ "(SELECT firstname AS f, lastname AS l, age AS a FROM accounts WHERE age > 30) a";
97+
String expectedQuery =
98+
"( SELECT identifier.identifier, identifier.identifier, identifier.identifier FROM "
99+
+ "( SELECT identifier AS identifier, identifier AS identifier, identifier AS identifier "
100+
+ "FROM table WHERE identifier > number ) identifier )";
101+
parse(query);
102+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
103+
}
104+
105+
@Test
106+
public void queriesWithLimitShouldAnonymizeSensitiveData() {
107+
String query = "SELECT balance FROM accounts LIMIT 5";
108+
String expectedQuery = "( SELECT identifier FROM table LIMIT number )";
109+
parse(query);
110+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
111+
}
112+
113+
@Test
114+
public void queriesWithOrderByShouldAnonymizeSensitiveData() {
115+
String query = "SELECT firstname FROM accounts ORDER BY lastname";
116+
String expectedQuery = "( SELECT identifier FROM table ORDER BY identifier )";
117+
parse(query);
118+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
119+
}
120+
121+
@Test
122+
public void queriesWithHavingShouldAnonymizeSensitiveData() {
123+
String query = "SELECT SUM(balance) FROM accounts GROUP BY lastname HAVING COUNT(balance) > 2";
124+
String expectedQuery = "( SELECT SUM ( identifier ) FROM table "
125+
+ "GROUP BY identifier HAVING COUNT ( identifier ) > number )";
126+
parse(query);
127+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
128+
}
129+
130+
@Test
131+
public void queriesWithHighlightShouldAnonymizeSensitiveData() {
132+
String query = "SELECT HIGHLIGHT(str0) FROM CALCS WHERE QUERY_STRING(['str0'], 'FURNITURE')";
133+
String expectedQuery = "( SELECT HIGHLIGHT ( identifier ) FROM table WHERE "
134+
+ "QUERY_STRING ( [ 'string_literal' ], 'string_literal' ) )";
135+
parse(query);
136+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
137+
}
138+
139+
@Test
140+
public void queriesWithMatchShouldAnonymizeSensitiveData() {
141+
String query = "SELECT str0 FROM CALCS WHERE MATCH(str0, 'FURNITURE')";
142+
String expectedQuery = "( SELECT identifier FROM table "
143+
+ "WHERE MATCH ( identifier, 'string_literal' ) )";
144+
parse(query);
145+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
146+
}
147+
148+
@Test
149+
public void queriesWithPositionShouldAnonymizeSensitiveData() {
150+
String query = "SELECT POSITION('world' IN 'helloworld')";
151+
String expectedQuery = "( SELECT POSITION ( 'string_literal' IN 'string_literal' ) )";
152+
parse(query);
153+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
154+
}
155+
156+
@Test
157+
public void queriesWithMatch_Bool_Prefix_ShouldAnonymizeSensitiveData() {
158+
String query = "SELECT firstname, address FROM accounts WHERE "
159+
+ "match_bool_prefix(address, 'Bristol Street', minimum_should_match=2)";
160+
String expectedQuery = "( SELECT identifier, identifier FROM table WHERE MATCH_BOOL_PREFIX "
161+
+ "( identifier, 'string_literal', MINIMUM_SHOULD_MATCH = number ) )";
162+
parse(query);
163+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
164+
}
165+
166+
@Test
167+
public void queriesWithGreaterOrEqualShouldAnonymizeSensitiveData() {
168+
String query = "SELECT int0 FROM accounts WHERE int0 >= 0";
169+
String expectedQuery = "( SELECT identifier FROM table WHERE identifier >= number )";
170+
parse(query);
171+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
172+
}
173+
174+
@Test
175+
public void queriesWithLessOrEqualShouldAnonymizeSensitiveData() {
176+
String query = "SELECT int0 FROM accounts WHERE int0 <= 0";
177+
String expectedQuery = "( SELECT identifier FROM table WHERE identifier <= number )";
178+
parse(query);
179+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
180+
}
181+
182+
@Test
183+
public void queriesWithNotEqualShouldAnonymizeSensitiveData() {
184+
String query = "SELECT int0 FROM accounts WHERE int0 != 0";
185+
String expectedQuery = "( SELECT identifier FROM table WHERE identifier != number )";
186+
parse(query);
187+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
188+
}
189+
190+
@Test
191+
public void queriesWithNotEqualAlternateShouldAnonymizeSensitiveData() {
192+
String query = "SELECT int0 FROM calcs WHERE int0 <> 0";
193+
String expectedQuery = "( SELECT identifier FROM table WHERE identifier <> number )";
194+
parse(query);
195+
assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString());
196+
}
197+
198+
199+
/**
200+
* Test added for coverage, but the errorNode will not be hit normally.
201+
*/
202+
@Test
203+
public void enterErrorNote() {
204+
ErrorNode node = mock(ErrorNode.class);
205+
anonymizerListener.visitErrorNode(node);
206+
}
207+
}

0 commit comments

Comments
 (0)