Skip to content

Commit 6f1890b

Browse files
authored
EQL grammar updates and tests (#49658)
* EQL: Additional tests and grammar updates
* EQL: Add backtick escaped identifiers
* EQL: Adding keywords to language
* EQL: Add checks for unsupported syntax
* EQL: Testing updates and PR feedback
* EQL: Add string escapes
* EQL: Cleanup grammar for identifier
* EQL: Remove tabs from .eql tests
1 parent 0483f7c commit 6f1890b

File tree

16 files changed

+2280
-1426
lines changed

16 files changed

+2280
-1426
lines changed

x-pack/plugin/eql/src/main/antlr/EqlBase.g4

Lines changed: 72 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66

77
grammar EqlBase;
88

9-
tokens {
10-
DELIMITER
11-
}
129

1310
singleStatement
1411
: statement EOF
@@ -19,45 +16,54 @@ singleExpression
1916
;
2017

2118
statement
22-
: query (PIPE pipe)*
19+
: query pipe*
2320
;
24-
21+
2522
query
2623
: sequence
2724
| join
28-
| condition
25+
| eventQuery
26+
;
27+
28+
sequenceParams
29+
: WITH (MAXSPAN EQ timeUnit)
2930
;
30-
31+
3132
sequence
32-
: SEQUENCE (by=joinKeys)? (span)?
33-
match+
34-
(UNTIL match)?
33+
: SEQUENCE (by=joinKeys sequenceParams? | sequenceParams by=joinKeys?)?
34+
sequenceTerm sequenceTerm+
35+
(UNTIL sequenceTerm)?
3536
;
3637

3738
join
3839
: JOIN (by=joinKeys)?
39-
match+
40-
(UNTIL match)?
40+
joinTerm joinTerm+
41+
(UNTIL joinTerm)?
4142
;
4243

4344
pipe
44-
: kind=IDENTIFIER (booleanExpression (COMMA booleanExpression)*)?
45+
: PIPE kind=IDENTIFIER (booleanExpression (COMMA booleanExpression)*)?
4546
;
4647

48+
4749
joinKeys
48-
: BY qualifiedNames
49-
;
50-
51-
span
52-
: WITH MAXSPAN EQ DIGIT_IDENTIFIER
50+
: BY expression (COMMA expression)*
5351
;
5452

55-
match
56-
: LB condition RB (by=joinKeys)?
53+
joinTerm
54+
: subquery (by=joinKeys)?
55+
;
56+
57+
sequenceTerm
58+
: subquery (FORK (EQ booleanValue)?)? (by=joinKeys)?
59+
;
60+
61+
subquery
62+
: LB eventQuery RB
5763
;
5864

59-
condition
60-
: event=qualifiedName WHERE expression
65+
eventQuery
66+
: event=identifier WHERE expression
6167
;
6268

6369
expression
@@ -66,6 +72,7 @@ expression
6672

6773
booleanExpression
6874
: NOT booleanExpression #logicalNot
75+
| relationship=IDENTIFIER OF subquery #processCheck
6976
| predicated #booleanDefault
7077
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
7178
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
@@ -81,9 +88,7 @@ predicated
8188
// dedicated calls for each branch are not used to reuse the NOT handling across them
8289
// instead the property kind is used for differentiation
8390
predicate
84-
: NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
85-
| NOT? kind=IN LP valueExpression (COMMA valueExpression)* RP
86-
| NOT? kind=IN LP query RP
91+
: NOT? kind=IN LP valueExpression (COMMA valueExpression)* RP
8792
;
8893

8994
valueExpression
@@ -102,14 +107,14 @@ primaryExpression
102107
;
103108

104109
functionExpression
105-
: identifier LP (expression (COMMA expression)*)? RP
110+
: name=IDENTIFIER LP (expression (COMMA expression)*)? RP
106111
;
107112

108113
constant
109114
: NULL #nullLiteral
110115
| number #numericLiteral
111116
| booleanValue #booleanLiteral
112-
| STRING+ #stringLiteral
117+
| string #stringLiteral
113118
;
114119

115120
comparisonOperator
@@ -120,26 +125,17 @@ booleanValue
120125
: TRUE | FALSE
121126
;
122127

123-
qualifiedNames
124-
: qualifiedName (COMMA qualifiedName)*
125-
;
126-
127128
qualifiedName
128-
: (identifier DOT)* identifier
129+
: identifier (DOT identifier | LB INTEGER_VALUE+ RB)*
129130
;
130131

131132
identifier
132-
: quoteIdentifier
133-
| unquoteIdentifier
133+
: IDENTIFIER
134+
| ESCAPED_IDENTIFIER
134135
;
135136

136-
quoteIdentifier
137-
: QUOTED_IDENTIFIER #quotedIdentifier
138-
;
139-
140-
unquoteIdentifier
141-
: IDENTIFIER #unquotedIdentifier
142-
| DIGIT_IDENTIFIER #digitIdentifier
137+
timeUnit
138+
: number unit=IDENTIFIER?
143139
;
144140

145141
number
@@ -151,31 +147,26 @@ string
151147
: STRING
152148
;
153149

154-
AND: 'AND';
155-
ANY: 'ANY';
156-
ASC: 'ASC';
157-
BETWEEN: 'BETWEEN';
158-
BY: 'BY';
159-
CHILD: 'CHILD';
160-
DESCENDANT: 'DESCENDANT';
161-
EVENT: 'EVENT';
162-
FALSE: 'FALSE';
163-
IN: 'IN';
164-
JOIN: 'JOIN';
165-
MAXSPAN: 'MAXSPAN';
166-
NOT: 'NOT';
167-
NULL: 'NULL';
168-
OF: 'OF';
169-
OR: 'OR';
170-
SEQUENCE: 'SEQUENCE';
171-
TRUE: 'TRUE';
172-
UNTIL: 'UNTIL';
173-
WHERE: 'WHERE';
174-
WITH: 'WITH';
150+
AND: 'and';
151+
BY: 'by';
152+
FALSE: 'false';
153+
FORK: 'fork';
154+
IN: 'in';
155+
JOIN: 'join';
156+
MAXSPAN: 'maxspan';
157+
NOT: 'not';
158+
NULL: 'null';
159+
OF: 'of';
160+
OR: 'or';
161+
SEQUENCE: 'sequence';
162+
TRUE: 'true';
163+
UNTIL: 'until';
164+
WHERE: 'where';
165+
WITH: 'with';
175166

176167
// Operators
177168
EQ : '=' | '==';
178-
NEQ : '<>' | '!=';
169+
NEQ : '!=';
179170
LT : '<';
180171
LTE : '<=';
181172
GT : '>';
@@ -194,9 +185,16 @@ LP: '(';
194185
RP: ')';
195186
PIPE: '|';
196187

188+
189+
ESCAPED_IDENTIFIER
190+
: '`' (~'`')* '`'
191+
;
192+
197193
STRING
198-
: '\'' ( ~'\'')* '\''
199-
| '"' ( ~'"' )* '"'
194+
: '\'' ('\\' [btnfr"'\\] | ~[\r\n'\\])* '\''
195+
| '"' ('\\' [btnfr"'\\] | ~[\r\n"\\])* '"'
196+
| '?"' ('\\"' |~["\r\n])* '"'
197+
| '?\'' ('\\\'' |~['\r\n])* '\''
200198
;
201199
202200
INTEGER_VALUE
@@ -210,31 +208,24 @@ DECIMAL_VALUE
210208
| DOT DIGIT+ EXPONENT
211209
;
212210
211+
// make @timestamp not require escaping, since @ has no other meaning
213212
IDENTIFIER
214-
: (LETTER | '_') (LETTER | DIGIT | '_' | '@' )*
215-
;
216-
217-
DIGIT_IDENTIFIER
218-
: DIGIT (LETTER | DIGIT | '_' | '@')+
213+
: (LETTER | '_' | '@') (LETTER | DIGIT | '_')*
219214
;
220215
221-
QUOTED_IDENTIFIER
222-
: '"' ( ~'"' | '""' )* '"'
223-
;
224-
225216
fragment EXPONENT
226-
: 'E' [+-]? DIGIT+
217+
: [Ee] [+-]? DIGIT+
227218
;
228219
229220
fragment DIGIT
230221
: [0-9]
231222
;
232223
233224
fragment LETTER
234-
: [A-Z]
225+
: [A-Za-z]
235226
;
236227
237-
SIMPLE_COMMENT
228+
LINE_COMMENT
238229
: '//' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
239230
;
240231
@@ -246,9 +237,12 @@ WS
246237
: [ \r\n\t]+ -> channel(HIDDEN)
247238
;
248239
240+
249241
// Catch-all for anything we can't recognize.
250242
// We use this to be able to ignore and recover all the text
251243
// when splitting statements with DelimiterLexer
244+
/*
252245
UNRECOGNIZED
253246
: .
254-
;
247+
;
248+
*/

x-pack/plugin/eql/src/main/antlr/EqlBase.tokens

Lines changed: 0 additions & 87 deletions
This file was deleted.

0 commit comments

Comments
 (0)