Skip to content

Commit ca745db

Browse files
committed
SQL: Fix RLIKE bug and improve testing for RLIKE statement (#40354)
* Refactor RegexMatch to support both LIKE and RLIKE * Add integration tests for RLIKE * Polish the rest of tests (cherry picked from commit 7562d6e)
1 parent 2685fb6 commit ca745db

File tree

11 files changed

+226
-58
lines changed

11 files changed

+226
-58
lines changed

x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/CsvSpecTestCase.java

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,25 @@ public abstract class CsvSpecTestCase extends SpecBaseIntegrationTestCase {
3131
public static List<Object[]> readScriptSpec() throws Exception {
3232
Parser parser = specParser();
3333
List<Object[]> tests = new ArrayList<>();
34-
tests.addAll(readScriptSpec("/select.csv-spec", parser));
35-
tests.addAll(readScriptSpec("/command.csv-spec", parser));
36-
tests.addAll(readScriptSpec("/fulltext.csv-spec", parser));
3734
tests.addAll(readScriptSpec("/agg.csv-spec", parser));
35+
tests.addAll(readScriptSpec("/alias.csv-spec", parser));
36+
tests.addAll(readScriptSpec("/arithmetic.csv-spec", parser));
3837
tests.addAll(readScriptSpec("/columns.csv-spec", parser));
38+
tests.addAll(readScriptSpec("/command.csv-spec", parser));
39+
//tests.addAll(readScriptSpec("/command-sys.csv-spec", parser));
3940
tests.addAll(readScriptSpec("/date.csv-spec", parser));
4041
tests.addAll(readScriptSpec("/datetime.csv-spec", parser));
41-
tests.addAll(readScriptSpec("/alias.csv-spec", parser));
42-
tests.addAll(readScriptSpec("/null.csv-spec", parser));
43-
tests.addAll(readScriptSpec("/nested.csv-spec", parser));
42+
tests.addAll(readScriptSpec("/datetime-interval.csv-spec", parser));
43+
tests.addAll(readScriptSpec("/field-alias.csv-spec", parser));
44+
tests.addAll(readScriptSpec("/filter.csv-spec", parser));
45+
tests.addAll(readScriptSpec("/fulltext.csv-spec", parser));
4446
tests.addAll(readScriptSpec("/functions.csv-spec", parser));
47+
//tests.addAll(readScriptSpec("/ip.csv-spec", parser));
4548
tests.addAll(readScriptSpec("/math.csv-spec", parser));
46-
tests.addAll(readScriptSpec("/field-alias.csv-spec", parser));
49+
tests.addAll(readScriptSpec("/null.csv-spec", parser));
50+
tests.addAll(readScriptSpec("/nested.csv-spec", parser));
51+
tests.addAll(readScriptSpec("/select.csv-spec", parser));
52+
4753
return tests;
4854
}
4955

x-pack/plugin/sql/qa/src/main/resources/agg.sql-spec

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,7 @@ aggCountOnColumnAndMultipleHaving
178178
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c < 70 ORDER BY gender ;
179179
aggCountOnColumnAndMultipleHavingEquals
180180
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c = 63 ORDER BY gender ;
181-
//
182-
// Count(column) = Column(*) which is a bug
183-
// https://github.com/elastic/elasticsearch/issues/34549
184-
//
181+
185182
aggCountOnColumnAndMultipleHavingWithLimit
186183
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c < 70 ORDER BY gender LIMIT 1;
187184
aggCountOnColumnAndHavingBetween-Ignore

x-pack/plugin/sql/qa/src/main/resources/arithmetic.csv-spec

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,14 @@
55
// the standard behavior here is to return the constant for each element
66
// the weird thing is that an actual query needs to be ran
77
arithmeticWithFrom
8-
SELECT 5 - 2 x FROM test_emp;
8+
SELECT 5 - 2 x FROM test_emp LIMIT 5;
99

10-
x
10+
x:i
11+
---------------
12+
3
13+
3
14+
3
15+
3
1116
3
1217
;
1318

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
//
2+
// Filter
3+
//
4+
5+
whereFieldWithRLikeMatch
6+
SELECT last_name l FROM "test_emp" WHERE emp_no < 10003 AND last_name RLIKE 'S.*';
7+
8+
l:s
9+
---------------
10+
Simmel
11+
;
12+
13+
whereFieldWithNotRLikeMatch
14+
SELECT last_name, first_name FROM "test_emp" WHERE emp_no < 10020 AND first_name NOT RLIKE 'Ma.*' ORDER BY first_name LIMIT 5;
15+
16+
last_name:s | first_name:s
17+
---------------+---------------
18+
Preusig |Anneke
19+
Genin |Berni
20+
Simmel |Bezalel
21+
Koblick |Chirstian
22+
Bouloucos |Cristinel
23+
;
24+
25+
whereFieldWithRLikeMatchNot
26+
SELECT last_name AS L, emp_no FROM "test_emp" WHERE NOT (emp_no < 10003 AND L NOT RLIKE 'K.*') ORDER BY emp_no LIMIT 5;
27+
28+
L:s | emp_no:i
29+
---------------+---------------
30+
Bamford |10003
31+
Koblick |10004
32+
Maliniak |10005
33+
Preusig |10006
34+
Zielinski |10007
35+
;
36+
37+
whereFieldOnMatchWithAndAndOr
38+
SELECT last_name l, gender g FROM "test_emp" WHERE (last_name RLIKE 'K.*' OR gender = 'F') AND emp_no < 10050 ORDER BY last_name;
39+
40+
l:s | g:s
41+
---------------+---------------
42+
Casley |F
43+
Kalloufi |M
44+
Koblick |M
45+
Lenart |F
46+
Meriste |F
47+
Montemayor |F
48+
Peac |F
49+
Pettey |F
50+
Preusig |F
51+
Reistad |F
52+
Reistad |F
53+
Simmel |F
54+
Stamatiou |F
55+
Tramer |F
56+
Zielinski |F
57+
;
58+
59+
whereFieldWithRLikeAndGroupByOrderBy
60+
SELECT last_name l, gender g, COUNT(*) c, MAX(salary) AS sal FROM "test_emp" WHERE emp_no < 10050 AND (last_name RLIKE 'B.*' OR gender = 'F') GROUP BY g, l ORDER BY sal;
61+
62+
l:s | g:s | c:l | sal:i
63+
---------------+---------------+---------------+---------------
64+
Berztiss |M |1 |28336
65+
Stamatiou |F |1 |30404
66+
Brender |M |1 |36051
67+
Meriste |F |1 |37112
68+
Tramer |F |1 |37853
69+
Casley |F |1 |39728
70+
Montemayor |F |1 |47896
71+
Bridgland |null |1 |48942
72+
Simmel |F |1 |56371
73+
Lenart |F |1 |56415
74+
Bouloucos |null |1 |58715
75+
Preusig |F |1 |60335
76+
Bamford |M |1 |61805
77+
Pettey |F |1 |64675
78+
Peac |F |1 |66174
79+
Reistad |F |2 |73851
80+
Zielinski |F |1 |74572
81+
;
82+
83+
whereFieldWithRLikeAndNotRLike
84+
SELECT COUNT(*), last_name AS f FROM test_emp WHERE last_name RLIKE '.*o.*' AND last_name NOT RLIKE '.*f.*' GROUP BY f HAVING COUNT(*) > 1;
85+
86+
COUNT(*):l | f:s
87+
---------------+---------------
88+
2 |Lortz
89+
;
90+
91+
whereInlineRLike
92+
SELECT emp_no FROM test_emp WHERE 'aaabbb' RLIKE 'aa+b+' AND 'aaabbb' NOT RLIKE 'a++c+' AND emp_no < 10080 ORDER BY emp_no DESC LIMIT 5;
93+
94+
emp_no:i
95+
---------------
96+
10079
97+
10078
98+
10077
99+
10076
100+
10075
101+
;
102+
103+
whereInlineRLikeAndCount_1
104+
SELECT COUNT(*), TRUNCATE(emp_no, -2) t FROM test_emp WHERE 'aaabbb' RLIKE '.....?.?' AND 'aaabbb' NOT RLIKE 'aa?bb?' GROUP BY TRUNCATE(emp_no, -2) ORDER BY t ASC;
105+
106+
COUNT(*):l | t:i
107+
---------------+---------------
108+
99 |10000
109+
1 |10100
110+
;
111+
112+
whereInlineRLikeAndCount_2
113+
SELECT COUNT(*), TRUNCATE(emp_no, -2) t FROM test_emp WHERE 'aaabbb' RLIKE 'a{2,}b{2,}' AND 'aaabbb' NOT RLIKE 'a{4,6}b{4,6}' GROUP BY TRUNCATE(emp_no, -2) ORDER BY t ASC;
114+
115+
COUNT(*):l | t:i
116+
---------------+---------------
117+
99 |10000
118+
1 |10100
119+
;

x-pack/plugin/sql/qa/src/main/resources/filter.sql-spec

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ whereFieldWithLikeMatch
5151
SELECT last_name l FROM "test_emp" WHERE emp_no < 10003 AND last_name LIKE 'K%';
5252
whereFieldWithNotLikeMatch
5353
SELECT last_name l FROM "test_emp" WHERE emp_no < 10020 AND first_name NOT LIKE 'Ma%';
54+
whereFieldWithInlineLikeMatch
55+
SELECT emp_no FROM "test_emp" WHERE 'aaabbb' LIKE 'aa%b%' AND 'aaabbb' NOT LIKE 'a%%c%' AND emp_no < 10080 ORDER BY emp_no DESC LIMIT 5;
5456

5557
whereFieldWithOrderNot
5658
SELECT last_name l FROM "test_emp" WHERE NOT emp_no < 10003 ORDER BY emp_no LIMIT 5;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import org.elasticsearch.xpack.sql.expression.function.aggregate.Count;
3232
import org.elasticsearch.xpack.sql.expression.function.scalar.Cast;
3333
import org.elasticsearch.xpack.sql.expression.predicate.operator.arithmetic.ArithmeticOperation;
34-
import org.elasticsearch.xpack.sql.expression.predicate.regex.Like;
34+
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexMatch;
3535
import org.elasticsearch.xpack.sql.plan.TableIdentifier;
3636
import org.elasticsearch.xpack.sql.plan.logical.Aggregate;
3737
import org.elasticsearch.xpack.sql.plan.logical.EsRelation;
@@ -852,8 +852,8 @@ private Expression collectResolvedAndReplace(Expression e, Map<String, List<Func
852852
// TODO: we should move to always compare the functions directly
853853
// Special check for COUNT: an already seen COUNT function will be returned only if its DISTINCT property
854854
// matches the one from the unresolved function to be checked.
855-
// Same for LIKE: the equals function also compares the pattern of LIKE
856-
if (seenFunction instanceof Count || seenFunction instanceof Like) {
855+
// Same for LIKE/RLIKE: the equals function also compares the pattern of LIKE/RLIKE
856+
if (seenFunction instanceof Count || seenFunction instanceof RegexMatch) {
857857
if (seenFunction.equals(f)){
858858
return seenFunction;
859859
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/regex/Like.java

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,41 +6,35 @@
66
package org.elasticsearch.xpack.sql.expression.predicate.regex;
77

88
import org.elasticsearch.xpack.sql.expression.Expression;
9+
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
10+
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
911
import org.elasticsearch.xpack.sql.tree.NodeInfo;
1012
import org.elasticsearch.xpack.sql.tree.Source;
1113

12-
import java.util.Objects;
13-
14-
public class Like extends RegexMatch {
15-
16-
private final LikePattern pattern;
14+
public class Like extends RegexMatch<LikePattern> {
1715

1816
public Like(Source source, Expression left, LikePattern pattern) {
19-
super(source, left, pattern.asJavaRegex());
20-
this.pattern = pattern;
21-
}
22-
23-
public LikePattern pattern() {
24-
return pattern;
17+
super(source, left, pattern);
2518
}
2619

2720
@Override
2821
protected NodeInfo<Like> info() {
29-
return NodeInfo.create(this, Like::new, field(), pattern);
22+
return NodeInfo.create(this, Like::new, field(), pattern());
3023
}
3124

3225
@Override
3326
protected Like replaceChild(Expression newLeft) {
34-
return new Like(source(), newLeft, pattern);
27+
return new Like(source(), newLeft, pattern());
3528
}
3629

3730
@Override
38-
public boolean equals(Object obj) {
39-
return super.equals(obj) && Objects.equals(((Like) obj).pattern(), pattern());
31+
public Boolean fold() {
32+
Object val = field().fold();
33+
return RegexOperation.match(val, pattern().asJavaRegex());
4034
}
4135

4236
@Override
43-
public int hashCode() {
44-
return Objects.hash(super.hashCode(), pattern());
37+
protected Processor makeProcessor() {
38+
return new RegexProcessor(pattern().asJavaRegex());
4539
}
4640
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/regex/RLike.java

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,35 @@
66
package org.elasticsearch.xpack.sql.expression.predicate.regex;
77

88
import org.elasticsearch.xpack.sql.expression.Expression;
9+
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
10+
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
911
import org.elasticsearch.xpack.sql.tree.Source;
1012
import org.elasticsearch.xpack.sql.tree.NodeInfo;
1113

12-
public class RLike extends RegexMatch {
14+
public class RLike extends RegexMatch<String> {
1315

14-
private final String pattern;
15-
16-
public RLike(Source source, Expression left, String pattern) {
17-
super(source, left, pattern);
18-
this.pattern = pattern;
19-
}
20-
21-
public String pattern() {
22-
return pattern;
16+
public RLike(Source source, Expression value, String pattern) {
17+
super(source, value, pattern);
2318
}
2419

2520
@Override
2621
protected NodeInfo<RLike> info() {
27-
return NodeInfo.create(this, RLike::new, field(), pattern);
22+
return NodeInfo.create(this, RLike::new, field(), pattern());
2823
}
2924

3025
@Override
3126
protected RLike replaceChild(Expression newChild) {
32-
return new RLike(source(), newChild, pattern);
27+
return new RLike(source(), newChild, pattern());
28+
}
29+
30+
@Override
31+
public Boolean fold() {
32+
Object val = field().fold();
33+
return RegexOperation.match(val, pattern());
34+
}
35+
36+
@Override
37+
protected Processor makeProcessor() {
38+
return new RegexProcessor(pattern());
3339
}
3440
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/regex/RegexMatch.java

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,25 @@
1010
import org.elasticsearch.xpack.sql.expression.Expressions;
1111
import org.elasticsearch.xpack.sql.expression.Nullability;
1212
import org.elasticsearch.xpack.sql.expression.function.scalar.UnaryScalarFunction;
13-
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
14-
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
1513
import org.elasticsearch.xpack.sql.tree.Source;
1614
import org.elasticsearch.xpack.sql.type.DataType;
1715

18-
import static org.elasticsearch.xpack.sql.expression.TypeResolutions.isStringAndExact;
16+
import java.util.Objects;
1917

20-
public abstract class RegexMatch extends UnaryScalarFunction {
18+
import static org.elasticsearch.xpack.sql.expression.TypeResolutions.isStringAndExact;
2119

22-
private final String pattern;
20+
public abstract class RegexMatch<T> extends UnaryScalarFunction {
2321

24-
protected RegexMatch(Source source, Expression value, String pattern) {
22+
private final T pattern;
23+
24+
protected RegexMatch(Source source, Expression value, T pattern) {
2525
super(source, value);
2626
this.pattern = pattern;
2727
}
28+
29+
public T pattern() {
30+
return pattern;
31+
}
2832

2933
@Override
3034
public DataType dataType() {
@@ -33,7 +37,7 @@ public DataType dataType() {
3337

3438
@Override
3539
public Nullability nullable() {
36-
if (pattern == null) {
40+
if (pattern() == null) {
3741
return Nullability.TRUE;
3842
}
3943
return field().nullable();
@@ -49,15 +53,14 @@ public boolean foldable() {
4953
// right() is not directly foldable in any context but Like can fold it.
5054
return field().foldable();
5155
}
52-
56+
5357
@Override
54-
public Boolean fold() {
55-
Object val = field().fold();
56-
return RegexOperation.match(val, pattern);
58+
public boolean equals(Object obj) {
59+
return super.equals(obj) && Objects.equals(((RegexMatch<?>) obj).pattern(), pattern());
5760
}
5861

5962
@Override
60-
protected Processor makeProcessor() {
61-
return new RegexProcessor(pattern);
63+
public int hashCode() {
64+
return Objects.hash(super.hashCode(), pattern());
6265
}
6366
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ private static String topAggsField(AggregateFunction af, Expression e) {
469469

470470
// TODO: need to optimize on ngram
471471
// TODO: see whether escaping is needed
472+
@SuppressWarnings("rawtypes")
472473
static class Likes extends ExpressionTranslator<RegexMatch> {
473474

474475
@Override

0 commit comments

Comments
 (0)