From 480fe6d21a1127668db157269edd0bbdadd8f5a4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 10 Sep 2020 15:05:51 +0800 Subject: [PATCH 1/4] [SPARK-32840][SQL] Invalid interval value can happen to be just adhesive with the unit --- .../spark/sql/catalyst/parser/AstBuilder.scala | 9 ++++++++- .../test/resources/sql-tests/inputs/interval.sql | 2 ++ .../sql-tests/results/ansi/interval.sql.out | 16 +++++++++++++++- .../resources/sql-tests/results/interval.sql.out | 16 +++++++++++++++- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d90c9901f96ac..629a4cca7d135 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2120,6 +2120,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } + private final val alphabet = "[a-zA-Z]".r.pattern + /** * Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS. */ @@ -2132,7 +2134,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val kvs = units.indices.map { i => val u = units(i).getText val v = if (values(i).STRING() != null) { - string(values(i).STRING()) + val value = string(values(i).STRING()) + if (alphabet.matcher(value).find()) { + throw new ParseException("Can only use numbers in the interval value part for" + + s" multiple unit value pairs interval form, but got invalid value: $value", ctx) + } + value } else { values(i).getText } diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index e925c4508f630..f431b705f60a4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -206,3 +206,5 @@ select interval '1.2'; select interval '- 2'; select interval '1 day -'; select interval '1 day 1'; + +select interval '1 day 2' day; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 33d918bbeb94d..e26f00f794528 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 107 +-- Number of queries: 108 -- !query @@ -1122,3 +1122,17 @@ Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7) == SQL == select interval '1 day 1' -------^^^ + + +-- !query +select interval '1 day 2' day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: 1 day 2(line 1, pos 16) + +== SQL == +select interval '1 day 2' day +----------------^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 898be09a40318..92077872f4943 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 107 +-- Number of queries: 108 -- !query @@ -1110,3 +1110,17 @@ Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7) == SQL == select interval '1 day 1' -------^^^ + + +-- !query +select interval '1 day 2' day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: 1 day 2(line 1, pos 16) + +== SQL == +select interval '1 day 2' day +----------------^^^ From aa9e9bf2b67fbe658a33dff2add49ea429519be0 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 10 Sep 2020 16:07:31 +0800 Subject: [PATCH 2/4] using valid interval value pattern --- .../sql/catalyst/parser/AstBuilder.scala | 6 ++--- .../resources/sql-tests/inputs/interval.sql | 2 ++ .../sql-tests/results/ansi/interval.sql.out | 24 ++++++++++++++++++- .../sql-tests/results/interval.sql.out | 24 ++++++++++++++++++- 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 629a4cca7d135..f48e48c06749c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2120,7 +2120,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } - private final val alphabet = "[a-zA-Z]".r.pattern + private final lazy val intervalValStrPattern = "^[+-]?\\s*\\d*\\.?\\d*$".r.pattern /** * Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS. @@ -2134,8 +2134,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val kvs = units.indices.map { i => val u = units(i).getText val v = if (values(i).STRING() != null) { - val value = string(values(i).STRING()) - if (alphabet.matcher(value).find()) { + val value = string(values(i).STRING()).trim + if (!intervalValStrPattern.matcher(value).matches()) { throw new ParseException("Can only use numbers in the interval value part for" + s" multiple unit value pairs interval form, but got invalid value: $value", ctx) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index f431b705f60a4..c3e4748e76e3c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -208,3 +208,5 @@ select interval '1 day -'; select interval '1 day 1'; select interval '1 day 2' day; +select interval 'interval 1' day; +select interval '-\t 1' day; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index e26f00f794528..d6cf9433a06b7 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 108 +-- Number of queries: 110 -- !query @@ -1136,3 +1136,25 @@ Can only use numbers in the interval value part for multiple unit value pairs in == SQL == select interval '1 day 2' day ----------------^^^ + + +-- !query +select interval 'interval 1' day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: interval 1(line 1, pos 16) + +== SQL == +select interval 'interval 1' day +----------------^^^ + + +-- !query +select interval '-\t 1' day +-- !query schema +struct +-- !query output +-1 days diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 92077872f4943..6b149fd6bb961 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 108 +-- Number of queries: 110 -- !query @@ -1124,3 +1124,25 @@ Can only use numbers in the interval value part for multiple unit value pairs in == SQL == select interval '1 day 2' day ----------------^^^ + + +-- !query +select interval 'interval 1' day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: interval 1(line 1, pos 16) + +== SQL == +select interval 'interval 1' day +----------------^^^ + + +-- !query +select interval '-\t 1' day +-- !query schema +struct +-- !query output +-1 days From a9294a1b701ce39578715362396c2fb3138a03a2 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 10 Sep 2020 16:21:14 +0800 Subject: [PATCH 3/4] address comments --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f48e48c06749c..c527bf9c071c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2120,8 +2120,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } - private final lazy val intervalValStrPattern = "^[+-]?\\s*\\d*\\.?\\d*$".r.pattern - /** * Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS. */ @@ -2135,7 +2133,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val u = units(i).getText val v = if (values(i).STRING() != null) { val value = string(values(i).STRING()).trim - if (!intervalValStrPattern.matcher(value).matches()) { + if (value.exists(Character.isLetter)) { throw new ParseException("Can only use numbers in the interval value part for" + s" multiple unit value pairs interval form, but got invalid value: $value", ctx) } From 7e98555e073f16fdcb255edfd759c7470efb63e5 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 10 Sep 2020 16:35:22 +0800 Subject: [PATCH 4/4] add comments --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c527bf9c071c1..6682b0575430a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2132,7 +2132,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val kvs = units.indices.map { i => val u = units(i).getText val v = if (values(i).STRING() != null) { - val value = string(values(i).STRING()).trim + val value = string(values(i).STRING()) + // SPARK-32840: For invalid cases, e.g. INTERVAL '1 day 2' hour, + // INTERVAL 'interval 1' day, we need to check ahead before they are concatenated with + // units and become valid ones, e.g. '1 day 2 hour'. + // Ideally, we only ensure the value parts don't contain any units here. if (value.exists(Character.isLetter)) { throw new ParseException("Can only use numbers in the interval value part for" + s" multiple unit value pairs interval form, but got invalid value: $value", ctx)