From 66192ce5e7c96a5bb04b2fa522f8362ace1d3611 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 28 Oct 2019 14:14:51 +0800 Subject: [PATCH 1/5] do not allow multiple unit TO unit statements in interval literal syntax --- docs/sql-migration-guide.md | 6 +- .../spark/sql/catalyst/parser/SqlBase.g4 | 22 ++- .../sql/catalyst/parser/AstBuilder.scala | 149 ++++++++++-------- .../resources/sql-tests/inputs/literals.sql | 5 + .../sql-tests/results/literals.sql.out | 44 +++++- 5 files changed, 150 insertions(+), 76 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index d03ca663e8e3..9834286a09d6 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -9,9 +9,9 @@ license: | The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -218,6 +218,8 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. + - Since Spark 3.0, the interval literal syntax does not allow multiple unit TO unit statements anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. + ## Upgrading from Spark SQL 2.4 to 2.4.1 - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 7ad008ae5d90..0e6e1eb90f98 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -80,7 +80,7 @@ singleTableSchema ; singleInterval - : INTERVAL? (intervalValue intervalUnit)+ EOF + : INTERVAL? multiUnitsInterval EOF ; statement @@ -761,12 +761,24 @@ booleanValue ; interval - : {ansi}? INTERVAL? intervalField+ - | {!ansi}? INTERVAL intervalField* + : {ansi}? INTERVAL? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) + | {!ansi}? INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) ; -intervalField - : value=intervalValue unit=intervalUnit (TO to=intervalUnit)? +errorCapturingMultiUnitsInterval + : multiUnitsInterval unitToUnitInterval? + ; + +multiUnitsInterval + : (intervalValue intervalUnit)+ + ; + +errorCapturingUnitToUnitInterval + : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)? + ; + +unitToUnitInterval + : value=STRING from=intervalUnit TO to=intervalUnit ; intervalValue diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 072b9a16e6d4..99ca0ad76ad1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -102,20 +102,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } override def visitSingleInterval(ctx: SingleIntervalContext): CalendarInterval = { - withOrigin(ctx) { - val units = ctx.intervalUnit().asScala.map { - u => normalizeInternalUnit(u.getText.toLowerCase(Locale.ROOT)) - }.toArray - val values = ctx.intervalValue().asScala.map(getIntervalValue).toArray - try { - IntervalUtils.fromUnitStrings(units, values) - } catch { - case i: IllegalArgumentException => - val e = new ParseException(i.getMessage, ctx) - e.setStackTrace(i.getStackTrace) - throw e - } - } + withOrigin(ctx)(visitMultiUnitsInterval(ctx.multiUnitsInterval)) } /* ******************************************************************************************** @@ -1929,72 +1916,98 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } - /** - * Create a [[CalendarInterval]] literal expression. An interval expression can contain multiple - * unit value pairs, for instance: interval 2 months 2 days. - */ override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) { - val intervals = ctx.intervalField.asScala.map(visitIntervalField) - validate(intervals.nonEmpty, "at least one time unit should be given for interval literal", ctx) - Literal(intervals.reduce(_.add(_))) + if (ctx.errorCapturingMultiUnitsInterval != null) { + val innerCtx = ctx.errorCapturingMultiUnitsInterval + if (innerCtx.unitToUnitInterval != null) { + throw new ParseException( + "Can only have a single unit TO unit statement in the interval literal syntax", + innerCtx.unitToUnitInterval) + } + Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval)) + } else { + assert(ctx.errorCapturingUnitToUnitInterval != null) + val innerCtx = ctx.errorCapturingUnitToUnitInterval + if (innerCtx.error1 != null || innerCtx.error2 != null) { + val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2 + throw new ParseException( + "Can only have a single unit TO unit statement in the interval literal syntax", + errorCtx) + } + Literal(visitUnitToUnitInterval(innerCtx.body)) + } } /** - * Create a [[CalendarInterval]] for a unit value pair. Two unit configuration types are - * supported: - * - Single unit. - * - From-To unit ('YEAR TO MONTH', 'DAY TO HOUR', 'DAY TO MINUTE', 'DAY TO SECOND', - * 'HOUR TO MINUTE', 'HOUR TO SECOND' and 'MINUTE TO SECOND' are supported). + * Creates a [[CalendarInterval]] literal expression with multiple units, e.g. 1 YEAR 2 DAYS. */ - override def visitIntervalField(ctx: IntervalFieldContext): CalendarInterval = withOrigin(ctx) { - import ctx._ - val s = getIntervalValue(value) - try { - val unitText = unit.getText.toLowerCase(Locale.ROOT) - val interval = (unitText, Option(to).map(_.getText.toLowerCase(Locale.ROOT))) match { - case (u, None) => - IntervalUtils.fromUnitStrings(Array(normalizeInternalUnit(u)), Array(s)) - case ("year", Some("month")) => - IntervalUtils.fromYearMonthString(s) - case ("day", Some("hour")) => - IntervalUtils.fromDayTimeString(s, "day", "hour") - case ("day", Some("minute")) => - IntervalUtils.fromDayTimeString(s, "day", "minute") - case ("day", Some("second")) => - IntervalUtils.fromDayTimeString(s, "day", "second") - case ("hour", Some("minute")) => - IntervalUtils.fromDayTimeString(s, "hour", "minute") - case ("hour", Some("second")) => - IntervalUtils.fromDayTimeString(s, "hour", "second") - case ("minute", Some("second")) => - IntervalUtils.fromDayTimeString(s, "minute", "second") - case (from, Some(t)) => - throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx) + override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = { + withOrigin(ctx) { + val units = ctx.intervalUnit().asScala.map { unit => + val u = unit.getText.toLowerCase(Locale.ROOT) + // Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/... + if (u.endsWith("s")) u.substring(0, u.length - 1) else u + }.toArray + + val values = ctx.intervalValue().asScala.map { value => + if (value.STRING() != null) { + string(value.STRING()) + } else { + value.getText + } + }.toArray + + try { + IntervalUtils.fromUnitStrings(units, values) + } catch { + case i: IllegalArgumentException => + val e = new ParseException(i.getMessage, ctx) + e.setStackTrace(i.getStackTrace) + throw e } - validate(interval != null, "No interval can be constructed", ctx) - interval - } catch { - // Handle Exceptions thrown by CalendarInterval - case e: IllegalArgumentException => - val pe = new ParseException(e.getMessage, ctx) - pe.setStackTrace(e.getStackTrace) - throw pe } } - private def getIntervalValue(value: IntervalValueContext): String = { - if (value.STRING() != null) { - string(value.STRING()) - } else { - value.getText + /** + * Creates a [[CalendarInterval]] literal expression with the unit TO unit syntax, + * e.g. '2-1' YEAR TO MONTH. + */ + override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = { + withOrigin(ctx) { + val value = string(ctx.STRING()) + try { + val from = ctx.from.getText.toLowerCase(Locale.ROOT) + val to = ctx.to.getText.toLowerCase(Locale.ROOT) + val interval = (from, to) match { + case ("year", "month") => + IntervalUtils.fromYearMonthString(value) + case ("day", "hour") => + IntervalUtils.fromDayTimeString(value, "day", "hour") + case ("day", "minute") => + IntervalUtils.fromDayTimeString(value, "day", "minute") + case ("day", "second") => + IntervalUtils.fromDayTimeString(value, "day", "second") + case ("hour", "minute") => + IntervalUtils.fromDayTimeString(value, "hour", "minute") + case ("hour", "second") => + IntervalUtils.fromDayTimeString(value, "hour", "second") + case ("minute", "second") => + IntervalUtils.fromDayTimeString(value, "minute", "second") + case _ => + throw new ParseException(s"Intervals $from TO $to are not supported.", ctx) + } + validate(interval != null, "No interval can be constructed", ctx) + interval + } catch { + // Handle Exceptions thrown by CalendarInterval + case e: IllegalArgumentException => + val pe = new ParseException(e.getMessage, ctx) + pe.setStackTrace(e.getStackTrace) + throw pe + } } } - // Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/... - private def normalizeInternalUnit(s: String): String = { - if (s.endsWith("s")) s.substring(0, s.length - 1) else s - } - /* ******************************************************************************************** * DataType parsing * ******************************************************************************************** */ diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql index 0f95f8523782..1bb78f0167af 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/literals.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -125,3 +125,8 @@ select map(1, interval 1 day, 2, interval 3 week); -- typed interval expression select interval 'interval 3 year 1 hour'; select interval '3 year 1 hour'; + +-- malformed interval literal +select interval '10-9' year to month '2-1' year to month; +select interval 1 year '2-1' year to month; +select interval '10-9' year to month '1' year; diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index fd6e51b2385d..a032b653f10d 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 59 +-- Number of queries: 62 -- !query 0 @@ -542,3 +542,45 @@ select interval '3 year 1 hour' struct -- !query 58 output interval 3 years 1 hours + + +-- !query 59 +select interval '10-9' year to month '2-1' year to month +-- !query 59 schema +struct<> +-- !query 59 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '2-1' year to month +-------------------------------------^^^ + + +-- !query 60 +select interval 1 year '2-1' year to month +-- !query 60 schema +struct<> +-- !query 60 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '2-1' year to month +-----------------------^^^ + + +-- !query 61 +select interval '10-9' year to month '1' year +-- !query 61 schema +struct<> +-- !query 61 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '1' year +-------------------------------------^^^ From 1aa9a0e819507d8d122339be83613fc9f2bef714 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 30 Oct 2019 01:43:36 +0800 Subject: [PATCH 2/5] address comments --- docs/sql-migration-guide.md | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 8 +- .../resources/sql-tests/inputs/literals.sql | 8 ++ .../sql-tests/results/literals.sql.out | 102 +++++++++++++++++- 4 files changed, 109 insertions(+), 11 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 9834286a09d6..26093be3ea02 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -218,7 +218,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - Since Spark 3.0, the interval literal syntax does not allow multiple unit TO unit statements anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. + - Since Spark 3.0, the interval literal syntax does not allow multiple unit-TO-unit statements anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. ## Upgrading from Spark SQL 2.4 to 2.4.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 99ca0ad76ad1..4a335c27740c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1924,7 +1924,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging "Can only have a single unit TO unit statement in the interval literal syntax", innerCtx.unitToUnitInterval) } - Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval)) + Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval), CalendarIntervalType) } else { assert(ctx.errorCapturingUnitToUnitInterval != null) val innerCtx = ctx.errorCapturingUnitToUnitInterval @@ -1934,7 +1934,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging "Can only have a single unit TO unit statement in the interval literal syntax", errorCtx) } - Literal(visitUnitToUnitInterval(innerCtx.body)) + Literal(visitUnitToUnitInterval(innerCtx.body), CalendarIntervalType) } } @@ -1978,7 +1978,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging try { val from = ctx.from.getText.toLowerCase(Locale.ROOT) val to = ctx.to.getText.toLowerCase(Locale.ROOT) - val interval = (from, to) match { + (from, to) match { case ("year", "month") => IntervalUtils.fromYearMonthString(value) case ("day", "hour") => @@ -1996,8 +1996,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case _ => throw new ParseException(s"Intervals $from TO $to are not supported.", ctx) } - validate(interval != null, "No interval can be constructed", ctx) - interval } catch { // Handle Exceptions thrown by CalendarInterval case e: IllegalArgumentException => diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql index 1bb78f0167af..b6dd6d2be53c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/literals.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -128,5 +128,13 @@ select interval '3 year 1 hour'; -- malformed interval literal select interval '10-9' year to month '2-1' year to month; +select interval '10-9' year to month '12:11:10' hour to second; +select interval '1 15:11' day to minute '12:11:10' hour to second; select interval 1 year '2-1' year to month; +select interval 1 year '12:11:10' hour to second; select interval '10-9' year to month '1' year; +select interval '12:11:10' hour to second '1' year; +-- malformed interval literal with ansi mode +SET spark.sql.ansi.enabled=true; +select interval '10-9' year to month '2-1' year to month; +select '10-9' year to month '2-1' year to month; diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index a032b653f10d..b55eef6b8824 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 62 +-- Number of queries: 69 -- !query 0 @@ -559,12 +559,40 @@ select interval '10-9' year to month '2-1' year to month -- !query 60 -select interval 1 year '2-1' year to month +select interval '10-9' year to month '12:11:10' hour to second -- !query 60 schema struct<> -- !query 60 output org.apache.spark.sql.catalyst.parser.ParseException +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '12:11:10' hour to second +-------------------------------------^^^ + + +-- !query 61 +select interval '1 15:11' day to minute '12:11:10' hour to second +-- !query 61 schema +struct<> +-- !query 61 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 40) + +== SQL == +select interval '1 15:11' day to minute '12:11:10' hour to second +----------------------------------------^^^ + + +-- !query 62 +select interval 1 year '2-1' year to month +-- !query 62 schema +struct<> +-- !query 62 output +org.apache.spark.sql.catalyst.parser.ParseException + Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 23) == SQL == @@ -572,11 +600,25 @@ select interval 1 year '2-1' year to month -----------------------^^^ --- !query 61 +-- !query 63 +select interval 1 year '12:11:10' hour to second +-- !query 63 schema +struct<> +-- !query 63 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '12:11:10' hour to second +-----------------------^^^ + + +-- !query 64 select interval '10-9' year to month '1' year --- !query 61 schema +-- !query 64 schema struct<> --- !query 61 output +-- !query 64 output org.apache.spark.sql.catalyst.parser.ParseException Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 37) @@ -584,3 +626,53 @@ Can only have a single unit TO unit statement in the interval literal syntax(lin == SQL == select interval '10-9' year to month '1' year -------------------------------------^^^ + + +-- !query 65 +select interval '12:11:10' hour to second '1' year +-- !query 65 schema +struct<> +-- !query 65 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 42) + +== SQL == +select interval '12:11:10' hour to second '1' year +------------------------------------------^^^ + + +-- !query 66 +SET spark.sql.ansi.enabled=true +-- !query 66 schema +struct +-- !query 66 output +spark.sql.ansi.enabled true + + +-- !query 67 +select interval '10-9' year to month '2-1' year to month +-- !query 67 schema +struct<> +-- !query 67 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '2-1' year to month +-------------------------------------^^^ + + +-- !query 68 +select '10-9' year to month '2-1' year to month +-- !query 68 schema +struct<> +-- !query 68 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 28) + +== SQL == +select '10-9' year to month '2-1' year to month +----------------------------^^^ From 2b79f9c2909a7ec2ab0d105b3a40179fb766179f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 30 Oct 2019 01:48:01 +0800 Subject: [PATCH 3/5] refine --- .../spark/sql/catalyst/parser/AstBuilder.scala | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4a335c27740c..61c313847a26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1916,6 +1916,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } + /** + * Create a [[CalendarInterval]] literal expression. An interval expression can contain multiple + * unit value pairs, for instance: interval 2 months 2 days. Or a unit-TO-unit statement, for + * instance: interval '1-2' year to month. + */ override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) { if (ctx.errorCapturingMultiUnitsInterval != null) { val innerCtx = ctx.errorCapturingMultiUnitsInterval @@ -1931,7 +1936,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging if (innerCtx.error1 != null || innerCtx.error2 != null) { val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2 throw new ParseException( - "Can only have a single unit TO unit statement in the interval literal syntax", + "Can only have a single unit-TO-unit statement in the interval literal syntax", errorCtx) } Literal(visitUnitToUnitInterval(innerCtx.body), CalendarIntervalType) @@ -1939,7 +1944,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Creates a [[CalendarInterval]] literal expression with multiple units, e.g. 1 YEAR 2 DAYS. + * Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS. */ override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = { withOrigin(ctx) { @@ -1969,8 +1974,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Creates a [[CalendarInterval]] literal expression with the unit TO unit syntax, - * e.g. '2-1' YEAR TO MONTH. + * Creates a [[CalendarInterval]] with a unit-TO-unit statement, e.g. '2-1' YEAR TO MONTH. */ override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = { withOrigin(ctx) { From 51d9b455fdd40739c44e0fcb974338843d1c7bb8 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 30 Oct 2019 13:24:08 +0800 Subject: [PATCH 4/5] refine --- docs/sql-migration-guide.md | 2 +- .../spark/sql/catalyst/parser/SqlBase.g4 | 6 +- .../sql/catalyst/parser/AstBuilder.scala | 23 +- .../parser/ExpressionParserSuite.scala | 8 +- .../resources/sql-tests/inputs/literals.sql | 14 +- .../sql-tests/results/literals.sql.out | 220 ++++++++++++++---- 6 files changed, 211 insertions(+), 62 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 26093be3ea02..a97a4b04ded6 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -218,7 +218,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - Since Spark 3.0, the interval literal syntax does not allow multiple unit-TO-unit statements anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. + - Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. ## Upgrading from Spark SQL 2.4 to 2.4.1 diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 0e6e1eb90f98..e19ed0a722d1 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -761,8 +761,8 @@ booleanValue ; interval - : {ansi}? INTERVAL? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) - | {!ansi}? INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) + : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? + | {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) ; errorCapturingMultiUnitsInterval @@ -778,7 +778,7 @@ errorCapturingUnitToUnitInterval ; unitToUnitInterval - : value=STRING from=intervalUnit TO to=intervalUnit + : value=intervalValue from=intervalUnit TO to=intervalUnit ; intervalValue diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 1574fb9be945..1224fbfe0bfd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1927,29 +1927,30 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[CalendarInterval]] literal expression. An interval expression can contain multiple - * unit value pairs, for instance: interval 2 months 2 days. Or a unit-TO-unit statement, for - * instance: interval '1-2' year to month. + * Create a [[CalendarInterval]] literal expression. Two syntaxes are supported: + * - multiple unit value pairs, for instance: interval 2 months 2 days. + * - from-to unit, for instance: interval '1-2' year to month. */ override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) { if (ctx.errorCapturingMultiUnitsInterval != null) { val innerCtx = ctx.errorCapturingMultiUnitsInterval if (innerCtx.unitToUnitInterval != null) { throw new ParseException( - "Can only have a single unit TO unit statement in the interval literal syntax", + "Can only have a single from-to unit in the interval literal syntax", innerCtx.unitToUnitInterval) } Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval), CalendarIntervalType) - } else { - assert(ctx.errorCapturingUnitToUnitInterval != null) + } else if (ctx.errorCapturingUnitToUnitInterval != null) { val innerCtx = ctx.errorCapturingUnitToUnitInterval if (innerCtx.error1 != null || innerCtx.error2 != null) { val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2 throw new ParseException( - "Can only have a single unit-TO-unit statement in the interval literal syntax", + "Can only have a single from-to unit in the interval literal syntax", errorCtx) } Literal(visitUnitToUnitInterval(innerCtx.body), CalendarIntervalType) + } else { + throw new ParseException("at least one time unit should be given for interval literal", ctx) } } @@ -1984,11 +1985,13 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Creates a [[CalendarInterval]] with a unit-TO-unit statement, e.g. '2-1' YEAR TO MONTH. + * Creates a [[CalendarInterval]] with from-to unit, e.g. '2-1' YEAR TO MONTH. */ override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = { withOrigin(ctx) { - val value = string(ctx.STRING()) + val value = Option(ctx.intervalValue.STRING).map(string).getOrElse { + throw new ParseException("The value of from-to unit must be a string", ctx.intervalValue) + } try { val from = ctx.from.getText.toLowerCase(Locale.ROOT) val to = ctx.to.getText.toLowerCase(Locale.ROOT) @@ -2008,7 +2011,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case ("minute", "second") => IntervalUtils.fromDayTimeString(value, "minute", "second") case _ => - throw new ParseException(s"Intervals $from TO $to are not supported.", ctx) + throw new ParseException(s"Intervals FROM $from TO $to are not supported.", ctx) } } catch { // Handle Exceptions thrown by CalendarInterval diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 5a7b3ffec53f..24b32cd43f80 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -632,7 +632,7 @@ class ExpressionParserSuite extends AnalysisTest { // Non Existing unit intercept("interval 10 nanoseconds", - "no viable alternative at input 'interval 10 nanoseconds'") + "missing {'DAY', 'DAYS', 'HOUR'", "'YEAR', 'YEARS'} at 'nanoseconds'") // Year-Month intervals. val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0") @@ -667,17 +667,13 @@ class ExpressionParserSuite extends AnalysisTest { } // Unknown FROM TO intervals - intercept("interval 10 month to second", + intercept("interval '10' month to second", "Intervals FROM month TO second are not supported.") // Composed intervals. checkIntervals( "3 months 22 seconds 1 millisecond", Literal(new CalendarInterval(3, 22001000L))) - checkIntervals( - "3 years '-1-10' year to month 3 weeks '1 0:0:2' day to second", - Literal(new CalendarInterval(14, - 22 * CalendarInterval.MICROS_PER_DAY + 2 * CalendarInterval.MICROS_PER_SECOND))) } test("SPARK-23264 Interval Compatibility tests") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql index ce500de9dd4c..3c3be2913abc 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/literals.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -132,6 +132,10 @@ select integer'7'; select integer '2147483648'; -- malformed interval literal +select interval; +select interval 1 fake_unit; +select interval 1 year to month; +select interval '1' year to second; select interval '10-9' year to month '2-1' year to month; select interval '10-9' year to month '12:11:10' hour to second; select interval '1 15:11' day to minute '12:11:10' hour to second; @@ -141,5 +145,11 @@ select interval '10-9' year to month '1' year; select interval '12:11:10' hour to second '1' year; -- malformed interval literal with ansi mode SET spark.sql.ansi.enabled=true; -select interval '10-9' year to month '2-1' year to month; -select '10-9' year to month '2-1' year to month; +select interval; +select interval 1 fake_unit; +select interval 1 year to month; +select 1 year to month; +select interval '1' year to second; +select '1' year to second; +select interval 1 year '2-1' year to month; +select 1 year '2-1' year to month; diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index 6ac31ddb9c63..7a5e33a9a463 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 72 +-- Number of queries: 82 -- !query 0 @@ -447,7 +447,7 @@ struct<> -- !query 49 output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'interval 10 nanoseconds'(line 1, pos 19) +no viable alternative at input '10 nanoseconds'(line 1, pos 19) == SQL == select interval 10 nanoseconds @@ -575,134 +575,274 @@ select integer '2147483648' -- !query 62 -select interval '10-9' year to month '2-1' year to month +select interval -- !query 62 schema struct<> -- !query 62 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit-TO-unit statement in the interval literal syntax(line 1, pos 37) +at least one time unit should be given for interval literal(line 1, pos 7) == SQL == -select interval '10-9' year to month '2-1' year to month --------------------------------------^^^ +select interval +-------^^^ -- !query 63 -select interval '10-9' year to month '12:11:10' hour to second +select interval 1 fake_unit -- !query 63 schema struct<> -- !query 63 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit-TO-unit statement in the interval literal syntax(line 1, pos 37) +no viable alternative at input '1 fake_unit'(line 1, pos 18) == SQL == -select interval '10-9' year to month '12:11:10' hour to second --------------------------------------^^^ +select interval 1 fake_unit +------------------^^^ -- !query 64 -select interval '1 15:11' day to minute '12:11:10' hour to second +select interval 1 year to month -- !query 64 schema struct<> -- !query 64 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit-TO-unit statement in the interval literal syntax(line 1, pos 40) +The value of from-to unit must be a string(line 1, pos 16) == SQL == -select interval '1 15:11' day to minute '12:11:10' hour to second -----------------------------------------^^^ +select interval 1 year to month +----------------^^^ -- !query 65 -select interval 1 year '2-1' year to month +select interval '1' year to second -- !query 65 schema struct<> -- !query 65 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 23) +Intervals FROM year TO second are not supported.(line 1, pos 16) == SQL == -select interval 1 year '2-1' year to month ------------------------^^^ +select interval '1' year to second +----------------^^^ -- !query 66 -select interval 1 year '12:11:10' hour to second +select interval '10-9' year to month '2-1' year to month -- !query 66 schema struct<> -- !query 66 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit TO unit statement in the interval literal syntax(line 1, pos 23) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) == SQL == -select interval 1 year '12:11:10' hour to second ------------------------^^^ +select interval '10-9' year to month '2-1' year to month +-------------------------------------^^^ -- !query 67 -select interval '10-9' year to month '1' year +select interval '10-9' year to month '12:11:10' hour to second -- !query 67 schema struct<> -- !query 67 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit-TO-unit statement in the interval literal syntax(line 1, pos 37) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) == SQL == -select interval '10-9' year to month '1' year +select interval '10-9' year to month '12:11:10' hour to second -------------------------------------^^^ -- !query 68 -select interval '12:11:10' hour to second '1' year +select interval '1 15:11' day to minute '12:11:10' hour to second -- !query 68 schema struct<> -- !query 68 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit-TO-unit statement in the interval literal syntax(line 1, pos 42) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 40) == SQL == -select interval '12:11:10' hour to second '1' year -------------------------------------------^^^ +select interval '1 15:11' day to minute '12:11:10' hour to second +----------------------------------------^^^ -- !query 69 -SET spark.sql.ansi.enabled=true +select interval 1 year '2-1' year to month -- !query 69 schema -struct +struct<> -- !query 69 output -spark.sql.ansi.enabled true +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '2-1' year to month +-----------------------^^^ -- !query 70 -select interval '10-9' year to month '2-1' year to month +select interval 1 year '12:11:10' hour to second -- !query 70 schema struct<> -- !query 70 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit-TO-unit statement in the interval literal syntax(line 1, pos 37) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) == SQL == -select interval '10-9' year to month '2-1' year to month --------------------------------------^^^ +select interval 1 year '12:11:10' hour to second +-----------------------^^^ -- !query 71 -select '10-9' year to month '2-1' year to month +select interval '10-9' year to month '1' year -- !query 71 schema struct<> -- !query 71 output org.apache.spark.sql.catalyst.parser.ParseException -Can only have a single unit-TO-unit statement in the interval literal syntax(line 1, pos 28) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '1' year +-------------------------------------^^^ + + +-- !query 72 +select interval '12:11:10' hour to second '1' year +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 42) + +== SQL == +select interval '12:11:10' hour to second '1' year +------------------------------------------^^^ + + +-- !query 73 +SET spark.sql.ansi.enabled=true +-- !query 73 schema +struct +-- !query 73 output +spark.sql.ansi.enabled true + + +-- !query 74 +select interval +-- !query 74 schema +struct<> +-- !query 74 output +org.apache.spark.sql.catalyst.parser.ParseException + +at least one time unit should be given for interval literal(line 1, pos 7) + +== SQL == +select interval +-------^^^ + + +-- !query 75 +select interval 1 fake_unit +-- !query 75 schema +struct<> +-- !query 75 output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input '1 fake_unit'(line 1, pos 18) + +== SQL == +select interval 1 fake_unit +------------------^^^ + + +-- !query 76 +select interval 1 year to month +-- !query 76 schema +struct<> +-- !query 76 output +org.apache.spark.sql.catalyst.parser.ParseException + +The value of from-to unit must be a string(line 1, pos 16) + +== SQL == +select interval 1 year to month +----------------^^^ + + +-- !query 77 +select 1 year to month +-- !query 77 schema +struct<> +-- !query 77 output +org.apache.spark.sql.catalyst.parser.ParseException + +The value of from-to unit must be a string(line 1, pos 7) + +== SQL == +select 1 year to month +-------^^^ + + +-- !query 78 +select interval '1' year to second +-- !query 78 schema +struct<> +-- !query 78 output +org.apache.spark.sql.catalyst.parser.ParseException + +Intervals FROM year TO second are not supported.(line 1, pos 16) + +== SQL == +select interval '1' year to second +----------------^^^ + + +-- !query 79 +select '1' year to second +-- !query 79 schema +struct<> +-- !query 79 output +org.apache.spark.sql.catalyst.parser.ParseException + +Intervals FROM year TO second are not supported.(line 1, pos 7) + +== SQL == +select '1' year to second +-------^^^ + + +-- !query 80 +select interval 1 year '2-1' year to month +-- !query 80 schema +struct<> +-- !query 80 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '2-1' year to month +-----------------------^^^ + + +-- !query 81 +select 1 year '2-1' year to month +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 14) == SQL == -select '10-9' year to month '2-1' year to month -----------------------------^^^ +select 1 year '2-1' year to month +--------------^^^ From e560cd890d9f0e42de21a9c4bd38816eee52b778 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 30 Oct 2019 22:03:23 +0800 Subject: [PATCH 5/5] fix test --- .../sql/catalyst/parser/ExpressionParserSuite.scala | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 24b32cd43f80..6acd19b952a7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -632,7 +632,7 @@ class ExpressionParserSuite extends AnalysisTest { // Non Existing unit intercept("interval 10 nanoseconds", - "missing {'DAY', 'DAYS', 'HOUR'", "'YEAR', 'YEARS'} at 'nanoseconds'") + "no viable alternative at input '10 nanoseconds'") // Year-Month intervals. val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 630489ad9c60..9e99aaaef9e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1564,17 +1564,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { } e.message.contains("Cannot save interval data type into external storage") }) - - val e1 = intercept[AnalysisException] { - sql("select interval") - } - assert(e1.message.contains("at least one time unit should be given for interval literal")) - - // Currently we don't yet support nanosecond - val e2 = intercept[AnalysisException] { - sql("select interval 23 nanosecond") - } - assert(e2.message.contains("no viable alternative at input 'interval 23 nanosecond'")) } test("SPARK-8945: add and subtract expressions for interval type") {