diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 008ba61b2ae0..eeea0e2722a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -136,8 +136,7 @@ object IntervalUtils { s"Error parsing interval year-month string: ${e.getMessage}", e) } } - assert(input.length == input.trim.length) - input match { + input.trim match { case yearMonthPattern("-", yearStr, monthStr) => negateExact(toInterval(yearStr, monthStr)) case yearMonthPattern(_, yearStr, monthStr) => @@ -300,7 +299,7 @@ object IntervalUtils { val regexp = dayTimePattern.get(from -> to) require(regexp.isDefined, s"Cannot support (interval '$input' $from to $to) expression") val pattern = regexp.get.pattern - val m = pattern.matcher(input) + val m = pattern.matcher(input.trim) require(m.matches, s"Interval string must match day-time format of '$pattern': $input, " + s"$fallbackNotice") var micros: Long = 0L diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index e8beb612b513..74fd48d922a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -694,7 +694,7 @@ class ExpressionParserSuite extends AnalysisTest { intercept("interval 10 nanoseconds", "invalid unit 'nanoseconds'") // Year-Month intervals. - val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0") + val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0", "\t -1-2\t") yearMonthValues.foreach { value => val result = Literal(IntervalUtils.fromYearMonthString(value)) checkIntervals(s"'$value' year to month", result) @@ -707,7 +707,8 @@ class ExpressionParserSuite extends AnalysisTest { "10 9:8:7.123456789", "1 0:0:0", "-1 0:0:0", - "1 0:0:1") + "1 0:0:1", + "\t 1 0:0:1 ") datTimeValues.foreach { value => val result = Literal(IntervalUtils.fromDayTimeString(value)) checkIntervals(s"'$value' day to second", result) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index 4943dc365d71..ba738eeebbcd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -137,6 +137,15 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper { failFuncWithInvalidInput("99-15", "month 15 outside range", fromYearMonthString) failFuncWithInvalidInput("9a9-15", "Interval string does not match year-month format", fromYearMonthString) + + // whitespaces + assert(fromYearMonthString("99-10 ") === new CalendarInterval(99 * 12 + 10, 0, 0L)) + assert(fromYearMonthString("+99-10\t") === new CalendarInterval(99 * 12 + 10, 0, 0L)) + assert(fromYearMonthString("\t\t-8-10\t") === new CalendarInterval(-8 * 12 - 10, 0, 0L)) + failFuncWithInvalidInput("99\t-15", "Interval string does not match year-month format", + fromYearMonthString) + failFuncWithInvalidInput("-\t99-15", "Interval string does not match year-month format", + fromYearMonthString) } test("from day-time string - legacy") { @@ -312,6 +321,11 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper { checkFail("5 30:12:20", DAY, SECOND, "hour 30 outside range") checkFail("5 30-12", DAY, SECOND, "must match day-time format") checkFail("5 1:12:20", HOUR, MICROSECOND, "Cannot support (interval") + + // whitespaces + check("\t +5 12:40\t ", DAY, MINUTE, "5 days 12 hours 40 minutes") + checkFail("+5\t 12:40", DAY, MINUTE, "must match day-time format") + } test("interval overflow check") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 4f26e75fa77f..fec11b4a9038 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -153,6 +153,10 @@ select interval 'interval \t 1\tday'; select interval 'interval\t1\tday'; select interval '1\t' day; select interval '1 ' day; +select interval '2-2\t' year to month; +select interval '-\t2-2\t' year to month; +select interval '\n0 12:34:46.789\t' day to second; +select interval '\n-\t10\t 12:34:46.789\t' day to second; -- interval overflow if (ansi) exception else NULL select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 4a41dd669e1c..d4238c73e39e 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 85 +-- Number of queries: 89 -- !query @@ -804,6 +804,51 @@ struct 1 days +-- !query +select interval '2-2\t' year to month +-- !query schema +struct +-- !query output +2 years 2 months + + +-- !query +select interval '-\t2-2\t' year to month +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Interval string does not match year-month format of 'y-m': - 2-2 (line 1, pos 16) + +== SQL == +select interval '-\t2-2\t' year to month +----------------^^^ + + +-- !query +select interval '\n0 12:34:46.789\t' day to second +-- !query schema +struct +-- !query output +12 hours 34 minutes 46.789 seconds + + +-- !query +select interval '\n-\t10\t 12:34:46.789\t' day to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': +- 10 12:34:46.789 , set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '\n-\t10\t 12:34:46.789\t' day to second +----------------^^^ + + -- !query select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index f1af335f2c78..7a3dd74b33bb 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 85 +-- Number of queries: 89 -- !query @@ -783,6 +783,51 @@ struct 1 days +-- !query +select interval '2-2\t' year to month +-- !query schema +struct +-- !query output +2 years 2 months + + +-- !query +select interval '-\t2-2\t' year to month +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Interval string does not match year-month format of 'y-m': - 2-2 (line 1, pos 16) + +== SQL == +select interval '-\t2-2\t' year to month +----------------^^^ + + +-- !query +select interval '\n0 12:34:46.789\t' day to second +-- !query schema +struct +-- !query output +12 hours 34 minutes 46.789 seconds + + +-- !query +select interval '\n-\t10\t 12:34:46.789\t' day to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': +- 10 12:34:46.789 , set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '\n-\t10\t 12:34:46.789\t' day to second +----------------^^^ + + -- !query select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) -- !query schema