Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ object IntervalUtils {
s"Error parsing interval year-month string: ${e.getMessage}", e)
}
}
assert(input.length == input.trim.length)
input match {
input.trim match {
case yearMonthPattern("-", yearStr, monthStr) =>
negateExact(toInterval(yearStr, monthStr))
case yearMonthPattern(_, yearStr, monthStr) =>
Expand Down Expand Up @@ -300,7 +299,7 @@ object IntervalUtils {
val regexp = dayTimePattern.get(from -> to)
require(regexp.isDefined, s"Cannot support (interval '$input' $from to $to) expression")
val pattern = regexp.get.pattern
val m = pattern.matcher(input)
val m = pattern.matcher(input.trim)
require(m.matches, s"Interval string must match day-time format of '$pattern': $input, " +
s"$fallbackNotice")
var micros: Long = 0L
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ class ExpressionParserSuite extends AnalysisTest {
intercept("interval 10 nanoseconds", "invalid unit 'nanoseconds'")

// Year-Month intervals.
val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0")
val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0", "\t -1-2\t")
yearMonthValues.foreach { value =>
val result = Literal(IntervalUtils.fromYearMonthString(value))
checkIntervals(s"'$value' year to month", result)
Expand All @@ -707,7 +707,8 @@ class ExpressionParserSuite extends AnalysisTest {
"10 9:8:7.123456789",
"1 0:0:0",
"-1 0:0:0",
"1 0:0:1")
"1 0:0:1",
"\t 1 0:0:1 ")
datTimeValues.foreach { value =>
val result = Literal(IntervalUtils.fromDayTimeString(value))
checkIntervals(s"'$value' day to second", result)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,15 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
failFuncWithInvalidInput("99-15", "month 15 outside range", fromYearMonthString)
failFuncWithInvalidInput("9a9-15", "Interval string does not match year-month format",
fromYearMonthString)

// whitespaces
assert(fromYearMonthString("99-10 ") === new CalendarInterval(99 * 12 + 10, 0, 0L))
assert(fromYearMonthString("+99-10\t") === new CalendarInterval(99 * 12 + 10, 0, 0L))
assert(fromYearMonthString("\t\t-8-10\t") === new CalendarInterval(-8 * 12 - 10, 0, 0L))
failFuncWithInvalidInput("99\t-15", "Interval string does not match year-month format",
fromYearMonthString)
failFuncWithInvalidInput("-\t99-15", "Interval string does not match year-month format",
fromYearMonthString)
}

test("from day-time string - legacy") {
Expand Down Expand Up @@ -312,6 +321,11 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
checkFail("5 30:12:20", DAY, SECOND, "hour 30 outside range")
checkFail("5 30-12", DAY, SECOND, "must match day-time format")
checkFail("5 1:12:20", HOUR, MICROSECOND, "Cannot support (interval")

// whitespaces
check("\t +5 12:40\t ", DAY, MINUTE, "5 days 12 hours 40 minutes")
checkFail("+5\t 12:40", DAY, MINUTE, "must match day-time format")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for multi-unit syntax, do we support 1 day \t 2 hours?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes. My opinion is that the interval string in multi-unit syntax, we parse each value and each unit separately and they can be produced by the providers separately too so the whitespaces can be in the middle of each part. In year-month and day-time syntax, the string should be considered as a whole part so the whitespaces only appear on both ends, not in the middle.

take java.sql.Timestamp for an example, the timestamp string is treated as a whole

scala> java.sql.Timestamp.valueOf("2011-11-11 11:11:11.11\t")
res8: java.sql.Timestamp = 2011-11-11 11:11:11.11

scala> java.sql.Timestamp.valueOf("2011-11-11 \t 11:11:11.11\t")
java.lang.NumberFormatException: For input string: "	 11"
  at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
  at java.lang.Integer.parseInt(Integer.java:569)
  at java.lang.Integer.parseInt(Integer.java:615)
  at java.sql.Timestamp.valueOf(Timestamp.java:243)
  ... 28 elided

Also, we do the same in Spark

spark-sql> select timestamp '2013-01-02 \t 00:00:00';
Error in query:
Cannot parse the TIMESTAMP value: 2013-01-02 	 00:00:00(line 1, pos 7)

== SQL ==
select timestamp '2013-01-02 \t 00:00:00'
-------^^^

spark-sql> select timestamp '2013-01-02 00:00:00 \t';
2013-01-02 00:00:00


}

test("interval overflow check") {
Expand Down
4 changes: 4 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/interval.sql
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ select interval 'interval \t 1\tday';
select interval 'interval\t1\tday';
select interval '1\t' day;
select interval '1 ' day;
select interval '2-2\t' year to month;
select interval '-\t2-2\t' year to month;
select interval '\n0 12:34:46.789\t' day to second;
select interval '\n-\t10\t 12:34:46.789\t' day to second;

-- interval overflow if (ansi) exception else NULL
select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 85
-- Number of queries: 89


-- !query
Expand Down Expand Up @@ -804,6 +804,51 @@ struct<INTERVAL '1 days':interval>
1 days


-- !query
select interval '2-2\t' year to month
-- !query schema
struct<INTERVAL '2 years 2 months':interval>
-- !query output
2 years 2 months


-- !query
select interval '-\t2-2\t' year to month
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

Interval string does not match year-month format of 'y-m': - 2-2 (line 1, pos 16)

== SQL ==
select interval '-\t2-2\t' year to month
----------------^^^


-- !query
select interval '\n0 12:34:46.789\t' day to second
-- !query schema
struct<INTERVAL '12 hours 34 minutes 46.789 seconds':interval>
-- !query output
12 hours 34 minutes 46.789 seconds


-- !query
select interval '\n-\t10\t 12:34:46.789\t' day to second
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

requirement failed: Interval string must match day-time format of '^(?<sign>[+|-])?(?<day>\d+) (?<hour>\d{1,2}):(?<minute>\d{1,2}):(?<second>(\d{1,2})(\.(\d{1,9}))?)$':
- 10 12:34:46.789 , set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16)
Copy link
Contributor

@cloud-fan cloud-fan Mar 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not related to this PR, but we should only ask users to set the legacy config if it works.


== SQL ==
select interval '\n-\t10\t 12:34:46.789\t' day to second
----------------^^^


-- !query
select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
-- !query schema
Expand Down
47 changes: 46 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/interval.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not related to this PR: we should put the test queries that have different behaviors if ansi mode is on to a different file, and only test that file with ansi mode on and off.

-- Number of queries: 85
-- Number of queries: 89


-- !query
Expand Down Expand Up @@ -783,6 +783,51 @@ struct<INTERVAL '1 days':interval>
1 days


-- !query
select interval '2-2\t' year to month
-- !query schema
struct<INTERVAL '2 years 2 months':interval>
-- !query output
2 years 2 months


-- !query
select interval '-\t2-2\t' year to month
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

Interval string does not match year-month format of 'y-m': - 2-2 (line 1, pos 16)

== SQL ==
select interval '-\t2-2\t' year to month
----------------^^^


-- !query
select interval '\n0 12:34:46.789\t' day to second
-- !query schema
struct<INTERVAL '12 hours 34 minutes 46.789 seconds':interval>
-- !query output
12 hours 34 minutes 46.789 seconds


-- !query
select interval '\n-\t10\t 12:34:46.789\t' day to second
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

requirement failed: Interval string must match day-time format of '^(?<sign>[+|-])?(?<day>\d+) (?<hour>\d{1,2}):(?<minute>\d{1,2}):(?<second>(\d{1,2})(\.(\d{1,9}))?)$':
- 10 12:34:46.789 , set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16)

== SQL ==
select interval '\n-\t10\t 12:34:46.789\t' day to second
----------------^^^


-- !query
select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
-- !query schema
Expand Down