diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 911a5b3aa36a..a93e92788b6a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp} import org.apache.spark.sql.catalyst.util.IntervalUtils +import org.apache.spark.sql.catalyst.util.IntervalUtils.UnitName import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -1967,17 +1968,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case ("year", Some("month")) => IntervalUtils.fromYearMonthString(s) case ("day", Some("hour")) => - IntervalUtils.fromDayTimeString(s, "day", "hour") + IntervalUtils.fromDayTimeString(s, UnitName.day, UnitName.hour) case ("day", Some("minute")) => - IntervalUtils.fromDayTimeString(s, "day", "minute") + IntervalUtils.fromDayTimeString(s, UnitName.day, UnitName.minute) case ("day", Some("second")) => - IntervalUtils.fromDayTimeString(s, "day", "second") + IntervalUtils.fromDayTimeString(s, UnitName.day, UnitName.second) case ("hour", Some("minute")) => - IntervalUtils.fromDayTimeString(s, "hour", "minute") + IntervalUtils.fromDayTimeString(s, UnitName.hour, UnitName.minute) case ("hour", Some("second")) => - IntervalUtils.fromDayTimeString(s, "hour", "second") + IntervalUtils.fromDayTimeString(s, UnitName.hour, UnitName.second) case 
("minute", Some("second")) => - IntervalUtils.fromDayTimeString(s, "minute", "second") + IntervalUtils.fromDayTimeString(s, UnitName.minute, UnitName.second) case (from, Some(t)) => throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 90e2402a5d7d..6412a67c5eba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -175,11 +175,38 @@ object IntervalUtils { * adapted from HiveIntervalDayTime.valueOf */ def fromDayTimeString(s: String): CalendarInterval = { - fromDayTimeString(s, "day", "second") + fromDayTimeString(s, UnitName.day, UnitName.second) } - private val dayTimePattern = - "^([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?$".r + object UnitName extends Enumeration { + val microsecond = Value(0, "microsecond") + val millisecond = Value(1, "millisecond") + val second = Value(2, "second") + val minute = Value(3, "minute") + val hour = Value(4, "hour") + val day = Value(5, "day") + val week = Value(6, "week") + val month = Value(7, "month") + val year = Value(8, "year") + } + + val unitValueProps: Map[UnitName.Value, (Long, Long, Long => Long)] = Map( + UnitName.minute -> (0, 59, Math.multiplyExact(_, MICROS_PER_MINUTE)), + UnitName.hour -> (0, 23, Math.multiplyExact(_, MICROS_PER_HOUR)), + UnitName.day -> (0, Integer.MAX_VALUE, Math.multiplyExact(_, DateTimeUtils.MICROS_PER_DAY)) + ) + + private val signRe = "(?<sign>[+|-])" + private val dayRe = "((?<day>\\d+)\\s+)" + private val hourRe = "(?<hour>\\d{1,2}+)" + private val minuteRe = "(?<minute>\\d{1,2}+)" + private val secondRe = "(?<second>(\\d{1,2}+)(\\.(\\d{1,9}+))?)" + private val minsecRe = (s"^$signRe?$dayRe?($hourRe:)?$minuteRe:$secondRe$$").r + private val daysecRe = 
(s"^$signRe?$dayRe?$hourRe(:$minuteRe(:$secondRe)?)?$$").r + + private def unitsRange(start: UnitName.Value, end: UnitName.Value): Seq[UnitName.Value] = { + (start.id to end.id).map(UnitName(_)) + } /** * Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn @@ -190,54 +217,34 @@ object IntervalUtils { * - HOUR TO (MINUTE|SECOND) * - MINUTE TO SECOND */ - def fromDayTimeString(input: String, from: String, to: String): CalendarInterval = { + def fromDayTimeString( + input: String, + from: UnitName.Value, + to: UnitName.Value): CalendarInterval = { require(input != null, "Interval day-time string must be not null") assert(input.length == input.trim.length) - val m = dayTimePattern.pattern.matcher(input) - require(m.matches, s"Interval string must match day-time format of 'd h:m:s.n': $input") + val pattern = (from, to) match { + case (UnitName.minute, UnitName.second) => minsecRe.pattern + case _ => daysecRe.pattern + } + val m = pattern.matcher(input) + require(m.matches, s"Interval string must match day-time format of '$pattern': $input") + + def toLong(unitName: UnitName.Value): Long = { + val name = unitName.toString + val (minValue, maxValue, toMicros) = unitValueProps(unitName) + toMicros(toLongWithRange(name, m.group(name), minValue, maxValue)) + } try { - val sign = if (m.group(1) != null && m.group(1) == "-") -1 else 1 - val days = if (m.group(2) == null) { - 0 - } else { - toLongWithRange("day", m.group(3), 0, Integer.MAX_VALUE) - } - var hours: Long = 0L - var minutes: Long = 0L - var seconds: Long = 0L - if (m.group(5) != null || from == "minute") { // 'HH:mm:ss' or 'mm:ss minute' - hours = toLongWithRange("hour", m.group(5), 0, 23) - minutes = toLongWithRange("minute", m.group(6), 0, 59) - seconds = toLongWithRange("second", m.group(7), 0, 59) - } else if (m.group(8) != null) { // 'mm:ss.nn' - minutes = toLongWithRange("minute", m.group(6), 0, 59) - seconds = toLongWithRange("second", m.group(7), 0, 59) - } else { // 'HH:mm' - 
hours = toLongWithRange("hour", m.group(6), 0, 23) - minutes = toLongWithRange("second", m.group(7), 0, 59) - } - // Hive allow nanosecond precision interval - var secondsFraction = parseNanos(m.group(9), seconds < 0) - to match { - case "hour" => - minutes = 0 - seconds = 0 - secondsFraction = 0 - case "minute" => - seconds = 0 - secondsFraction = 0 - case "second" => - // No-op + val micros = unitsRange(to, from).map { + case name @ (UnitName.day | UnitName.hour | UnitName.minute) => toLong(name) + case UnitName.second => parseSecondNano(m.group(UnitName.second.toString)) case _ => throw new IllegalArgumentException( s"Cannot support (interval '$input' $from to $to) expression") - } - var micros = secondsFraction - micros = Math.addExact(micros, Math.multiplyExact(days, DateTimeUtils.MICROS_PER_DAY)) - micros = Math.addExact(micros, Math.multiplyExact(hours, MICROS_PER_HOUR)) - micros = Math.addExact(micros, Math.multiplyExact(minutes, MICROS_PER_MINUTE)) - micros = Math.addExact(micros, Math.multiplyExact(seconds, DateTimeUtils.MICROS_PER_SECOND)) + }.reduce((x: Long, y: Long) => Math.addExact(x, y)) + val sign = if (m.group("sign") != null && m.group("sign") == "-") -1 else 1 new CalendarInterval(0, sign * micros) } catch { case e: Exception => @@ -314,6 +321,7 @@ object IntervalUtils { Long.MaxValue / DateTimeUtils.MICROS_PER_SECOND) * DateTimeUtils.MICROS_PER_SECOND } + if (secondNano == null) return 0L secondNano.split("\\.") match { case Array(secondsStr) => parseSeconds(secondsStr) case Array("", nanosStr) => parseNanos(nanosStr, false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index 22944035f31d..110885788b72 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -20,7 
+20,7 @@ package org.apache.spark.sql.catalyst.util import java.util.concurrent.TimeUnit import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.util.IntervalUtils.{fromDayTimeString, fromString, fromYearMonthString} +import org.apache.spark.sql.catalyst.util.IntervalUtils.{fromDayTimeString, fromString, fromYearMonthString, UnitName} import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.unsafe.types.CalendarInterval._ @@ -143,7 +143,7 @@ class IntervalUtilsSuite extends SparkFunSuite { } try { - fromDayTimeString("5 1:12:20", "hour", "microsecond") + fromDayTimeString("5 1:12:20", UnitName.hour, UnitName.microsecond) fail("Expected to throw an exception for the invalid convention type") } catch { case e: IllegalArgumentException => diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/interval.sql.out index bed5d7a56c1f..7b210e3b7c71 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/interval.sql.out @@ -149,46 +149,46 @@ interval 1 days 2 hours 3 minutes 4 seconds -- !query 18 SELECT interval '1 2:03' hour to minute -- !query 18 schema -struct +struct -- !query 18 output -interval 1 days 2 hours 3 minutes +interval 2 hours 3 minutes -- !query 19 SELECT interval '1 2:03:04' hour to minute -- !query 19 schema -struct +struct -- !query 19 output -interval 1 days 2 hours 3 minutes +interval 2 hours 3 minutes -- !query 20 SELECT interval '1 2:03' hour to second -- !query 20 schema -struct +struct -- !query 20 output -interval 1 days 2 hours 3 minutes +interval 2 hours 3 minutes -- !query 21 SELECT interval '1 2:03:04' hour to second -- !query 21 schema -struct +struct -- !query 21 output -interval 1 days 2 hours 3 minutes 4 seconds +interval 2 hours 3 minutes 4 seconds -- !query 22 SELECT interval '1 2:03' minute to second -- !query 22 
schema -struct +struct -- !query 22 output -interval 1 days 2 minutes 3 seconds +interval 2 minutes 3 seconds -- !query 23 SELECT interval '1 2:03:04' minute to second -- !query 23 schema -struct +struct -- !query 23 output -interval 1 days 2 hours 3 minutes 4 seconds +interval 3 minutes 4 seconds