Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp}
import org.apache.spark.sql.catalyst.util.IntervalUtils
import org.apache.spark.sql.catalyst.util.IntervalUtils.UnitName
import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
Expand Down Expand Up @@ -1967,17 +1968,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
case ("year", Some("month")) =>
IntervalUtils.fromYearMonthString(s)
case ("day", Some("hour")) =>
IntervalUtils.fromDayTimeString(s, "day", "hour")
IntervalUtils.fromDayTimeString(s, UnitName.day, UnitName.hour)
case ("day", Some("minute")) =>
IntervalUtils.fromDayTimeString(s, "day", "minute")
IntervalUtils.fromDayTimeString(s, UnitName.day, UnitName.minute)
case ("day", Some("second")) =>
IntervalUtils.fromDayTimeString(s, "day", "second")
IntervalUtils.fromDayTimeString(s, UnitName.day, UnitName.second)
case ("hour", Some("minute")) =>
IntervalUtils.fromDayTimeString(s, "hour", "minute")
IntervalUtils.fromDayTimeString(s, UnitName.hour, UnitName.minute)
case ("hour", Some("second")) =>
IntervalUtils.fromDayTimeString(s, "hour", "second")
IntervalUtils.fromDayTimeString(s, UnitName.hour, UnitName.second)
case ("minute", Some("second")) =>
IntervalUtils.fromDayTimeString(s, "minute", "second")
IntervalUtils.fromDayTimeString(s, UnitName.minute, UnitName.second)
case (from, Some(t)) =>
throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,38 @@ object IntervalUtils {
* adapted from HiveIntervalDayTime.valueOf
*/
def fromDayTimeString(s: String): CalendarInterval = {
fromDayTimeString(s, "day", "second")
fromDayTimeString(s, UnitName.day, UnitName.second)
}

private val dayTimePattern =
"^([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?$".r
/**
 * Names of the interval units, declared from the finest (microsecond) to the coarsest (year).
 *
 * Each value carries an explicit numeric id that grows with the size of the unit, so a
 * contiguous range of ids corresponds to a contiguous range of units. The string name of
 * each value equals its identifier.
 */
object UnitName extends Enumeration {
  val microsecond: Value = Value(0, "microsecond")
  val millisecond: Value = Value(1, "millisecond")
  val second: Value = Value(2, "second")
  val minute: Value = Value(3, "minute")
  val hour: Value = Value(4, "hour")
  val day: Value = Value(5, "day")
  val week: Value = Value(6, "week")
  val month: Value = Value(7, "month")
  val year: Value = Value(8, "year")
}

/**
 * Parsing properties for the minute, hour and day units. Each entry maps a unit to a triple of
 * (smallest accepted value, largest accepted value, conversion of a value in that unit to
 * microseconds). The conversion uses `Math.multiplyExact`, so it throws on overflow instead of
 * wrapping around.
 */
val unitValueProps: Map[UnitName.Value, (Long, Long, Long => Long)] = {
  // Builds the unit -> microseconds conversion for a given number of microseconds per unit.
  def scaledBy(microsPerUnit: Long): Long => Long =
    (amount: Long) => Math.multiplyExact(amount, microsPerUnit)

  Map(
    UnitName.minute -> ((0L, 59L, scaledBy(MICROS_PER_MINUTE))),
    UnitName.hour -> ((0L, 23L, scaledBy(MICROS_PER_HOUR))),
    UnitName.day -> ((0L, Integer.MAX_VALUE.toLong, scaledBy(DateTimeUtils.MICROS_PER_DAY)))
  )
}

// Regex fragments, using named groups, from which the day-time patterns below are assembled.
// Sign of the whole interval: '+' or '-'. The class is `[+-]` rather than `[+|-]`, because
// inside a character class '|' is a literal and would be accepted as a (positive) sign.
private val signRe = "(?<sign>[+-])"
// Number of days, followed by at least one whitespace character separating it from the time.
private val dayRe = "((?<day>\\d+)\\s+)"
// One or two digits; `+` after `{1,2}` makes the quantifier possessive (no backtracking).
private val hourRe = "(?<hour>\\d{1,2}+)"
private val minuteRe = "(?<minute>\\d{1,2}+)"
// One or two digits of seconds with an optional fraction of one to nine digits.
private val secondRe = "(?<second>(\\d{1,2}+)(\\.(\\d{1,9}+))?)"
// [sign][day ][hour:]minute:second -- minute and second are mandatory, hour is optional.
private val minsecRe = (s"^$signRe?$dayRe?($hourRe:)?$minuteRe:$secondRe$$").r
// [sign][day ]hour[:minute[:second]] -- hour is mandatory, minute and second are optional.
private val daysecRe = (s"^$signRe?$dayRe?$hourRe(:$minuteRe(:$secondRe)?)?$$").r

/**
 * Lists the units whose enumeration ids lie between the ids of `start` and `end`, both
 * inclusive, in ascending id order. The result is empty when `start.id` is greater than `end.id`.
 */
private def unitsRange(start: UnitName.Value, end: UnitName.Value): Seq[UnitName.Value] = {
  for (unitId <- start.id to end.id) yield UnitName(unitId)
}

/**
* Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn
Expand All @@ -190,54 +217,34 @@ object IntervalUtils {
* - HOUR TO (MINUTE|SECOND)
* - MINUTE TO SECOND
*/
def fromDayTimeString(input: String, from: String, to: String): CalendarInterval = {
def fromDayTimeString(
input: String,
from: UnitName.Value,
to: UnitName.Value): CalendarInterval = {
require(input != null, "Interval day-time string must be not null")
assert(input.length == input.trim.length)
val m = dayTimePattern.pattern.matcher(input)
require(m.matches, s"Interval string must match day-time format of 'd h:m:s.n': $input")
val pattern = (from, to) match {
case (UnitName.minute, UnitName.second) => minsecRe.pattern
case _ => daysecRe.pattern
}
val m = pattern.matcher(input)
require(m.matches, s"Interval string must match day-time format of '$pattern': $input")

def toLong(unitName: UnitName.Value): Long = {
val name = unitName.toString
val (minValue, maxValue, toMicros) = unitValueProps(unitName)
toMicros(toLongWithRange(name, m.group(name), minValue, maxValue))
}

try {
val sign = if (m.group(1) != null && m.group(1) == "-") -1 else 1
val days = if (m.group(2) == null) {
0
} else {
toLongWithRange("day", m.group(3), 0, Integer.MAX_VALUE)
}
var hours: Long = 0L
var minutes: Long = 0L
var seconds: Long = 0L
if (m.group(5) != null || from == "minute") { // 'HH:mm:ss' or 'mm:ss minute'
hours = toLongWithRange("hour", m.group(5), 0, 23)
minutes = toLongWithRange("minute", m.group(6), 0, 59)
seconds = toLongWithRange("second", m.group(7), 0, 59)
} else if (m.group(8) != null) { // 'mm:ss.nn'
minutes = toLongWithRange("minute", m.group(6), 0, 59)
seconds = toLongWithRange("second", m.group(7), 0, 59)
} else { // 'HH:mm'
hours = toLongWithRange("hour", m.group(6), 0, 23)
minutes = toLongWithRange("second", m.group(7), 0, 59)
}
// Hive allow nanosecond precision interval
var secondsFraction = parseNanos(m.group(9), seconds < 0)
to match {
case "hour" =>
minutes = 0
seconds = 0
secondsFraction = 0
case "minute" =>
seconds = 0
secondsFraction = 0
case "second" =>
// No-op
val micros = unitsRange(to, from).map {
case name @ (UnitName.day | UnitName.hour | UnitName.minute) => toLong(name)
case UnitName.second => parseSecondNano(m.group(UnitName.second.toString))
case _ =>
throw new IllegalArgumentException(
s"Cannot support (interval '$input' $from to $to) expression")
}
var micros = secondsFraction
micros = Math.addExact(micros, Math.multiplyExact(days, DateTimeUtils.MICROS_PER_DAY))
micros = Math.addExact(micros, Math.multiplyExact(hours, MICROS_PER_HOUR))
micros = Math.addExact(micros, Math.multiplyExact(minutes, MICROS_PER_MINUTE))
micros = Math.addExact(micros, Math.multiplyExact(seconds, DateTimeUtils.MICROS_PER_SECOND))
}.reduce((x: Long, y: Long) => Math.addExact(x, y))
val sign = if (m.group("sign") != null && m.group("sign") == "-") -1 else 1
new CalendarInterval(0, sign * micros)
} catch {
case e: Exception =>
Expand Down Expand Up @@ -314,6 +321,7 @@ object IntervalUtils {
Long.MaxValue / DateTimeUtils.MICROS_PER_SECOND) * DateTimeUtils.MICROS_PER_SECOND
}

if (secondNano == null) return 0L
secondNano.split("\\.") match {
case Array(secondsStr) => parseSeconds(secondsStr)
case Array("", nanosStr) => parseNanos(nanosStr, false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.util
import java.util.concurrent.TimeUnit

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.util.IntervalUtils.{fromDayTimeString, fromString, fromYearMonthString}
import org.apache.spark.sql.catalyst.util.IntervalUtils.{fromDayTimeString, fromString, fromYearMonthString, UnitName}
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.unsafe.types.CalendarInterval._

Expand Down Expand Up @@ -143,7 +143,7 @@ class IntervalUtilsSuite extends SparkFunSuite {
}

try {
fromDayTimeString("5 1:12:20", "hour", "microsecond")
fromDayTimeString("5 1:12:20", UnitName.hour, UnitName.microsecond)
fail("Expected to throw an exception for the invalid convention type")
} catch {
case e: IllegalArgumentException =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,46 +149,46 @@ interval 1 days 2 hours 3 minutes 4 seconds
-- !query 18
SELECT interval '1 2:03' hour to minute
-- !query 18 schema
struct<interval 1 days 2 hours 3 minutes:interval>
struct<interval 2 hours 3 minutes:interval>
-- !query 18 output
interval 1 days 2 hours 3 minutes
interval 2 hours 3 minutes


-- !query 19
SELECT interval '1 2:03:04' hour to minute
-- !query 19 schema
struct<interval 1 days 2 hours 3 minutes:interval>
struct<interval 2 hours 3 minutes:interval>
-- !query 19 output
interval 1 days 2 hours 3 minutes
interval 2 hours 3 minutes


-- !query 20
SELECT interval '1 2:03' hour to second
-- !query 20 schema
struct<interval 1 days 2 hours 3 minutes:interval>
struct<interval 2 hours 3 minutes:interval>
-- !query 20 output
interval 1 days 2 hours 3 minutes
interval 2 hours 3 minutes


-- !query 21
SELECT interval '1 2:03:04' hour to second
-- !query 21 schema
struct<interval 1 days 2 hours 3 minutes 4 seconds:interval>
struct<interval 2 hours 3 minutes 4 seconds:interval>
-- !query 21 output
interval 1 days 2 hours 3 minutes 4 seconds
interval 2 hours 3 minutes 4 seconds


-- !query 22
SELECT interval '1 2:03' minute to second
-- !query 22 schema
struct<interval 1 days 2 minutes 3 seconds:interval>
struct<interval 2 minutes 3 seconds:interval>
-- !query 22 output
interval 1 days 2 minutes 3 seconds
interval 2 minutes 3 seconds


-- !query 23
SELECT interval '1 2:03:04' minute to second
-- !query 23 schema
struct<interval 1 days 2 hours 3 minutes 4 seconds:interval>
struct<interval 3 minutes 4 seconds:interval>
-- !query 23 output
interval 1 days 2 hours 3 minutes 4 seconds
interval 3 minutes 4 seconds