Skip to content
Closed
Original file line number Diff line number Diff line change
Expand Up @@ -425,14 +425,18 @@ object IntervalUtils {
}

private object ParseState extends Enumeration {
type ParseState = Value

val PREFIX,
BEGIN_VALUE,
PARSE_SIGN,
PARSE_UNIT_VALUE,
FRACTIONAL_PART,
BEGIN_UNIT_NAME,
UNIT_NAME_SUFFIX,
END_UNIT_NAME = Value
TRIM_BEFORE_SIGN,
SIGN,
TRIM_BEFORE_VALUE,
VALUE,
VALUE_FRACTIONAL_PART,
TRIM_BEFORE_UNIT,
UNIT_BEGIN,
UNIT_SUFFIX,
UNIT_END = Value
}
private final val intervalStr = UTF8String.fromString("interval ")
private def unitToUtf8(unit: IntervalUnit): UTF8String = {
Expand All @@ -458,7 +462,7 @@ object IntervalUtils {
val s = input.trim.toLowerCase
// scalastyle:on
val bytes = s.getBytes
if (bytes.length == 0) {
if (bytes.isEmpty) {
return null
}
var state = PREFIX
Expand All @@ -471,6 +475,13 @@ object IntervalUtils {
var fractionScale: Int = 0
var fraction: Int = 0

// Whitespace-skipping helper used by the TRIM_* states: a space byte is
// consumed by advancing the cursor `i`; any other byte is left in place and
// the parser switches to `next`, which then examines that same byte.
def trimToNextState(b: Byte, next: ParseState): Unit = {
  if (b == ' ') {
    i += 1
  } else {
    state = next
  }
}

while (i < bytes.length) {
val b = bytes(i)
state match {
Expand All @@ -482,13 +493,9 @@ object IntervalUtils {
i += intervalStr.numBytes()
}
}
state = BEGIN_VALUE
case BEGIN_VALUE =>
b match {
case ' ' => i += 1
case _ => state = PARSE_SIGN
}
case PARSE_SIGN =>
state = TRIM_BEFORE_SIGN
case TRIM_BEFORE_SIGN => trimToNextState(b, SIGN)
case SIGN =>
b match {
case '-' =>
isNegative = true
Expand All @@ -505,111 +512,108 @@ object IntervalUtils {
// Sets the scale to an invalid value to track fraction presence
// in the UNIT_BEGIN state
fractionScale = -1
state = PARSE_UNIT_VALUE
case PARSE_UNIT_VALUE =>
state = TRIM_BEFORE_VALUE
case TRIM_BEFORE_VALUE => trimToNextState(b, VALUE)
case VALUE =>
b match {
case _ if '0' <= b && b <= '9' =>
try {
currentValue = Math.addExact(Math.multiplyExact(10, currentValue), (b - '0'))
} catch {
case _: ArithmeticException => return null
}
case ' ' =>
state = BEGIN_UNIT_NAME
case ' ' => state = TRIM_BEFORE_UNIT
case '.' =>
fractionScale = (NANOS_PER_SECOND / 10).toInt
state = FRACTIONAL_PART
state = VALUE_FRACTIONAL_PART
case _ => return null
}
i += 1
case FRACTIONAL_PART =>
case VALUE_FRACTIONAL_PART =>
b match {
case _ if '0' <= b && b <= '9' && fractionScale > 0 =>
fraction += (b - '0') * fractionScale
fractionScale /= 10
case ' ' =>
fraction /= NANOS_PER_MICROS.toInt
state = BEGIN_UNIT_NAME
state = TRIM_BEFORE_UNIT
case _ => return null
}
i += 1
case BEGIN_UNIT_NAME =>
if (b == ' ') {
i += 1
} else {
// Checks that only seconds can have the fractional part
if (b != 's' && fractionScale >= 0) {
return null
}
if (isNegative) {
currentValue = -currentValue
fraction = -fraction
}
try {
b match {
case 'y' if s.matchAt(yearStr, i) =>
val monthsInYears = Math.multiplyExact(MONTHS_PER_YEAR, currentValue)
months = Math.toIntExact(Math.addExact(months, monthsInYears))
i += yearStr.numBytes()
case 'w' if s.matchAt(weekStr, i) =>
val daysInWeeks = Math.multiplyExact(DAYS_PER_WEEK, currentValue)
days = Math.toIntExact(Math.addExact(days, daysInWeeks))
i += weekStr.numBytes()
case 'd' if s.matchAt(dayStr, i) =>
days = Math.addExact(days, Math.toIntExact(currentValue))
i += dayStr.numBytes()
case 'h' if s.matchAt(hourStr, i) =>
val hoursUs = Math.multiplyExact(currentValue, MICROS_PER_HOUR)
microseconds = Math.addExact(microseconds, hoursUs)
i += hourStr.numBytes()
case 's' if s.matchAt(secondStr, i) =>
val secondsUs = Math.multiplyExact(currentValue, MICROS_PER_SECOND)
microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction)
i += secondStr.numBytes()
case 'm' =>
if (s.matchAt(monthStr, i)) {
months = Math.addExact(months, Math.toIntExact(currentValue))
i += monthStr.numBytes()
} else if (s.matchAt(minuteStr, i)) {
val minutesUs = Math.multiplyExact(currentValue, MICROS_PER_MINUTE)
microseconds = Math.addExact(microseconds, minutesUs)
i += minuteStr.numBytes()
} else if (s.matchAt(millisStr, i)) {
val millisUs = Math.multiplyExact(
currentValue,
MICROS_PER_MILLIS)
microseconds = Math.addExact(microseconds, millisUs)
i += millisStr.numBytes()
} else if (s.matchAt(microsStr, i)) {
microseconds = Math.addExact(microseconds, currentValue)
i += microsStr.numBytes()
} else return null
case _ => return null
}
} catch {
case _: ArithmeticException => return null
case TRIM_BEFORE_UNIT => trimToNextState(b, UNIT_BEGIN)
case UNIT_BEGIN =>
// Checks that only seconds can have the fractional part
if (b != 's' && fractionScale >= 0) {
return null
}
if (isNegative) {
currentValue = -currentValue
fraction = -fraction
}
try {
b match {
case 'y' if s.matchAt(yearStr, i) =>
val monthsInYears = Math.multiplyExact(MONTHS_PER_YEAR, currentValue)
months = Math.toIntExact(Math.addExact(months, monthsInYears))
i += yearStr.numBytes()
case 'w' if s.matchAt(weekStr, i) =>
val daysInWeeks = Math.multiplyExact(DAYS_PER_WEEK, currentValue)
days = Math.toIntExact(Math.addExact(days, daysInWeeks))
i += weekStr.numBytes()
case 'd' if s.matchAt(dayStr, i) =>
days = Math.addExact(days, Math.toIntExact(currentValue))
i += dayStr.numBytes()
case 'h' if s.matchAt(hourStr, i) =>
val hoursUs = Math.multiplyExact(currentValue, MICROS_PER_HOUR)
microseconds = Math.addExact(microseconds, hoursUs)
i += hourStr.numBytes()
case 's' if s.matchAt(secondStr, i) =>
val secondsUs = Math.multiplyExact(currentValue, MICROS_PER_SECOND)
microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction)
i += secondStr.numBytes()
case 'm' =>
if (s.matchAt(monthStr, i)) {
months = Math.addExact(months, Math.toIntExact(currentValue))
i += monthStr.numBytes()
} else if (s.matchAt(minuteStr, i)) {
val minutesUs = Math.multiplyExact(currentValue, MICROS_PER_MINUTE)
microseconds = Math.addExact(microseconds, minutesUs)
i += minuteStr.numBytes()
} else if (s.matchAt(millisStr, i)) {
val millisUs = Math.multiplyExact(
currentValue,
MICROS_PER_MILLIS)
microseconds = Math.addExact(microseconds, millisUs)
i += millisStr.numBytes()
} else if (s.matchAt(microsStr, i)) {
microseconds = Math.addExact(microseconds, currentValue)
i += microsStr.numBytes()
} else return null
case _ => return null
}
state = UNIT_NAME_SUFFIX
} catch {
case _: ArithmeticException => return null
}
case UNIT_NAME_SUFFIX =>
state = UNIT_SUFFIX
case UNIT_SUFFIX =>
b match {
case 's' => state = END_UNIT_NAME
case ' ' => state = BEGIN_VALUE
case 's' => state = UNIT_END
case ' ' => state = TRIM_BEFORE_SIGN
case _ => return null
}
i += 1
case END_UNIT_NAME =>
case UNIT_END =>
b match {
case ' ' =>
i += 1
state = BEGIN_VALUE
state = TRIM_BEFORE_SIGN
case _ => return null
}
}
}

val result = state match {
case UNIT_NAME_SUFFIX | END_UNIT_NAME | BEGIN_VALUE =>
case UNIT_SUFFIX | UNIT_END | TRIM_BEFORE_SIGN =>
new CalendarInterval(months, days, microseconds)
case _ => null
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class IntervalUtilsSuite extends SparkFunSuite {
"-1 MONTH 1 day -1 microseconds" -> new CalendarInterval(-1, 1, -1),
" 123 MONTHS 123 DAYS 123 Microsecond " -> new CalendarInterval(123, 123, 123),
"interval -1 day +3 Microseconds" -> new CalendarInterval(0, -1, 3),
"interval - 1 day + 3 Microseconds" -> new CalendarInterval(0, -1, 3),
" interval 8 years -11 months 123 weeks -1 day " +
"23 hours -22 minutes 1 second -123 millisecond 567 microseconds " ->
new CalendarInterval(85, 860, 81480877567L)).foreach { case (input, expected) =>
Expand Down
48 changes: 26 additions & 22 deletions sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15.1
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.14.6
Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
prepare string w/ interval 442 472 41 2.3 442.4 1.0X
prepare string w/o interval 420 423 6 2.4 419.6 1.1X
1 units w/ interval 350 359 9 2.9 349.8 1.3X
1 units w/o interval 316 317 1 3.2 316.4 1.4X
2 units w/ interval 457 459 2 2.2 457.0 1.0X
2 units w/o interval 432 435 3 2.3 432.2 1.0X
3 units w/ interval 610 613 3 1.6 609.8 0.7X
3 units w/o interval 581 583 2 1.7 580.5 0.8X
4 units w/ interval 720 724 4 1.4 720.4 0.6X
4 units w/o interval 699 704 8 1.4 699.4 0.6X
5 units w/ interval 850 850 0 1.2 849.9 0.5X
5 units w/o interval 829 832 5 1.2 828.7 0.5X
6 units w/ interval 927 932 4 1.1 927.1 0.5X
6 units w/o interval 891 892 1 1.1 890.5 0.5X
7 units w/ interval 1033 1040 8 1.0 1033.2 0.4X
7 units w/o interval 1020 1024 5 1.0 1020.2 0.4X
8 units w/ interval 1168 1169 2 0.9 1168.0 0.4X
8 units w/o interval 1155 1157 2 0.9 1154.5 0.4X
9 units w/ interval 1326 1328 3 0.8 1326.1 0.3X
9 units w/o interval 1372 1381 14 0.7 1372.5 0.3X
prepare string w/ interval 574 610 45 1.7 573.9 1.0X
prepare string w/o interval 518 538 27 1.9 517.7 1.1X
1 units w/ interval 425 439 16 2.4 425.3 1.3X
1 units w/o interval 385 393 10 2.6 385.2 1.5X
2 units w/ interval 553 561 11 1.8 553.1 1.0X
2 units w/o interval 531 543 11 1.9 531.0 1.1X
3 units w/ interval 1134 1159 32 0.9 1134.0 0.5X
3 units w/o interval 1121 1126 6 0.9 1121.3 0.5X
4 units w/ interval 1226 1250 21 0.8 1226.1 0.5X
4 units w/o interval 1227 1239 11 0.8 1227.1 0.5X
5 units w/ interval 1375 1447 93 0.7 1374.7 0.4X
5 units w/o interval 1335 1346 19 0.7 1335.1 0.4X
6 units w/ interval 1530 1556 24 0.7 1529.5 0.4X
6 units w/o interval 1481 1492 17 0.7 1480.7 0.4X
7 units w/ interval 1730 1745 14 0.6 1729.9 0.3X
7 units w/o interval 1788 1859 112 0.6 1788.1 0.3X
8 units w/ interval 1952 2087 117 0.5 1951.7 0.3X
8 units w/o interval 2083 2207 209 0.5 2082.5 0.3X
9 units w/ interval 2228 2291 60 0.4 2227.5 0.3X
9 units w/o interval 2130 2184 75 0.5 2130.1 0.3X
10 units w/ interval 2414 2502 81 0.4 2413.8 0.2X
10 units w/o interval 2463 2488 35 0.4 2463.1 0.2X
11 units w/ interval 2717 2755 42 0.4 2716.8 0.2X
11 units w/o interval 2578 2661 77 0.4 2577.7 0.2X

Loading