Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
af2e7ab
Add multiply to CalendarInterval
MaxGekk Oct 15, 2019
4227915
Test for multiply()
MaxGekk Oct 15, 2019
379a20a
Add MultiplyInterval expression
MaxGekk Oct 15, 2019
3e9ed0f
Add divide to CalendarInterval
MaxGekk Oct 15, 2019
670a7c6
Test for divide()
MaxGekk Oct 15, 2019
3ae94cf
Handle ArithmeticException in MultiplyInterval
MaxGekk Oct 15, 2019
3bce68e
Test for the MultiplyInterval expression
MaxGekk Oct 15, 2019
754109c
Add DivideInterval expression
MaxGekk Oct 15, 2019
166dbd8
Test for the DivideInterval expression
MaxGekk Oct 15, 2019
b4dc59a
Remove unused import
MaxGekk Oct 15, 2019
1e2a9a6
Add new rules
MaxGekk Oct 15, 2019
a6b6d81
Tests for new rules
MaxGekk Oct 15, 2019
6e569c0
Add tests to datetime.sql
MaxGekk Oct 15, 2019
69a3cc7
Regen datetime.sql.out
MaxGekk Oct 15, 2019
001d17b
Long -> Double
MaxGekk Oct 15, 2019
014cde5
Fix comment
MaxGekk Oct 15, 2019
049f428
Merge branch 'master' into interval-mul-div
MaxGekk Oct 16, 2019
1ca7c89
Regen datetime.sql.out
MaxGekk Oct 16, 2019
9e6745a
Implement multiply and divide as PostgreSQL does
MaxGekk Oct 17, 2019
b428070
rounded -> truncated
MaxGekk Oct 17, 2019
8ad4001
Merge remote-tracking branch 'origin/master' into interval-mul-div
MaxGekk Oct 18, 2019
91337e5
Merge remote-tracking branch 'remotes/origin/master' into interval-mu…
MaxGekk Oct 25, 2019
2bb916f
Add new line at the end of datetime.sql
MaxGekk Oct 25, 2019
6ba53f0
Avoid fromString() in CalendarIntervalSuite
MaxGekk Oct 25, 2019
719fe6c
Merge remote-tracking branch 'remotes/origin/master' into interval-mu…
MaxGekk Nov 1, 2019
d05ffa4
Rebase on interval with days
MaxGekk Nov 1, 2019
34f6605
Regenerate datetime.sql.out
MaxGekk Nov 1, 2019
00ede6c
Check round micros
MaxGekk Nov 1, 2019
690d9c1
Modify div test to check days div
MaxGekk Nov 1, 2019
5b25432
Regenerate datetime.sql.out
MaxGekk Nov 1, 2019
2265449
Merge remote-tracking branch 'remotes/origin/master' into interval-mu…
MaxGekk Nov 4, 2019
e559fb9
Move multiply() and divide() to IntervalUtils
MaxGekk Nov 5, 2019
35ab9c0
Use DAYS_PER_MONTH
MaxGekk Nov 5, 2019
dbc39e8
Simplify MultiplyInterval and DivideInterval
MaxGekk Nov 5, 2019
8244460
Minor
MaxGekk Nov 5, 2019
b70c0f8
Simplify tests
MaxGekk Nov 5, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,8 @@ object TypeCoercion {
* 2. Turns Add/Subtract of TimestampType/DateType/IntegerType
* and TimestampType/IntegerType/DateType to DateAdd/DateSub/SubtractDates and
* to SubtractTimestamps.
* 3. Turns Multiply/Divide of CalendarIntervalType and NumericType
* to MultiplyInterval/DivideInterval
*/
object DateTimeOperations extends Rule[LogicalPlan] {

Expand All @@ -846,6 +848,12 @@ object TypeCoercion {
Cast(TimeAdd(l, r), l.dataType)
case Subtract(l, r @ CalendarIntervalType()) if acceptedTypes.contains(l.dataType) =>
Cast(TimeSub(l, r), l.dataType)
case Multiply(l @ CalendarIntervalType(), r @ NumericType()) =>
MultiplyInterval(l, r)
case Multiply(l @ NumericType(), r @ CalendarIntervalType()) =>
MultiplyInterval(r, l)
case Divide(l @ CalendarIntervalType(), r @ NumericType()) =>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

postgres=# select interval '1 year' / '365';
   ?column?
---------------
 23:40:16.4064
(1 row)

could this be supported?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taking into account the discussion in #26165, I am not sure. @cloud-fan Should I support this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can, but this should only apply to literals, not string columns.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to support it, please open another PR.

DivideInterval(l, r)

case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r)
case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,45 +45,45 @@ abstract class ExtractIntervalPart(
}

case class ExtractIntervalMillenniums(child: Expression)
extends ExtractIntervalPart(child, IntegerType, getMillenniums, "getMillenniums")
extends ExtractIntervalPart(child, IntegerType, getMillenniums, "getMillenniums")

case class ExtractIntervalCenturies(child: Expression)
extends ExtractIntervalPart(child, IntegerType, getCenturies, "getCenturies")
extends ExtractIntervalPart(child, IntegerType, getCenturies, "getCenturies")

case class ExtractIntervalDecades(child: Expression)
extends ExtractIntervalPart(child, IntegerType, getDecades, "getDecades")
extends ExtractIntervalPart(child, IntegerType, getDecades, "getDecades")

case class ExtractIntervalYears(child: Expression)
extends ExtractIntervalPart(child, IntegerType, getYears, "getYears")
extends ExtractIntervalPart(child, IntegerType, getYears, "getYears")

case class ExtractIntervalQuarters(child: Expression)
extends ExtractIntervalPart(child, ByteType, getQuarters, "getQuarters")
extends ExtractIntervalPart(child, ByteType, getQuarters, "getQuarters")

case class ExtractIntervalMonths(child: Expression)
extends ExtractIntervalPart(child, ByteType, getMonths, "getMonths")
extends ExtractIntervalPart(child, ByteType, getMonths, "getMonths")

case class ExtractIntervalDays(child: Expression)
extends ExtractIntervalPart(child, IntegerType, getDays, "getDays")
extends ExtractIntervalPart(child, IntegerType, getDays, "getDays")

case class ExtractIntervalHours(child: Expression)
extends ExtractIntervalPart(child, LongType, getHours, "getHours")
extends ExtractIntervalPart(child, LongType, getHours, "getHours")

case class ExtractIntervalMinutes(child: Expression)
extends ExtractIntervalPart(child, ByteType, getMinutes, "getMinutes")
extends ExtractIntervalPart(child, ByteType, getMinutes, "getMinutes")

case class ExtractIntervalSeconds(child: Expression)
extends ExtractIntervalPart(child, DecimalType(8, 6), getSeconds, "getSeconds")
extends ExtractIntervalPart(child, DecimalType(8, 6), getSeconds, "getSeconds")

case class ExtractIntervalMilliseconds(child: Expression)
extends ExtractIntervalPart(child, DecimalType(8, 3), getMilliseconds, "getMilliseconds")
extends ExtractIntervalPart(child, DecimalType(8, 3), getMilliseconds, "getMilliseconds")

case class ExtractIntervalMicroseconds(child: Expression)
extends ExtractIntervalPart(child, LongType, getMicroseconds, "getMicroseconds")
extends ExtractIntervalPart(child, LongType, getMicroseconds, "getMicroseconds")

// Number of seconds in 10000 years is 315576000001 (30 days per one month)
// which is 12 digits + 6 digits for the fractional part of seconds.
case class ExtractIntervalEpoch(child: Expression)
extends ExtractIntervalPart(child, DecimalType(18, 6), getEpoch, "getEpoch")
extends ExtractIntervalPart(child, DecimalType(18, 6), getEpoch, "getEpoch")

object ExtractIntervalPart {

Expand All @@ -109,3 +109,47 @@ object ExtractIntervalPart {
case _ => errorHandleFunc
}
}

abstract class IntervalNumOperation(
interval: Expression,
num: Expression,
operation: (CalendarInterval, Double) => CalendarInterval,
operationName: String)
extends BinaryExpression with ImplicitCastInputTypes with Serializable {
override def left: Expression = interval
override def right: Expression = num

override def inputTypes: Seq[AbstractDataType] = Seq(CalendarIntervalType, DoubleType)
override def dataType: DataType = CalendarIntervalType

override def nullable: Boolean = true

override def nullSafeEval(interval: Any, num: Any): Any = {
try {
operation(interval.asInstanceOf[CalendarInterval], num.asInstanceOf[Double])
} catch {
case _: java.lang.ArithmeticException => null
}
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, (interval, num) => {
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
s"""
try {
${ev.value} = $iu.$operationName($interval, $num);
} catch (java.lang.ArithmeticException e) {
${ev.isNull} = true;
}
"""
})
}

override def prettyName: String = operationName + "_interval"
}

case class MultiplyInterval(interval: Expression, num: Expression)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be added for only expressions registered as functions.

extends IntervalNumOperation(interval, num, multiply, "multiply")

case class DivideInterval(interval: Expression, num: Expression)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extends IntervalNumOperation(interval, num, divide, "divide")
Original file line number Diff line number Diff line change
Expand Up @@ -365,4 +365,27 @@ object IntervalUtils {
def isNegative(interval: CalendarInterval, daysPerMonth: Int = 31): Boolean = {
getDuration(interval, TimeUnit.MICROSECONDS, daysPerMonth) < 0
}

/**
* Makes an interval from months, days and micros with the fractional part by
* adding the month fraction to days and the days fraction to micros.
*/
private def fromDoubles(
monthsWithFraction: Double,
daysWithFraction: Double,
microsWithFraction: Double): CalendarInterval = {
val truncatedMonths = Math.toIntExact(monthsWithFraction.toLong)
val days = daysWithFraction + DAYS_PER_MONTH * (monthsWithFraction - truncatedMonths)
val truncatedDays = Math.toIntExact(days.toLong)
val micros = microsWithFraction + DateTimeUtils.MICROS_PER_DAY * (days - truncatedDays)
new CalendarInterval(truncatedMonths, truncatedDays, micros.round)
}

def multiply(interval: CalendarInterval, num: Double): CalendarInterval = {
fromDoubles(num * interval.months, num * interval.days, num * interval.microseconds)
}

def divide(interval: CalendarInterval, num: Double): CalendarInterval = {
fromDoubles(interval.months / num, interval.days / num, interval.microseconds / num)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1597,6 +1597,27 @@ class TypeCoercionSuite extends AnalysisTest {
Multiply(CaseWhen(Seq((EqualTo(1, 2), Cast(1, DecimalType(34, 24)))),
Cast(100, DecimalType(34, 24))), Cast(1, IntegerType)))
}

test("rule for interval operations") {
val dateTimeOperations = TypeCoercion.DateTimeOperations
val interval = Literal(new CalendarInterval(0, 0, 0))

Seq(
Literal(10.toByte, ByteType),
Literal(10.toShort, ShortType),
Literal(10, IntegerType),
Literal(10L, LongType),
Literal(Decimal(10), DecimalType.SYSTEM_DEFAULT),
Literal(10.5.toFloat, FloatType),
Literal(10.5, DoubleType)).foreach { num =>
ruleTest(dateTimeOperations, Multiply(interval, num),
MultiplyInterval(interval, num))
ruleTest(dateTimeOperations, Multiply(num, interval),
MultiplyInterval(interval, num))
ruleTest(dateTimeOperations, Divide(interval, num),
DivideInterval(interval, num))
}
}
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ package org.apache.spark.sql.catalyst.expressions
import scala.language.implicitConversions

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.util.IntervalUtils
import org.apache.spark.sql.catalyst.util.IntervalUtils.fromString
import org.apache.spark.sql.types.Decimal

class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
implicit def interval(s: String): Literal = {
Literal(IntervalUtils.fromString("interval " + s))
Literal(fromString("interval " + s))
}

test("millenniums") {
Expand Down Expand Up @@ -191,4 +191,39 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
ExtractIntervalEpoch("1 second 1 millisecond 1 microsecond"),
Decimal(1.001001, 18, 6))
}

test("multiply") {
def check(interval: String, num: Double, expected: String): Unit = {
checkEvaluation(
MultiplyInterval(Literal(fromString(interval)), Literal(num)),
if (expected == null) null else fromString(expected))
}

check("0 seconds", 10, "0 seconds")
check("10 hours", 0, "0 hours")
check("12 months 1 microseconds", 2, "2 years 2 microseconds")
check("-5 year 3 seconds", 3, "-15 years 9 seconds")
check("1 year 1 second", 0.5, "6 months 500 milliseconds")
check("-100 years -1 millisecond", 0.5, "-50 years -500 microseconds")
check("2 months 4 seconds", -0.5, "-1 months -2 seconds")
check("1 month 2 microseconds", 1.5, "1 months 15 days 3 microseconds")
check("2 months", Int.MaxValue, null)
}

test("divide") {
def check(interval: String, num: Double, expected: String): Unit = {
checkEvaluation(
DivideInterval(Literal(fromString(interval)), Literal(num)),
if (expected == null) null else fromString(expected))
}

check("0 seconds", 10, "0 seconds")
check("12 months 3 milliseconds", 2, "6 months 0.0015 seconds")
check("-5 year 3 seconds", 3, "-1 years -8 months 1 seconds")
check("6 years -7 seconds", 3, "2 years -2.333333 seconds")
check("2 years -8 seconds", 0.5, "4 years -16 seconds")
check("-1 month 2 microseconds", -0.25, "4 months -8 microseconds")
check("1 month 3 microsecond", 1.5, "20 days 2 microseconds")
check("2 months", 0, null)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ package org.apache.spark.sql.catalyst.util
import java.util.concurrent.TimeUnit

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.util.IntervalUtils.{fromDayTimeString, fromString, fromYearMonthString}
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{MICROS_PER_MILLIS, MICROS_PER_SECOND}
import org.apache.spark.sql.catalyst.util.IntervalUtils._
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.unsafe.types.CalendarInterval._

class IntervalUtilsSuite extends SparkFunSuite {

Expand All @@ -34,7 +34,7 @@ class IntervalUtilsSuite extends SparkFunSuite {
testSingleUnit("HouR", 3, 0, 0, 3 * MICROS_PER_HOUR)
testSingleUnit("MiNuTe", 3, 0, 0, 3 * MICROS_PER_MINUTE)
testSingleUnit("Second", 3, 0, 0, 3 * MICROS_PER_SECOND)
testSingleUnit("MilliSecond", 3, 0, 0, 3 * MICROS_PER_MILLI)
testSingleUnit("MilliSecond", 3, 0, 0, 3 * MICROS_PER_MILLIS)
testSingleUnit("MicroSecond", 3, 0, 0, 3)

for (input <- Seq(null, "", " ")) {
Expand Down Expand Up @@ -125,7 +125,7 @@ class IntervalUtilsSuite extends SparkFunSuite {
new CalendarInterval(
0,
10,
12 * MICROS_PER_MINUTE + 888 * MICROS_PER_MILLI))
12 * MICROS_PER_MINUTE + 888 * MICROS_PER_MILLIS))
assert(fromDayTimeString("-3 0:0:0") === new CalendarInterval(0, -3, 0L))

try {
Expand Down Expand Up @@ -186,4 +186,43 @@ class IntervalUtilsSuite extends SparkFunSuite {
assert(!isNegative("1 year -360 days", 31))
assert(!isNegative("-1 year 380 days", 31))
}

test("multiply by num") {
var interval = new CalendarInterval(0, 0, 0)
assert(interval === multiply(interval, 0))
interval = new CalendarInterval(123, 456, 789)
assert(new CalendarInterval(123 * 42, 456 * 42, 789 * 42) === multiply(interval, 42))
interval = new CalendarInterval(-123, -456, -789)
assert(new CalendarInterval(-123 * 42, -456 * 42, -789 * 42) === multiply(interval, 42))
assert(new CalendarInterval(1, 22, 12 * MICROS_PER_HOUR) ===
multiply(new CalendarInterval(1, 5, 0), 1.5))
assert(new CalendarInterval(2, 14, 12 * MICROS_PER_HOUR) ===
multiply(new CalendarInterval(2, 2, 2 * MICROS_PER_HOUR), 1.2))
try {
multiply(new CalendarInterval(2, 0, 0), Integer.MAX_VALUE)
fail("Expected to throw an exception on months overflow")
} catch {
case e: ArithmeticException =>
assert(e.getMessage.contains("overflow"))
}
}

test("divide by num") {
var interval = new CalendarInterval(0, 0, 0)
assert(interval === divide(interval, 10))
interval = new CalendarInterval(1, 3, 30 * MICROS_PER_SECOND)
assert(new CalendarInterval(0, 16, 12 * MICROS_PER_HOUR + 15 * MICROS_PER_SECOND) ===
divide(interval, 2))
assert(new CalendarInterval(2, 6, MICROS_PER_MINUTE) === divide(interval, 0.5))
interval = new CalendarInterval(-1, 0, -30 * MICROS_PER_SECOND)
assert(new CalendarInterval(0, -15, -15 * MICROS_PER_SECOND) === divide(interval, 2))
assert(new CalendarInterval(-2, 0, -1 * MICROS_PER_MINUTE) === divide(interval, 0.5))
try {
divide(new CalendarInterval(123, 456, 789), 0)
fail("Expected to throw an exception on divide by zero")
} catch {
case e: ArithmeticException =>
assert(e.getMessage.contains("overflow"))
}
}
}
5 changes: 5 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/datetime.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,8 @@ select date '2001-10-01' - 7;
select date '2001-10-01' - date '2001-09-28';
select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678';
select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01';

-- interval operations
select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15');
select interval 4 month 2 weeks 3 microseconds * 1.5;
select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5;
26 changes: 25 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/datetime.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 17
-- Number of queries: 20


-- !query 0
Expand Down Expand Up @@ -145,3 +145,27 @@ select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'
struct<subtracttimestamps(TIMESTAMP('2019-10-06 10:11:12.345678'), CAST(DATE '2020-01-01' AS TIMESTAMP)):interval>
-- !query 16 output
interval -2078 hours -48 minutes -47.654322 seconds


-- !query 17
select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15')
-- !query 17 schema
struct<multiply_interval(subtracttimestamps(TIMESTAMP('2019-10-15 10:11:12.001002'), CAST(DATE '2019-10-15' AS TIMESTAMP)), CAST(3 AS DOUBLE)):interval>
-- !query 17 output
interval 30 hours 33 minutes 36.003006 seconds


-- !query 18
select interval 4 month 2 weeks 3 microseconds * 1.5
-- !query 18 schema
struct<multiply_interval(interval 4 months 14 days 0.000003 seconds, CAST(1.5 AS DOUBLE)):interval>
-- !query 18 output
interval 6 months 21 days 0.000005 seconds


-- !query 19
select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5
-- !query 19 schema
struct<divide_interval(subtracttimestamps(TIMESTAMP('2019-10-15 00:00:00'), TIMESTAMP('2019-10-14 00:00:00')), CAST(1.5 AS DOUBLE)):interval>
-- !query 19 output
interval 16 hours