Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.trees.TreeNodeTag
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
Expand Down Expand Up @@ -407,9 +408,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
case udt: UserDefinedType[_] =>
buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString))
case YearMonthIntervalType =>
buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYearMonthIntervalString(i)))
buildCast[Int](_, i => UTF8String.fromString(
IntervalUtils.toYearMonthIntervalString(i, ANSI_STYLE)))
case DayTimeIntervalType =>
buildCast[Long](_, i => UTF8String.fromString(IntervalUtils.toDayTimeIntervalString(i)))
buildCast[Long](_, i => UTF8String.fromString(
IntervalUtils.toDayTimeIntervalString(i, ANSI_STYLE)))
case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString))
}

Expand Down Expand Up @@ -1125,14 +1128,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
(c, evPrim, evNull) => {
code"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());"
}
case YearMonthIntervalType =>
case i @ (YearMonthIntervalType | DayTimeIntervalType) =>
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
(c, evPrim, _) =>
code"""$evPrim = UTF8String.fromString($iu.toYearMonthIntervalString($c));"""
case DayTimeIntervalType =>
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
(c, evPrim, _) =>
code"""$evPrim = UTF8String.fromString($iu.toDayTimeIntervalString($c));"""
val iss = IntervalStringStyles.getClass.getName.stripSuffix("$")
val subType = if (i.isInstanceOf[YearMonthIntervalType]) "YearMonth" else "DayTime"
val f = s"to${subType}IntervalString"
val style = s"$iss$$.MODULE$$.ANSI_STYLE()"
(c, evPrim, _) => code"""$evPrim = UTF8String.fromString($iu.$f($c, $style));"""
case _ =>
(c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,17 @@ import scala.util.control.NonFatal

import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils.millisToMicros
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.{ANSI_STYLE, HIVE_STYLE, IntervalStyle}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.Decimal
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

// The style of textual representation of intervals
object IntervalStringStyles extends Enumeration {
type IntervalStyle = Value
val ANSI_STYLE, HIVE_STYLE = Value
}

object IntervalUtils {

object IntervalUnit extends Enumeration {
Expand Down Expand Up @@ -840,47 +847,67 @@ object IntervalUtils {
* which conforms to the ANSI SQL standard.
*
* @param months The number of months, positive or negative
* @param style The style of textual representation of the interval
* @return Year-month interval string
*/
def toYearMonthIntervalString(months: Int): String = {
def toYearMonthIntervalString(months: Int, style: IntervalStyle): String = {
var sign = ""
var absMonths: Long = months
if (months < 0) {
sign = "-"
absMonths = -absMonths
}
s"INTERVAL '$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}' YEAR TO MONTH"
val payload = s"$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}"
style match {
case ANSI_STYLE => s"INTERVAL '$payload' YEAR TO MONTH"
case HIVE_STYLE => payload
}
}

/**
* Converts a day-time interval as a number of microseconds to its textual representation
* which conforms to the ANSI SQL standard.
*
* @param micros The number of microseconds, positive or negative
* @param style The style of textual representation of the interval
* @return Day-time interval string
*/
def toDayTimeIntervalString(micros: Long): String = {
def toDayTimeIntervalString(micros: Long, style: IntervalStyle): String = {
var sign = ""
var rest = micros
if (micros < 0) {
if (micros == Long.MinValue) {
// Especial handling of minimum `Long` value because negate op overflows `Long`.
// seconds = 106751991 * (24 * 60 * 60) + 4 * 60 * 60 + 54 = 9223372036854
// microseconds = -9223372036854000000L-775808 == Long.MinValue
return "INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND"
val minIntervalString = style match {
case ANSI_STYLE => "INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND"
case HIVE_STYLE => "-106751991 04:00:54.775808000"
}
return minIntervalString
} else {
sign = "-"
rest = -rest
}
}
val seconds = rest % MICROS_PER_MINUTE
val secondsWithFraction = rest % MICROS_PER_MINUTE
rest /= MICROS_PER_MINUTE
val minutes = rest % MINUTES_PER_HOUR
rest /= MINUTES_PER_HOUR
val hours = rest % HOURS_PER_DAY
val days = rest / HOURS_PER_DAY
val leadSecZero = if (seconds < 10 * MICROS_PER_SECOND) "0" else ""
val secStr = java.math.BigDecimal.valueOf(seconds, 6).stripTrailingZeros().toPlainString()
f"INTERVAL '$sign$days $hours%02d:$minutes%02d:$leadSecZero$secStr' DAY TO SECOND"
val leadSecZero = if (secondsWithFraction < 10 * MICROS_PER_SECOND) "0" else ""
val intervalString = style match {
case ANSI_STYLE =>
val secStr = java.math.BigDecimal.valueOf(secondsWithFraction, 6)
.stripTrailingZeros()
.toPlainString()
f"INTERVAL '$sign$days $hours%02d:$minutes%02d:$leadSecZero$secStr' DAY TO SECOND"
case HIVE_STYLE =>
val seconds = secondsWithFraction / MICROS_PER_SECOND
val nanos = (secondsWithFraction % MICROS_PER_SECOND) * NANOS_PER_MICROS
f"$sign$days $hours%02d:$minutes%02d:$seconds%02d.$nanos%09d"
}
intervalString
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils.millisToMicros
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.{ANSI_STYLE, HIVE_STYLE}
import org.apache.spark.sql.catalyst.util.IntervalUtils._
import org.apache.spark.sql.catalyst.util.IntervalUtils.IntervalUnit._
import org.apache.spark.sql.internal.SQLConf
Expand Down Expand Up @@ -441,4 +442,36 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
assert(durationToMicros(duration) === micros)
}
}

test("SPARK-35016: format year-month intervals") {
Seq(
0 -> ("0-0", "INTERVAL '0-0' YEAR TO MONTH"),
-11 -> ("-0-11", "INTERVAL '-0-11' YEAR TO MONTH"),
11 -> ("0-11", "INTERVAL '0-11' YEAR TO MONTH"),
-13 -> ("-1-1", "INTERVAL '-1-1' YEAR TO MONTH"),
13 -> ("1-1", "INTERVAL '1-1' YEAR TO MONTH"),
-24 -> ("-2-0", "INTERVAL '-2-0' YEAR TO MONTH"),
24 -> ("2-0", "INTERVAL '2-0' YEAR TO MONTH"),
Int.MinValue -> ("-178956970-8", "INTERVAL '-178956970-8' YEAR TO MONTH"),
Int.MaxValue -> ("178956970-7", "INTERVAL '178956970-7' YEAR TO MONTH")
).foreach { case (months, (hiveIntervalStr, ansiIntervalStr)) =>
assert(toYearMonthIntervalString(months, ANSI_STYLE) === ansiIntervalStr)
assert(toYearMonthIntervalString(months, HIVE_STYLE) === hiveIntervalStr)
}
}

test("SPARK-35016: format day-time intervals") {
Seq(
0L -> ("0 00:00:00.000000000", "INTERVAL '0 00:00:00' DAY TO SECOND"),
-1L -> ("-0 00:00:00.000001000", "INTERVAL '-0 00:00:00.000001' DAY TO SECOND"),
10 * MICROS_PER_MILLIS -> ("0 00:00:00.010000000", "INTERVAL '0 00:00:00.01' DAY TO SECOND"),
(-123 * MICROS_PER_DAY - 3 * MICROS_PER_SECOND) ->
("-123 00:00:03.000000000", "INTERVAL '-123 00:00:03' DAY TO SECOND"),
Long.MinValue -> ("-106751991 04:00:54.775808000",
"INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND")
).foreach { case (micros, (hiveIntervalStr, ansiIntervalStr)) =>
assert(toDayTimeIntervalString(micros, ANSI_STYLE) === ansiIntervalStr)
assert(toDayTimeIntervalString(micros, HIVE_STYLE) === hiveIntervalStr)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import java.time.{Duration, Instant, LocalDate, Period, ZoneOffset}

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.HIVE_STYLE
import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString}
import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand}
import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec}
Expand Down Expand Up @@ -119,9 +120,9 @@ object HiveResult {
s""""${t.name}":${toHiveString((v, t.dataType), true, formatters)}"""
}.mkString("{", ",", "}")
case (period: Period, YearMonthIntervalType) =>
toYearMonthIntervalString(periodToMonths(period))
toYearMonthIntervalString(periodToMonths(period), HIVE_STYLE)
case (duration: Duration, DayTimeIntervalType) =>
toDayTimeIntervalString(durationToMicros(duration))
toDayTimeIntervalString(durationToMicros(duration), HIVE_STYLE)
case (other, _: UserDefinedType[_]) => other.toString
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,19 +110,19 @@ class HiveResultSuite extends SharedSparkSession {
}
}

test("SPARK-34984: year-month interval formatting in hive result") {
test("SPARK-34984, SPARK-35016: year-month interval formatting in hive result") {
val df = Seq(Period.ofYears(-10).minusMonths(1)).toDF("i")
val plan1 = df.queryExecution.executedPlan
assert(hiveResultString(plan1) === Seq("INTERVAL '-10-1' YEAR TO MONTH"))
assert(hiveResultString(plan1) === Seq("-10-1"))
val plan2 = df.selectExpr("array(i)").queryExecution.executedPlan
assert(hiveResultString(plan2) === Seq("[INTERVAL '-10-1' YEAR TO MONTH]"))
assert(hiveResultString(plan2) === Seq("[-10-1]"))
}

test("SPARK-34984: day-time interval formatting in hive result") {
test("SPARK-34984, SPARK-35016: day-time interval formatting in hive result") {
val df = Seq(Duration.ofDays(5).plusMillis(10)).toDF("i")
val plan1 = df.queryExecution.executedPlan
assert(hiveResultString(plan1) === Seq("INTERVAL '5 00:00:00.01' DAY TO SECOND"))
assert(hiveResultString(plan1) === Seq("5 00:00:00.010000000"))
val plan2 = df.selectExpr("array(i)").queryExecution.executedPlan
assert(hiveResultString(plan2) === Seq("[INTERVAL '5 00:00:00.01' DAY TO SECOND]"))
assert(hiveResultString(plan2) === Seq("[5 00:00:00.010000000]"))
}
}