-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-28141][SQL] Support special date values #25708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
c5034a8
695a6c6
ebc5b02
2663542
edda1f2
8d023ec
775c7ec
be787f4
f356b72
864e456
e237b27
0055a8a
4fb8834
9a2349b
6da7905
d57e11e
33befed
e2275d4
dabb6ec
ce7e04d
0fd86a0
92c5509
254567d
d61fdc7
03d3126
106524b
0b0e5d4
ff92531
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -287,7 +287,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String | |
| // [[func]] assumes the input is no longer null because eval already does the null check. | ||
| @inline private[this] def buildCast[T](a: Any, func: T => Any): Any = func(a.asInstanceOf[T]) | ||
|
|
||
| private lazy val dateFormatter = DateFormatter() | ||
| private lazy val dateFormatter = DateFormatter(zoneId) | ||
| private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) | ||
| private val failOnIntegralTypeOverflow = SQLConf.get.failOnIntegralTypeOverflow | ||
|
|
||
|
|
@@ -469,7 +469,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String | |
| // DateConverter | ||
| private[this] def castToDate(from: DataType): Any => Any = from match { | ||
| case StringType => | ||
| buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s).orNull) | ||
| buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s, zoneId).orNull) | ||
| case TimestampType => | ||
| // throw valid precision more than seconds, according to Hive. | ||
| // Timestamp.nanos is in 0 to 999,999,999, no more than a second. | ||
|
|
@@ -1056,28 +1056,31 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String | |
|
|
||
| private[this] def castToDateCode( | ||
| from: DataType, | ||
| ctx: CodegenContext): CastFunction = from match { | ||
| case StringType => | ||
| val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]]) | ||
| (c, evPrim, evNull) => code""" | ||
| scala.Option<Integer> $intOpt = | ||
| org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c); | ||
| if ($intOpt.isDefined()) { | ||
| $evPrim = ((Integer) $intOpt.get()).intValue(); | ||
| } else { | ||
| $evNull = true; | ||
| } | ||
| """ | ||
| case TimestampType => | ||
| val zoneIdClass = classOf[ZoneId] | ||
| val zid = JavaCode.global( | ||
| ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), | ||
| zoneIdClass) | ||
| (c, evPrim, evNull) => | ||
| code"""$evPrim = | ||
| org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);""" | ||
| case _ => | ||
| (c, evPrim, evNull) => code"$evNull = true;" | ||
| ctx: CodegenContext): CastFunction = { | ||
| val zoneIdClass = classOf[ZoneId] | ||
| val zid = JavaCode.global( | ||
| ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), | ||
| zoneIdClass) | ||
| from match { | ||
| case StringType => | ||
| val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]]) | ||
| (c, evPrim, evNull) => | ||
| code""" | ||
| scala.Option<Integer> $intOpt = | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. By the way (this is not related to this PR, though), |
||
| org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid); | ||
| if ($intOpt.isDefined()) { | ||
| $evPrim = ((Integer) $intOpt.get()).intValue(); | ||
| } else { | ||
| $evNull = true; | ||
| } | ||
| """ | ||
| case TimestampType => | ||
| (c, evPrim, evNull) => | ||
| code"""$evPrim = | ||
| org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);""" | ||
| case _ => | ||
| (c, evPrim, evNull) => code"$evNull = true;" | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just put the generated code here for other reviewers; |
||
|
|
||
| private[this] def changePrecision(d: ExprValue, decimalType: DecimalType, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,13 +19,14 @@ package org.apache.spark.sql.catalyst.util | |
|
|
||
| import java.sql.{Date, Timestamp} | ||
| import java.text.SimpleDateFormat | ||
| import java.time.ZoneId | ||
| import java.time.{LocalDate, ZoneId, ZoneOffset} | ||
| import java.util.{Locale, TimeZone} | ||
| import java.util.concurrent.TimeUnit | ||
|
|
||
| import org.apache.spark.SparkFunSuite | ||
| import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ | ||
| import org.apache.spark.sql.catalyst.util.DateTimeUtils._ | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.unsafe.types.UTF8String | ||
|
|
||
| class DateTimeUtilsSuite extends SparkFunSuite { | ||
|
|
@@ -118,28 +119,32 @@ class DateTimeUtilsSuite extends SparkFunSuite { | |
| checkFromToJavaDate(new Date(df2.parse("1776-07-04 18:30:00 UTC").getTime)) | ||
| } | ||
|
|
||
| private def toDate(s: String, zoneId: ZoneId = ZoneOffset.UTC): Option[SQLDate] = { ||
| stringToDate(UTF8String.fromString(s), zoneId) ||
| } ||
|
|
||
| test("string to date") { | ||
| assert(stringToDate(UTF8String.fromString("2015-01-28")).get === days(2015, 1, 28)) | ||
| assert(stringToDate(UTF8String.fromString("2015")).get === days(2015, 1, 1)) | ||
| assert(stringToDate(UTF8String.fromString("0001")).get === days(1, 1, 1)) | ||
| assert(stringToDate(UTF8String.fromString("2015-03")).get === days(2015, 3, 1)) | ||
| assert(toDate("2015-01-28").get === days(2015, 1, 28)) | ||
| assert(toDate("2015").get === days(2015, 1, 1)) | ||
| assert(toDate("0001").get === days(1, 1, 1)) | ||
| assert(toDate("2015-03").get === days(2015, 3, 1)) | ||
| Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ", "2015-03-18 123142", | ||
| "2015-03-18T123123", "2015-03-18T").foreach { s => | ||
| assert(stringToDate(UTF8String.fromString(s)).get === days(2015, 3, 18)) | ||
| assert(toDate(s).get === days(2015, 3, 18)) | ||
| } | ||
|
|
||
| assert(stringToDate(UTF8String.fromString("2015-03-18X")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("2015/03/18")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("2015.03.18")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("20150318")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("2015-031-8")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("02015-03-18")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("015-03-18")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("015")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("02015")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("1999 08 01")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("1999-08 01")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("1999 08")).isEmpty) | ||
| assert(toDate("2015-03-18X").isEmpty) | ||
| assert(toDate("2015/03/18").isEmpty) | ||
| assert(toDate("2015.03.18").isEmpty) | ||
| assert(toDate("20150318").isEmpty) | ||
| assert(toDate("2015-031-8").isEmpty) | ||
| assert(toDate("02015-03-18").isEmpty) | ||
| assert(toDate("015-03-18").isEmpty) | ||
| assert(toDate("015").isEmpty) | ||
| assert(toDate("02015").isEmpty) | ||
| assert(toDate("1999 08 01").isEmpty) | ||
| assert(toDate("1999-08 01").isEmpty) | ||
| assert(toDate("1999 08").isEmpty) | ||
| } | ||
|
|
||
| test("string to timestamp") { | ||
|
|
@@ -258,12 +263,10 @@ class DateTimeUtilsSuite extends SparkFunSuite { | |
|
|
||
| test("SPARK-15379: special invalid date string") { | ||
| // Test stringToDate | ||
| assert(stringToDate( | ||
| UTF8String.fromString("2015-02-29 00:00:00")).isEmpty) | ||
| assert(stringToDate( | ||
| UTF8String.fromString("2015-04-31 00:00:00")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("2015-02-29")).isEmpty) | ||
| assert(stringToDate(UTF8String.fromString("2015-04-31")).isEmpty) | ||
| assert(toDate("2015-02-29 00:00:00").isEmpty) | ||
| assert(toDate("2015-04-31 00:00:00").isEmpty) | ||
| assert(toDate("2015-02-29").isEmpty) | ||
| assert(toDate("2015-04-31").isEmpty) | ||
|
|
||
|
|
||
| // Test stringToTimestamp | ||
|
|
@@ -564,4 +567,17 @@ class DateTimeUtilsSuite extends SparkFunSuite { | |
| assert(DateTimeUtils.toMillis(-9223372036844776001L) === -9223372036844777L) | ||
| assert(DateTimeUtils.toMillis(-157700927876544L) === -157700927877L) | ||
| } | ||
|
|
||
| test("special date values") { | ||
| DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => | ||
| val zoneId = getZoneId(timeZone) | ||
|
|
||
| assert(toDate("epoch", zoneId).get === 0) | ||
| val today = localDateToDays(LocalDate.now(zoneId)) | ||
| assert(toDate("yesterday", zoneId).get === today - 1) | ||
|
||
| assert(toDate("now", zoneId).get === today) | ||
| assert(toDate("today", zoneId).get === today) | ||
| assert(toDate("tomorrow", zoneId).get === today + 1) | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.