From 06393e28c767f6c2c57c6a6b6428c4ac4a8f8c53 Mon Sep 17 00:00:00 2001
From: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com>
Date: Mon, 18 May 2020 16:24:22 +0800
Subject: [PATCH 1/5] SPARK-31710: Add compatibility flag to cast long to timestamp

In Hive, a long value is interpreted as milliseconds when cast to a timestamp,
while in Spark it is interpreted as seconds. We have many SQL jobs running in
production, so we need a compatibility flag that lets them migrate smoothly
without changing the default behavior in Spark.
---
 .../org/apache/spark/sql/internal/SQLConf.scala | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index a2576684d689..d9cc9bd2984e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2576,6 +2576,15 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)

+  val LONG_TIMESTAMP_CONVERSION_IN_SECONDS =
+    buildConf("spark.sql.legacy.longTimestampConversionInSeconds")
+      .internal()
+      .doc("When false, a Byte/Short/Int/Long value is interpreted as milliseconds " +
+        "during the conversion to timestamp. " +
+        "When true, the value is interpreted as seconds, " +
+        "to be consistent with Decimal/Double.")
+      .booleanConf
+      .createWithDefault(true)
  /**
   * Holds information about keys that have been deprecated.
   *
@@ -3160,6 +3169,9 @@ class SQLConf extends Serializable with Logging {
   def parquetRebaseDateTimeInReadEnabled: Boolean = {
     getConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ)
   }
+
+  def longTimestampConversionInSeconds: Boolean = getConf(LONG_TIMESTAMP_CONVERSION_IN_SECONDS)
+
   /** ********************** SQLConf functionality methods ************ */

From 97ef3cc807d1e3072d41ddf960b032201dd80899 Mon Sep 17 00:00:00 2001
From: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com>
Date: Mon, 18 May 2020 16:33:17 +0800
Subject: [PATCH 2/5] SPARK-31710: Add compatibility flag to cast long to timestamp

In Hive, a long value is interpreted as milliseconds when cast to a timestamp,
while in Spark it is interpreted as seconds. We have many SQL jobs running in
production, so we need a compatibility flag that lets them migrate smoothly
without changing the default behavior in Spark.
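
For illustration only (not part of the patch): a minimal standalone Scala
sketch of the two interpretations the flag selects between. The object name
and constants below are made up for the example; the actual Cast.scala code
uses MICROS_PER_SECOND and MICROS_PER_MILLI from the Spark codebase.

    object LongToTimestampSketch {
      // TimestampType stores microseconds since the epoch.
      private val MicrosPerSecond = 1000000L // 1 second      = 1,000,000 microseconds
      private val MicrosPerMilli  = 1000L    // 1 millisecond = 1,000 microseconds

      // Current Spark behavior (flag = true): the long counts seconds since the epoch.
      def secondsToMicros(t: Long): Long = t * MicrosPerSecond

      // Hive-compatible behavior (flag = false): the long counts milliseconds since the epoch.
      def millisToMicros(t: Long): Long = t * MicrosPerMilli

      def main(args: Array[String]): Unit = {
        println(secondsToMicros(1L)) // 1000000: one second after the epoch
        println(millisToMicros(1L))  // 1000: one millisecond after the epoch
      }
    }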
---
 .../spark/sql/catalyst/expressions/Cast.scala | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 8d82956cc6f7..4a75bf64817c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -453,7 +453,14 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
     if (d.isNaN || d.isInfinite) null else (d * MICROS_PER_SECOND).toLong
   }

-  // converting seconds to us
+  // SPARK-31710: converting seconds to us, add compatibility flag
+  private[this] def longToTimestamp(t: Long): Long = {
+    if ( SQLConf.get.getConf( SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS ) )
+      (t * MICROS_PER_SECOND).toLong
+    else
+      (t * MICROS_PER_MILLI).toLong
+  }
+
   private[this] def longToTimestamp(t: Long): Long = SECONDS.toMicros(t)
   // converting us to seconds
   private[this] def timestampToLong(ts: Long): Long = {
@@ -1270,7 +1277,15 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
     val block = inline"new java.math.BigDecimal($MICROS_PER_SECOND)"
     code"($d.toBigDecimal().bigDecimal().multiply($block)).longValue()"
   }
-  private[this] def longToTimeStampCode(l: ExprValue): Block = code"$l * (long)$MICROS_PER_SECOND"
+
+  // SPARK-31710: converting seconds to us, add compatibility flag
+  private[this] def longToTimeStampCode(l: ExprValue): Block = {
+    if ( SQLConf.get.getConf( SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS ) )
+      code"$l * (long)$MICROS_PER_SECOND"
+    else
+      code"$l * (long)$MICROS_PER_MILLI"
+  }
+
   private[this] def timestampToLongCode(ts: ExprValue): Block =
     code"java.lang.Math.floorDiv($ts, $MICROS_PER_SECOND)"
   private[this] def timestampToDoubleCode(ts: ExprValue): Block =

From 9d3948358c15d68fbfcafc59b7ca0d380c66e83b Mon Sep 17 00:00:00 2001
From: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com>
Date: Mon, 18 May 2020 16:34:15 +0800
Subject: [PATCH 3/5] SPARK-31710: Add compatibility flag to cast long to timestamp

In Hive, a long value is interpreted as milliseconds when cast to a timestamp,
while in Spark it is interpreted as seconds. We have many SQL jobs running in
production, so we need a compatibility flag that lets them migrate smoothly
without changing the default behavior in Spark.
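
As a usage sketch only (assuming this patch series is applied; the flag is
proposed here and does not exist in released Spark), toggling it from a
session would look roughly like this, matching the expectations in the test
added below. Displayed values assume a UTC session time zone.

    // Default (true): a long is read as seconds since the epoch.
    spark.conf.set("spark.sql.legacy.longTimestampConversionInSeconds", "true")
    spark.sql("SELECT CAST(1L AS TIMESTAMP)").show()  // 1970-01-01 00:00:01

    // Hive-compatible (false): a long is read as milliseconds since the epoch.
    spark.conf.set("spark.sql.legacy.longTimestampConversionInSeconds", "false")
    spark.sql("SELECT CAST(1L AS TIMESTAMP)").show()  // 1970-01-01 00:00:00.001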
---
 .../sql/catalyst/expressions/CastSuite.scala | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index ee94f3587b55..fabe7ff6654e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -1341,4 +1341,33 @@ class AnsiCastSuite extends CastSuiteBase {
       cast("abc.com", dataType), "invalid input")
     }
   }
+
+  test("SPARK-31710: Add compatibility flag to cast long to timestamp") {
+    withSQLConf(
+      SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS.key -> "false") {
+      for (tz <- ALL_TIMEZONES) {
+        def checkLongToTimestamp(str: Long, expected: Long): Unit = {
+          checkEvaluation(cast(str, TimestampType, Option(tz.getID)), expected)
+        }
+        checkLongToTimestamp(253402272000L, 253402272000000L)
+        checkLongToTimestamp(-5L, -5000L)
+        checkLongToTimestamp(1L, 1000L)
+        checkLongToTimestamp(0L, 0L)
+        checkLongToTimestamp(123L, 123000L)
+      }
+    }
+    withSQLConf(
+      SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS.key -> "true") {
+      for (tz <- ALL_TIMEZONES) {
+        def checkLongToTimestamp(str: Long, expected: Long): Unit = {
+          checkEvaluation(cast(str, TimestampType, Option(tz.getID)), expected)
+        }
+        checkLongToTimestamp(253402272000L, 253402272000000000L)
+        checkLongToTimestamp(-5L, -5000000L)
+        checkLongToTimestamp(1L, 1000000L)
+        checkLongToTimestamp(0L, 0L)
+        checkLongToTimestamp(123L, 123000000L)
+      }
+    }
+  }
 }

From 0557f7397328398b80236fed56ed5a6cb3ca3164 Mon Sep 17 00:00:00 2001
From: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com>
Date: Mon, 18 May 2020 17:55:06 +0800
Subject: [PATCH 4/5] [SPARK-31710][SQL] Add compatibility flag to cast long to timestamp

In Hive, a long value is interpreted as milliseconds when cast to a timestamp,
while in Spark it is interpreted as seconds, so we have been getting incorrect
data while migrating Hive SQL to Spark SQL. With the compatibility flag we can
avoid this error.
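
For reference, the expected values in the CastSuite hunk added in PATCH 3/5
follow directly from TimestampType's internal representation, microseconds
since the epoch:

    flag = false (milliseconds): 123L -> 123 * 1,000     =     123,000 us
    flag = true  (seconds):      123L -> 123 * 1,000,000 = 123,000,000 us
    flag = false (milliseconds):  -5L ->  -5 * 1,000     =      -5,000 us
    flag = true  (seconds):       -5L ->  -5 * 1,000,000 =  -5,000,000 us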
---
 .../spark/sql/catalyst/expressions/Cast.scala | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index cdd2d5fb2ef8..5e19c4faf928 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -455,13 +455,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
   // SPARK-31710: converting seconds to us, add compatibility flag
   private[this] def longToTimestamp(t: Long): Long = {
-    if ( SQLConf.get.getConf( SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS ) )
-      (t * MICROS_PER_SECOND).toLong
-    else
-      (t * MICROS_PER_MILLI).toLong
+    if ( SQLConf.get.getConf( SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS )) t * 1000000L
+    else t * 1000L
   }
-
-  private[this] def longToTimestamp(t: Long): Long = SECONDS.toMicros(t)
   // converting us to seconds
   private[this] def timestampToLong(ts: Long): Long = {
     Math.floorDiv(ts, MICROS_PER_SECOND)
   }
@@ -1287,10 +1283,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit

   // SPARK-31710: converting seconds to us, add compatibility flag
   private[this] def longToTimeStampCode(l: ExprValue): Block = {
-    if ( SQLConf.get.getConf( SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS ) )
-      code"$l * (long)$MICROS_PER_SECOND"
-    else
-      code"$l * (long)$MICROS_PER_MILLI"
+    if (SQLConf.get.getConf(SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS)) code"$l * 1000000L"
+    else code"$l * 1000L"
   }

   private[this] def timestampToLongCode(ts: ExprValue): Block =

From 1e312659d73b2f54d8acb4a380cf42a6f241bff5 Mon Sep 17 00:00:00 2001
From: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com>
Date: Mon, 18 May 2020 17:59:04 +0800
Subject: [PATCH 5/5] [SPARK-31710][SQL] Add compatibility flag to cast long to timestamp

In Hive, a long value is interpreted as milliseconds when cast to a timestamp,
while in Spark it is interpreted as seconds, so we have been getting incorrect
data while migrating Hive SQL to Spark SQL. With the compatibility flag we can
avoid this error.
---
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 1a1e16234c73..ea91fa51efc5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3174,10 +3174,8 @@ class SQLConf extends Serializable with Logging {

   def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID)

-  def longTimestampConversionInSeconds: Boolean = getConf(LONG_TIMESTAMP_CONVERSION_IN_SECONDS)
-
   /** ********************** SQLConf functionality methods ************ */

   /** Set Spark SQL configuration properties. */
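
Side note, shown only as an illustrative Scala fragment rather than part of
the patch: after this cleanup the flag is read through the conf entry itself,
which is the form Cast.scala uses in PATCH 2/5 and PATCH 4/5, so the
per-instance accessor removed here appears to have been unused.

    // Direct read used in Cast.scala (requires spark-catalyst on the classpath).
    val inSeconds: Boolean = SQLConf.get.getConf(SQLConf.LONG_TIMESTAMP_CONVERSION_IN_SECONDS)

    // Convenience accessor added in PATCH 1/5 and removed again in PATCH 5/5:
    //   def longTimestampConversionInSeconds: Boolean = getConf(LONG_TIMESTAMP_CONVERSION_IN_SECONDS)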