From a8f7131749ed579ec45ea586eb49baed0b938bdc Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 5 Jun 2020 18:38:54 +0800 Subject: [PATCH 1/3] [SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting --- .../sql/catalyst/util/DateFormatter.scala | 7 +++-- .../util/DateTimeFormatterHelper.scala | 30 ++++++++++++++++--- .../catalyst/util/TimestampFormatter.scala | 7 +++-- .../util/TimestampFormatterSuite.scala | 18 ++++++++++- 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index b611ffa198b17..fc92c14fd2399 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -58,12 +58,15 @@ class Iso8601DateFormatter( try { val localDate = toLocalDate(formatter.parse(s)) localDateToDays(localDate) - } catch checkDiffResult(s, legacyFormatter.parse) + } catch checkParsedDiff(s, legacyFormatter.parse) } } override def format(localDate: LocalDate): String = { - localDate.format(formatter) + try { + localDate.format(formatter) + } catch checkDiffFormatResult(toJavaDate(localDateToDays(localDate)), + (d: Date) => format(d)) } override def format(days: Int): String = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 8e5c8651c8c36..d941476bb7825 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -21,7 +21,7 @@ import java.time._ import java.time.chrono.IsoChronology import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle} import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} -import java.util.Locale +import java.util.{Date, Locale} import com.google.common.cache.CacheBuilder @@ -109,13 +109,17 @@ trait DateTimeFormatterHelper { formatter } + private def needConvertToSparkUpgradeException(e: Throwable): Boolean = e match { + case _: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => true + case _ => false + } // When legacy time parser policy set to EXCEPTION, check whether we will get different results // between legacy parser and new parser. If new parser fails but legacy parser works, throw a // SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED, // DateTimeParseException will address by the caller side. - protected def checkDiffResult[T]( + protected def checkParsedDiff[T]( s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = { - case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => + case e if needConvertToSparkUpgradeException(e) => try { legacyParseFunc(s) } catch { @@ -126,6 +130,25 @@ trait DateTimeFormatterHelper { s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e) } + // When legacy time parser policy set to EXCEPTION, check whether we will get different results + // between legacy formatter and new formatter. If new formatter fails but legacy formatter works, + // throw a SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED, + // DateTimeParseException will address by the caller side. + protected def checkDiffFormatResult[T <: Date]( + d: T, + legacyFormatFunc: T => String): PartialFunction[Throwable, String] = { + case e if needConvertToSparkUpgradeException(e) => + val resultCandidate = try { + legacyFormatFunc(d) + } catch { + case _: Throwable => throw e + } + throw new SparkUpgradeException("3.0", s"Fail to format it to '$resultCandidate' in the new" + + s" formatter. You can set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore" + + " the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid" + + " datetime string.", e) + } + /** * When the new DateTimeFormatter failed to initialize because of invalid datetime pattern, it * will throw IllegalArgumentException. If the pattern can be recognized by the legacy formatter @@ -137,7 +160,6 @@ trait DateTimeFormatterHelper { * @param tryLegacyFormatter a func to capture exception, identically which forces a legacy * datetime formatter to be initialized */ - protected def checkLegacyFormatter( pattern: String, tryLegacyFormatter: => Unit): PartialFunction[Throwable, DateTimeFormatter] = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 11dcdec7356f6..ba449dde6168f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -84,12 +84,15 @@ class Iso8601TimestampFormatter( val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND) Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond) - } catch checkDiffResult(s, legacyFormatter.parse) + } catch checkParsedDiff(s, legacyFormatter.parse) } } override def format(instant: Instant): String = { - formatter.withZone(zoneId).format(instant) + try { + formatter.withZone(zoneId).format(instant) + } catch checkDiffFormatResult(toJavaTimestamp(instantToMicros(instant)), + (t: Timestamp) => format(t)) } override def format(us: Long): String = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 02333a3eb9fc5..30fc8b364ac0a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.catalyst.util -import java.time.{DateTimeException, Instant, LocalDateTime, LocalTime} +import java.time.{DateTimeException, Instant, LocalDate, LocalDateTime, LocalTime} import java.util.concurrent.TimeUnit import org.apache.spark.SparkUpgradeException + import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf @@ -418,4 +419,19 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { val t5 = f3.parse("AM") assert(t5 === date(1970)) } + + test("check result differences for datetime formatting") { + val formatter = TimestampFormatter("DD", UTC, isParsing = false) + assert(formatter.format(date(1970, 1, 3)) == "03") + assert(formatter.format(date(1970, 4, 9)) == "99") + + if (System.getProperty("java.version").split("\\D+")(0).toInt < 9) { + // https://bugs.openjdk.java.net/browse/JDK-8079628 + intercept[SparkUpgradeException] { + formatter.format(date(1970, 4, 10)) + } + } else { + assert(formatter.format(date(1970, 4, 10)) == "100") + } + } } From bfa244f074af3f5ca8e7ffa831c6703238a44f8c Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 5 Jun 2020 19:53:01 +0800 Subject: [PATCH 2/3] style --- .../spark/sql/catalyst/util/TimestampFormatterSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 30fc8b364ac0a..e70f805b30f39 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -17,11 +17,10 @@ package org.apache.spark.sql.catalyst.util -import java.time.{DateTimeException, Instant, LocalDate, LocalDateTime, LocalTime} +import java.time.{DateTimeException, Instant, LocalDateTime, LocalTime} import java.util.concurrent.TimeUnit import org.apache.spark.SparkUpgradeException - import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf From 54e70b617a2fdfe898af1b425044725bc4c698af Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 5 Jun 2020 19:54:39 +0800 Subject: [PATCH 3/3] style --- .../org/apache/spark/sql/catalyst/util/DateFormatter.scala | 2 +- .../spark/sql/catalyst/util/DateTimeFormatterHelper.scala | 2 +- .../org/apache/spark/sql/catalyst/util/TimestampFormatter.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index fc92c14fd2399..76ae3e5e8469a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -65,7 +65,7 @@ class Iso8601DateFormatter( override def format(localDate: LocalDate): String = { try { localDate.format(formatter) - } catch checkDiffFormatResult(toJavaDate(localDateToDays(localDate)), + } catch checkFormattedDiff(toJavaDate(localDateToDays(localDate)), (d: Date) => format(d)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index d941476bb7825..992a2b12a462f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -134,7 +134,7 @@ trait DateTimeFormatterHelper { // between legacy formatter and new formatter. If new formatter fails but legacy formatter works, // throw a SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED, // DateTimeParseException will address by the caller side. - protected def checkDiffFormatResult[T <: Date]( + protected def checkFormattedDiff[T <: Date]( d: T, legacyFormatFunc: T => String): PartialFunction[Throwable, String] = { case e if needConvertToSparkUpgradeException(e) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index ba449dde6168f..f3b589657b254 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -91,7 +91,7 @@ class Iso8601TimestampFormatter( override def format(instant: Instant): String = { try { formatter.withZone(zoneId).format(instant) - } catch checkDiffFormatResult(toJavaTimestamp(instantToMicros(instant)), + } catch checkFormattedDiff(toJavaTimestamp(instantToMicros(instant)), (t: Timestamp) => format(t)) }