diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index b611ffa198b17..76ae3e5e8469a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -58,12 +58,15 @@ class Iso8601DateFormatter( try { val localDate = toLocalDate(formatter.parse(s)) localDateToDays(localDate) - } catch checkDiffResult(s, legacyFormatter.parse) + } catch checkParsedDiff(s, legacyFormatter.parse) } } override def format(localDate: LocalDate): String = { - localDate.format(formatter) + try { + localDate.format(formatter) + } catch checkFormattedDiff(toJavaDate(localDateToDays(localDate)), + (d: Date) => format(d)) } override def format(days: Int): String = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 8e5c8651c8c36..992a2b12a462f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -21,7 +21,7 @@ import java.time._ import java.time.chrono.IsoChronology import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle} import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} -import java.util.Locale +import java.util.{Date, Locale} import com.google.common.cache.CacheBuilder @@ -109,13 +109,17 @@ trait DateTimeFormatterHelper { formatter } + private def needConvertToSparkUpgradeException(e: Throwable): Boolean = e match { + case _: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => true + case _ => false + } // When legacy time parser policy set to EXCEPTION, check whether we will get different results // between legacy parser and new parser. If new parser fails but legacy parser works, throw a // SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED, // DateTimeParseException will address by the caller side. - protected def checkDiffResult[T]( + protected def checkParsedDiff[T]( s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = { - case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => + case e if needConvertToSparkUpgradeException(e) => try { legacyParseFunc(s) } catch { @@ -126,6 +130,25 @@ trait DateTimeFormatterHelper { s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e) } + // When legacy time parser policy set to EXCEPTION, check whether we will get different results + // between legacy formatter and new formatter. If new formatter fails but legacy formatter works, + // throw a SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED, + // DateTimeParseException will address by the caller side. + protected def checkFormattedDiff[T <: Date]( + d: T, + legacyFormatFunc: T => String): PartialFunction[Throwable, String] = { + case e if needConvertToSparkUpgradeException(e) => + val resultCandidate = try { + legacyFormatFunc(d) + } catch { + case _: Throwable => throw e + } + throw new SparkUpgradeException("3.0", s"Fail to format it to '$resultCandidate' in the new" + + s" formatter. You can set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore" + + " the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid" + + " datetime string.", e) + } + /** * When the new DateTimeFormatter failed to initialize because of invalid datetime pattern, it * will throw IllegalArgumentException. If the pattern can be recognized by the legacy formatter @@ -137,7 +160,6 @@ trait DateTimeFormatterHelper { * @param tryLegacyFormatter a func to capture exception, identically which forces a legacy * datetime formatter to be initialized */ - protected def checkLegacyFormatter( pattern: String, tryLegacyFormatter: => Unit): PartialFunction[Throwable, DateTimeFormatter] = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 11dcdec7356f6..f3b589657b254 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -84,12 +84,15 @@ class Iso8601TimestampFormatter( val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND) Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond) - } catch checkDiffResult(s, legacyFormatter.parse) + } catch checkParsedDiff(s, legacyFormatter.parse) } } override def format(instant: Instant): String = { - formatter.withZone(zoneId).format(instant) + try { + formatter.withZone(zoneId).format(instant) + } catch checkFormattedDiff(toJavaTimestamp(instantToMicros(instant)), + (t: Timestamp) => format(t)) } override def format(us: Long): String = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 02333a3eb9fc5..e70f805b30f39 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -418,4 +418,19 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { val t5 = f3.parse("AM") assert(t5 === date(1970)) } + + test("check result differences for datetime formatting") { + val formatter = TimestampFormatter("DD", UTC, isParsing = false) + assert(formatter.format(date(1970, 1, 3)) == "03") + assert(formatter.format(date(1970, 4, 9)) == "99") + + if (System.getProperty("java.version").split("\\D+")(0).toInt < 9) { + // https://bugs.openjdk.java.net/browse/JDK-8079628 + intercept[SparkUpgradeException] { + formatter.format(date(1970, 4, 10)) + } + } else { + assert(formatter.format(date(1970, 4, 10)) == "100") + } + } }