diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 05ec23f7ad47..04e4c99c445c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.util import java.time._ import java.time.chrono.IsoChronology import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, DateTimeParseException, ResolverStyle} -import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} +import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries, WeekFields} import java.util.Locale import com.google.common.cache.CacheBuilder @@ -99,12 +99,35 @@ private object DateTimeFormatterHelper { new DateTimeFormatterBuilder().parseCaseInsensitive() } - def toFormatter(builder: DateTimeFormatterBuilder, locale: Locale): DateTimeFormatter = { - builder - .parseDefaulting(ChronoField.MONTH_OF_YEAR, 1) - .parseDefaulting(ChronoField.DAY_OF_MONTH, 1) - .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) - .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) + def setDefaulting(builder: DateTimeFormatterBuilder, + locale: Locale, + isWeekBased: Boolean = false): DateTimeFormatterBuilder = { + // Defaults that apply to every pattern. + val commonDefaults = Seq( + ChronoField.ERA -> 1, + ChronoField.MINUTE_OF_HOUR -> 0, + ChronoField.SECOND_OF_MINUTE -> 0) + + // Week-based patterns default the day of the week to the locale's first day of the + // week; all other patterns default the month and the day of the month to 1. + val dateDefaults = if (isWeekBased) { + val firstDayOfWeek = WeekFields.of(locale).getFirstDayOfWeek.getValue + Seq(ChronoField.DAY_OF_WEEK -> firstDayOfWeek) + } else { + Seq(ChronoField.MONTH_OF_YEAR -> 1, ChronoField.DAY_OF_MONTH -> 1) + } + + // Register each (field, value) pair on the builder, in order. + val allDefaults = commonDefaults ++ dateDefaults + allDefaults.foldLeft(builder) { case (acc, (field, value)) => + acc.parseDefaulting(field, value) + } + } + + def toFormatter(builder: DateTimeFormatterBuilder, + locale: Locale, 
+ isWeekBased: Boolean = false): DateTimeFormatter = { + setDefaulting(builder, locale, isWeekBased) .toFormatter(locale) .withChronology(IsoChronology.INSTANCE) .withResolverStyle(ResolverStyle.STRICT) @@ -139,6 +162,20 @@ private object DateTimeFormatterHelper { builder } + /** + * Checks whether `pattern` uses the week-based letters `Y` (week-based-year) or `w` + * (week-of-week-based-year) outside of quoted literal text. + * + * Parser defaults must change for such patterns: DAY_OF_MONTH and MONTH_OF_YEAR are + * derived from the week of the year, so they must not be defaulted. + */ + def isWeekBasedPattern(pattern: String): Boolean = { + // Remove escaped quotes ('') and quoted literals ('...') first, so that letters + // inside literal text are not mistaken for pattern letters. + val patternLetters = pattern.replace("''", "").replaceAll("'[^']*'", "") + patternLetters.exists(c => c == 'Y' || c == 'w') + } + def buildFormatter( pattern: String, locale: Locale, @@ -148,7 +185,7 @@ private object DateTimeFormatterHelper { } else { createBuilder().appendPattern(pattern) } - toFormatter(builder, locale) + toFormatter(builder, locale, isWeekBasedPattern(pattern)) } lazy val fractionFormatter: DateTimeFormatter = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index d9b508a563a8..f6b54e991956 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -845,6 +845,10 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val sdf3 = new SimpleDateFormat(fmt3, Locale.US) sdf3.setTimeZone(TimeZone.getTimeZone(UTC)) + // Week of Year relies on Locale to define the first day of the week. 
+ val fmt4 = "YYYY-ww" + val sdf4 = new SimpleDateFormat(fmt4, Locale.US) + withDefaultTimeZone(UTC) { for (zid <- outstandingZoneIds) { val timeZoneId = Option(zid.getId) @@ -894,6 +898,11 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Literal("2015-07-24"), Literal("not a valid format"), timeZoneId), null) + sdf4.setTimeZone(tz) + checkEvaluation( + ToUnixTimestamp(Literal("2020-06"), Literal(fmt4), timeZoneId), + sdf4.parse("2020-06").getTime/1000) + // SPARK-28072 The codegen path for non-literal input should also work checkEvaluation( expression = ToUnixTimestamp(