diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala index cc75340cd8fcd..c31dc624b0611 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala @@ -326,20 +326,34 @@ object RebaseDateTime { */ private[sql] def rebaseGregorianToJulianMicros(zoneId: ZoneId, micros: Long): Long = { val instant = microsToInstant(micros) - var ldt = instant.atZone(zoneId).toLocalDateTime + val zonedDateTime = instant.atZone(zoneId) + var ldt = zonedDateTime.toLocalDateTime if (ldt.isAfter(julianEndTs) && ldt.isBefore(gregorianStartTs)) { ldt = LocalDateTime.of(gregorianStartDate, ldt.toLocalTime) } val cal = new Calendar.Builder() - // `gregory` is a hybrid calendar that supports both - // the Julian and Gregorian calendar systems + // `gregory` is a hybrid calendar that supports both the Julian and Gregorian calendar systems .setCalendarType("gregory") .setDate(ldt.getYear, ldt.getMonthValue - 1, ldt.getDayOfMonth) .setTimeOfDay(ldt.getHour, ldt.getMinute, ldt.getSecond) - // Local time-line can overlaps, such as at an autumn daylight savings cutover. - // This setting selects the original local timestamp mapped to the given `micros`. - .set(Calendar.DST_OFFSET, zoneId.getRules.getDaylightSavings(instant).toMillis.toInt) .build() + // A local timestamp can have 2 instants in the cases of switching from: + // 1. Summer to winter time. + // 2. One standard time zone to another one. For example, Asia/Hong_Kong switched from JST + // to HKT on 18 November, 1945 01:59:59 AM. + // Below we check whether the original `instant` is the earlier or the later one. If it is the + // earlier one, we take the standard and DST offsets of the previous day, else of the next day. 
+ val trans = zoneId.getRules.getTransition(ldt) + if (trans != null && trans.isOverlap) { + val cloned = cal.clone().asInstanceOf[Calendar] + // Is the current offset the one that was in effect before the transition? + // If so, we will take zone offsets from the previous day, otherwise from the next day. + // This assumes that transitions cannot happen more often than once per 2 days. + val shift = if (trans.getOffsetBefore == zonedDateTime.getOffset) -1 else 1 + cloned.add(Calendar.DAY_OF_MONTH, shift) + cal.set(Calendar.ZONE_OFFSET, cloned.get(Calendar.ZONE_OFFSET)) + cal.set(Calendar.DST_OFFSET, cloned.get(Calendar.DST_OFFSET)) + } millisToMicros(cal.getTimeInMillis) + ldt.get(ChronoField.MICRO_OF_SECOND) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala index cb5f8e43d762f..e3af64a562e23 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala @@ -409,4 +409,40 @@ class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper { } } } + + test("SPARK-31959: JST -> HKT at Asia/Hong_Kong in 1945") { + // The 'Asia/Hong_Kong' time zone switched from 'Japan Standard Time' (JST = UTC+9) + // to 'Hong Kong Time' (HKT = UTC+8). After Sunday, 18 November, 1945 01:59:59 AM, + // clocks were moved backward to become Sunday, 18 November, 1945 01:00:00 AM. + // In this way, the overlap happened without Daylight Saving Time. 
+ val hkZid = getZoneId("Asia/Hong_Kong") + withDefaultTimeZone(hkZid) { + var expected = "1945-11-18 01:30:00.0" + var ldt = LocalDateTime.of(1945, 11, 18, 1, 30, 0) + var earlierMicros = instantToMicros(ldt.atZone(hkZid).withEarlierOffsetAtOverlap().toInstant) + var laterMicros = instantToMicros(ldt.atZone(hkZid).withLaterOffsetAtOverlap().toInstant) + if (earlierMicros + MICROS_PER_HOUR != laterMicros) { + // Old JDK might have an outdated time zone database. + // See https://bugs.openjdk.java.net/browse/JDK-8228469: "Hong Kong ... Its 1945 transition + // from JST to HKT was on 11-18 at 02:00, not 09-15 at 00:00" + expected = "1945-09-14 23:30:00.0" + ldt = LocalDateTime.of(1945, 9, 14, 23, 30, 0) + earlierMicros = instantToMicros(ldt.atZone(hkZid).withEarlierOffsetAtOverlap().toInstant) + laterMicros = instantToMicros(ldt.atZone(hkZid).withLaterOffsetAtOverlap().toInstant) + assert(earlierMicros + MICROS_PER_HOUR === laterMicros) + } + val rebasedEarlierMicros = rebaseGregorianToJulianMicros(hkZid, earlierMicros) + val rebasedLaterMicros = rebaseGregorianToJulianMicros(hkZid, laterMicros) + def toTsStr(micros: Long): String = toJavaTimestamp(micros).toString + assert(toTsStr(rebasedEarlierMicros) === expected) + assert(toTsStr(rebasedLaterMicros) === expected) + assert(rebasedEarlierMicros + MICROS_PER_HOUR === rebasedLaterMicros) + // Check optimized rebasing + assert(rebaseGregorianToJulianMicros(earlierMicros) === rebasedEarlierMicros) + assert(rebaseGregorianToJulianMicros(laterMicros) === rebasedLaterMicros) + // Check reverse rebasing + assert(rebaseJulianToGregorianMicros(rebasedEarlierMicros) === earlierMicros) + assert(rebaseJulianToGregorianMicros(rebasedLaterMicros) === laterMicros) + } + } }