Skip to content
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,40 @@ object DateTimeUtils {
instantToMicros(localDateTime.atZone(ZoneId.systemDefault).toInstant)
}

/**
 * Rebases a day count from a source calendar to a target calendar, for instance
 * from the hybrid (Julian + Gregorian) calendar to the Proleptic Gregorian calendar.
 *
 * The function scans `switchDays` backwards from its last entry until it reaches an
 * entry that is not greater than `days`, and adds the difference stored at the same
 * index of `diffs`. Starting from the most recent switch day keeps the number of
 * comparisons small for modern dates. If `days` precedes every switch day, the first
 * difference is used.
 *
 * @param switchDays The days at which the difference in days between the source and
 *                   the target calendar changes.
 * @param diffs The differences in days between the calendars, read at the same index
 *              as the matching entry of `switchDays`.
 * @param days The number of days since the epoch 1970-01-01 to be rebased to the
 *             target calendar.
 * @return The rebased number of days.
 */
private def rebaseDays(switchDays: Array[Int], diffs: Array[Int], days: Int): Int = {
  // Index of the last switch day that is <= `days`, or -1 when there is none.
  @scala.annotation.tailrec
  def lastSwitchAtOrBefore(idx: Int): Int = {
    if (idx < 0 || switchDays(idx) <= days) idx else lastSwitchAtOrBefore(idx - 1)
  }

  val found = lastSwitchAtOrBefore(switchDays.length - 1)
  // Days earlier than every switch day fall back to the first difference.
  val diffIndex = if (found < 0) 0 else found
  days + diffs(diffIndex)
}

// The differences in days between Julian and Proleptic Gregorian dates.
// The diff at the index `i` is applicable for all days in the date interval:
// [julianGregDiffSwitchDay(i), julianGregDiffSwitchDay(i+1))
Comment thread
cloud-fan marked this conversation as resolved.
// Indexed in step with `julianGregDiffSwitchDay`. `rebaseDays` applies the first
// entry to every day before the first switch day as well, and the last entry (0)
// to every day on or after the last switch day.
private val julianGregDiffs = Array(2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, 0)
// The sorted days when difference in days between Julian and Proleptic

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorted days -> sorted days in Julian calendar?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed here and below

// Gregorian calendars was changed.
// The entries are listed in ascending order and are indexed in step with
// `julianGregDiffs`; `rebaseDays` scans this array from its last entry backwards.
// NOTE(review): the last entry presumably marks the Gregorian cutover day - confirm.
private val julianGregDiffSwitchDay = Array(
-719164, -682945, -646420, -609895, -536845, -500320, -463795,
-390745, -354220, -317695, -244645, -208120, -171595, -141427)
/**
* Converts the given number of days since the epoch day 1970-01-01 to
* a local date in Julian calendar, interprets the result as a local
Expand All @@ -1043,25 +1077,18 @@ object DateTimeUtils {
* @return The rebased number of days in Gregorian calendar.
*/
def rebaseJulianToGregorianDays(days: Int): Int = {
  // Table-driven rebase: find the interval of `julianGregDiffSwitchDay` that contains
  // `days` and add the matching entry of `julianGregDiffs`. The former Calendar-based
  // computation was dropped because its result was never used.
  rebaseDays(julianGregDiffSwitchDay, julianGregDiffs, days)
}

// The differences in days between Proleptic Gregorian and Julian dates.
// The diff at the index `i` is applicable for all days in the date interval:
// [gregJulianDiffSwitchDay(i), gregJulianDiffSwitchDay(i+1))
// `rebaseDays` also applies the first entry to every day before the first switch day.
// NOTE(review): the name `grepJulianDiffs` looks like a typo of `gregJulianDiffs`;
// renaming it requires updating `rebaseGregorianToJulianDays` at the same time.
private val grepJulianDiffs = Array(-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0)
// The sorted days when difference in days between Proleptic
// Gregorian and Julian was changed.
// The entries are listed in ascending order and are indexed in step with
// `grepJulianDiffs`; `rebaseDays` scans this array from its last entry backwards.
private val gregJulianDiffSwitchDay = Array(
-719162, -682944, -646420, -609896, -536847, -500323, -463799,
-390750, -354226, -317702, -244653, -208129, -171605, -141427)
/**
* Rebasing days since the epoch to store the same number of days
* as by Spark 2.4 and earlier versions. Spark 3.0 switched to
Expand All @@ -1079,14 +1106,6 @@ object DateTimeUtils {
* @return The rebased number of days since the epoch in Julian calendar.
*/
def rebaseGregorianToJulianDays(days: Int): Int = {
  // Table-driven rebase: find the interval of `gregJulianDiffSwitchDay` that contains
  // `days` and add the matching entry of `grepJulianDiffs`. The former Calendar-based
  // computation was dropped because its result was never used.
  rebaseDays(gregJulianDiffSwitchDay, grepJulianDiffs, days)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.util
import java.sql.{Date, Timestamp}
import java.text.SimpleDateFormat
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZoneId}
import java.util.{Locale, TimeZone}
import java.util.{Calendar, Locale, TimeZone}
import java.util.concurrent.TimeUnit

import org.scalatest.Matchers
Expand Down Expand Up @@ -765,4 +765,60 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
}
}
}

test("optimization of days rebasing - Gregorian to Julian") {
  // Reference implementation based on `java.util.Calendar`; the table-driven
  // `rebaseGregorianToJulianDays` must agree with it for every checked day.
  def refRebaseGregorianToJulianDays(days: Int): Int = {
    val date = LocalDate.ofEpochDay(days)
    // `gregory` is a hybrid calendar that supports both
    // the Julian and Gregorian calendar systems
    val hybridCalendar = new Calendar.Builder()
      .setCalendarType("gregory")
      .setTimeZone(TimeZoneUTC)
      .setDate(date.getYear, date.getMonthValue - 1, date.getDayOfMonth)
      .build()
    val millis = hybridCalendar.getTimeInMillis
    Math.toIntExact(Math.floorDiv(millis, MILLIS_PER_DAY))
  }

  val firstDay = localDateToDays(LocalDate.of(1, 1, 1))
  val endDayExclusive = localDateToDays(LocalDate.of(2030, 1, 1))

  // Compare both implementations day by day over [0001-01-01, 2030-01-01).
  (firstDay until endDayExclusive).foreach { day =>
    assert(rebaseGregorianToJulianDays(day) === refRebaseGregorianToJulianDays(day))
  }
}

test("optimization of days rebasing - Julian to Gregorian") {
  // Reference implementation based on `java.util.Calendar`; the table-driven
  // `rebaseJulianToGregorianDays` must agree with it for every checked day.
  def refRebaseJulianToGregorianDays(days: Int): Int = {
    // `gregory` is a hybrid calendar that supports both
    // the Julian and Gregorian calendar systems
    val hybridCalendar = new Calendar.Builder()
      .setCalendarType("gregory")
      .setTimeZone(TimeZoneUTC)
      .setInstant(Math.multiplyExact(days, MILLIS_PER_DAY))
      .build()
    val year = hybridCalendar.get(Calendar.YEAR)
    val month = hybridCalendar.get(Calendar.MONTH) + 1
    // The day of month is added afterwards to handle Julian dates that do not
    // exist in Proleptic Gregorian calendar.
    // For example, 1000-02-29 exists in Julian calendar because 1000
    // is a leap year but it is not a leap year in Gregorian calendar.
    val firstOfMonth = LocalDate.of(year, month, 1)
    val rebasedDate = firstOfMonth.plusDays(hybridCalendar.get(Calendar.DAY_OF_MONTH) - 1)
    Math.toIntExact(rebasedDate.toEpochDay)
  }

  val firstDay = rebaseGregorianToJulianDays(localDateToDays(LocalDate.of(1, 1, 1)))
  val endDayExclusive = rebaseGregorianToJulianDays(localDateToDays(LocalDate.of(2030, 1, 1)))

  // Compare both implementations day by day over the rebased range.
  (firstDay until endDayExclusive).foreach { day =>
    assert(rebaseJulianToGregorianDays(day) === refRebaseJulianToGregorianDays(day))
  }
}
}