-
Notifications
You must be signed in to change notification settings - Fork 29.3k
[SPARK-31297][SQL] Speed up dates rebasing #28067
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 10 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
839b029
Optimize greg to jul days rebasing
MaxGekk cc4e3ec
Fix imports
MaxGekk 203fa54
Optimize jul to greg days rebasing
MaxGekk 3aa88bc
Add benchmark for dates
MaxGekk 2dc5be4
Linear search
MaxGekk 65f222e
Avoid unnecessary changes in DateTimeUtilsSuite
MaxGekk 5dfb27c
Merge remote-tracking branch 'remotes/origin/master' into optimize-re…
MaxGekk 89d35fd
Remove an unused import
MaxGekk 2b6d25d
Refactoring `rebaseDays`, and add comments
MaxGekk fd88c56
Refactoring and comments
MaxGekk 0152a1c
Address Wenchen's review comment
MaxGekk 08443d9
Re-gen DateTimeBenchmark results on JDK 11
MaxGekk db5badb
Re-gen DateTimeBenchmark results on JDK 8
MaxGekk b8fa18e
Add comments
MaxGekk File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1033,6 +1033,40 @@ object DateTimeUtils { | |
| instantToMicros(localDateTime.atZone(ZoneId.systemDefault).toInstant) | ||
| } | ||
|
|
||
| /** | ||
| * Rebases days since the epoch from an original to a target calendar, for instance | ||
| * from a hybrid (Julian + Gregorian) to the Proleptic Gregorian calendar. | ||
| * | ||
| * It finds the latest switch day which is less than or equal to `days`, and adds the | ||
| * difference in days associated with that switch day to the given `days`. If `days` | ||
| * precedes every switch day, the difference at index 0 is applied. The function is | ||
| * based on a linear search which starts from the most recent switch day. This allows | ||
| * performing fewer comparisons for modern dates. | ||
| * | ||
| * @param switchDays The days on which the difference in days between the original and | ||
| * target calendars changes. The search walks this array from its last | ||
| * element down, so it is expected to be sorted in ascending order. | ||
| * @param diffs The differences in days between the calendars; `diffs(i)` is the value | ||
| * added for days starting at `switchDays(i)`. | ||
| * @param days The number of days since the epoch 1970-01-01 to be rebased to the | ||
| * target calendar. | ||
| * @return The rebased number of days, i.e. `days` plus the selected difference. | ||
| */ | ||
| private def rebaseDays(switchDays: Array[Int], diffs: Array[Int], days: Int): Int = { | ||
| var i = switchDays.length - 1 | ||
| while (i >= 0 && days < switchDays(i)) { | ||
| i -= 1 | ||
| } | ||
| val rebased = days + diffs(if (i < 0) 0 else i) | ||
| rebased | ||
| } | ||
|
|
||
| // The differences in days between Julian and Proleptic Gregorian dates. | ||
| // The diff at the index `i` is applicable for all days in the date interval: | ||
| // [julianGregDiffSwitchDay(i), julianGregDiffSwitchDay(i+1)) | ||
| private val julianGregDiffs = Array(2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, 0) | ||
| // The sorted days on which the difference in days between Julian and Proleptic | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed here and below |
||
| // Gregorian calendars was changed. | ||
| private val julianGregDiffSwitchDay = Array( | ||
| -719164, -682945, -646420, -609895, -536845, -500320, -463795, | ||
| -390745, -354220, -317695, -244645, -208120, -171595, -141427) | ||
| /** | ||
| * Converts the given number of days since the epoch day 1970-01-01 to | ||
| * a local date in Julian calendar, interprets the result as a local | ||
|
|
@@ -1043,25 +1077,18 @@ object DateTimeUtils { | |
| * @return The rebased number of days in Gregorian calendar. | ||
| */ | ||
| def rebaseJulianToGregorianDays(days: Int): Int = { | ||
| val utcCal = new Calendar.Builder() | ||
| // `gregory` is a hybrid calendar that supports both | ||
| // the Julian and Gregorian calendar systems | ||
| .setCalendarType("gregory") | ||
| .setTimeZone(TimeZoneUTC) | ||
| .setInstant(Math.multiplyExact(days, MILLIS_PER_DAY)) | ||
| .build() | ||
| val localDate = LocalDate.of( | ||
| utcCal.get(Calendar.YEAR), | ||
| utcCal.get(Calendar.MONTH) + 1, | ||
| // The number of days will be added later to handle non-existing | ||
| // Julian dates in Proleptic Gregorian calendar. | ||
| // For example, 1000-02-29 exists in Julian calendar because 1000 | ||
| // is a leap year but it is not a leap year in Gregorian calendar. | ||
| 1) | ||
| .plusDays(utcCal.get(Calendar.DAY_OF_MONTH) - 1) | ||
| Math.toIntExact(localDate.toEpochDay) | ||
| rebaseDays(julianGregDiffSwitchDay, julianGregDiffs, days) | ||
| } | ||
|
|
||
| // The differences in days between Proleptic Gregorian and Julian dates. | ||
| // The diff at the index `i` is applicable for all days in the date interval: | ||
| // [gregJulianDiffSwitchDay(i), gregJulianDiffSwitchDay(i+1)) | ||
| private val grepJulianDiffs = Array(-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0) | ||
| // The sorted days on which the difference in days between Proleptic | ||
| // Gregorian and Julian calendars changes. | ||
| private val gregJulianDiffSwitchDay = Array( | ||
| -719162, -682944, -646420, -609896, -536847, -500323, -463799, | ||
| -390750, -354226, -317702, -244653, -208129, -171605, -141427) | ||
| /** | ||
| * Rebasing days since the epoch to store the same number of days | ||
| * as by Spark 2.4 and earlier versions. Spark 3.0 switched to | ||
|
|
@@ -1079,14 +1106,6 @@ object DateTimeUtils { | |
| * @return The rebased number of days since the epoch in Julian calendar. | ||
| */ | ||
| def rebaseGregorianToJulianDays(days: Int): Int = { | ||
| val localDate = LocalDate.ofEpochDay(days) | ||
| val utcCal = new Calendar.Builder() | ||
| // `gregory` is a hybrid calendar that supports both | ||
| // the Julian and Gregorian calendar systems | ||
| .setCalendarType("gregory") | ||
| .setTimeZone(TimeZoneUTC) | ||
| .setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth) | ||
| .build() | ||
| Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, MILLIS_PER_DAY)) | ||
| rebaseDays(gregJulianDiffSwitchDay, grepJulianDiffs, days) | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.