Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,15 @@ object RandomDataGenerator {
* @param dataType the type to generate values for
* @param nullable whether null values should be generated
* @param rand an optional random number generator
* @param validJulianDatetime whether to generate dates and timestamps that are valid
* in the Julian calendar.
* @return a function which can be called to generate random values.
*/
def forType(
dataType: DataType,
nullable: Boolean = true,
rand: Random = new Random): Option[() => Any] = {
rand: Random = new Random,
validJulianDatetime: Boolean = false): Option[() => Any] = {
val valueGenerator: Option[() => Any] = dataType match {
case StringType => Some(() => rand.nextString(rand.nextInt(MAX_STR_LEN)))
case BinaryType => Some(() => {
Expand Down Expand Up @@ -182,29 +185,37 @@ object RandomDataGenerator {
"1970-01-01", // the epoch date
"9999-12-31" // the last supported date according to SQL standard
)
def getRandomDate(rand: Random): java.sql.Date = {
val date = DateTimeUtils.toJavaDate(uniformDaysRand(rand))
// The generated `date` is based on the hybrid calendar Julian + Gregorian since
// 1582-10-15 but it should be valid in Proleptic Gregorian calendar too which is used
// by Spark SQL since version 3.0 (see SPARK-26651). We try to convert `date` to
// a local date in Proleptic Gregorian calendar to satisfy this requirement. Some
// years are leap years in Julian calendar but not in Proleptic Gregorian calendar.
// As the consequence of that, 29 February of such years might not exist in Proleptic
// Gregorian calendar. When this happens, we shift the date by one day.
Try { date.toLocalDate; date }.getOrElse(new Date(date.getTime + MILLIS_PER_DAY))
}
if (SQLConf.get.getConf(SQLConf.DATETIME_JAVA8API_ENABLED)) {
randomNumeric[LocalDate](
rand,
(rand: Random) => LocalDate.ofEpochDay(uniformDaysRand(rand)),
(rand: Random) => {
val days = if (validJulianDatetime) {
DateTimeUtils.fromJavaDate(getRandomDate(rand))
} else {
uniformDaysRand(rand)
}
LocalDate.ofEpochDay(days)
},
specialDates.map(LocalDate.parse))
} else {
randomNumeric[java.sql.Date](
rand,
(rand: Random) => {
val date = DateTimeUtils.toJavaDate(uniformDaysRand(rand))
// The generated `date` is based on the hybrid calendar Julian + Gregorian since
// 1582-10-15 but it should be valid in Proleptic Gregorian calendar too which is used
// by Spark SQL since version 3.0 (see SPARK-26651). We try to convert `date` to
// a local date in Proleptic Gregorian calendar to satisfy this requirement. Some
// years are leap years in Julian calendar but not in Proleptic Gregorian calendar.
// As the consequence of that, 29 February of such years might not exist in Proleptic
// Gregorian calendar. When this happens, we shift the date by one day.
Try { date.toLocalDate; date }.getOrElse(new Date(date.getTime + MILLIS_PER_DAY))
},
getRandomDate,
specialDates.map(java.sql.Date.valueOf))
}
case TimestampType =>
def uniformMicorsRand(rand: Random): Long = {
def uniformMicrosRand(rand: Random): Long = {
var milliseconds = rand.nextLong() % 253402329599999L
// -62135740800000L is the number of milliseconds before January 1, 1970, 00:00:00 GMT
// for "0001-01-01 00:00:00.000000". We need to find a
Expand All @@ -222,29 +233,37 @@ object RandomDataGenerator {
"1970-01-01 00:00:00", // the epoch timestamp
"9999-12-31 23:59:59" // the last supported timestamp according to SQL standard
)
def getRandomTimestamp(rand: Random): java.sql.Timestamp = {
// DateTimeUtils.toJavaTimestamp takes microsecond.
val ts = DateTimeUtils.toJavaTimestamp(uniformMicrosRand(rand))
// The generated `ts` is based on the hybrid calendar Julian + Gregorian since
// 1582-10-15 but it should be valid in Proleptic Gregorian calendar too which is used
// by Spark SQL since version 3.0 (see SPARK-26651). We try to convert `ts` to
// a local timestamp in Proleptic Gregorian calendar to satisfy this requirement. Some
// years are leap years in Julian calendar but not in Proleptic Gregorian calendar.
// As the consequence of that, 29 February of such years might not exist in Proleptic
// Gregorian calendar. When this happens, we shift the timestamp `ts` by one day.
Try { ts.toLocalDateTime; ts }.getOrElse(new Timestamp(ts.getTime + MILLIS_PER_DAY))
}
if (SQLConf.get.getConf(SQLConf.DATETIME_JAVA8API_ENABLED)) {
randomNumeric[Instant](
rand,
(rand: Random) => DateTimeUtils.microsToInstant(uniformMicorsRand(rand)),
(rand: Random) => {
val micros = if (validJulianDatetime) {
DateTimeUtils.fromJavaTimestamp(getRandomTimestamp(rand))
} else {
uniformMicrosRand(rand)
}
DateTimeUtils.microsToInstant(micros)
},
specialTs.map { s =>
val ldt = LocalDateTime.parse(s.replace(" ", "T"))
ldt.atZone(ZoneId.systemDefault()).toInstant
})
} else {
randomNumeric[java.sql.Timestamp](
rand,
(rand: Random) => {
// DateTimeUtils.toJavaTimestamp takes microsecond.
val ts = DateTimeUtils.toJavaTimestamp(uniformMicorsRand(rand))
// The generated `ts` is based on the hybrid calendar Julian + Gregorian since
// 1582-10-15 but it should be valid in Proleptic Gregorian calendar too which is used
// by Spark SQL since version 3.0 (see SPARK-26651). We try to convert `ts` to
// a local timestamp in Proleptic Gregorian calendar to satisfy this requirement. Some
// years are leap years in Julian calendar but not in Proleptic Gregorian calendar.
// As the consequence of that, 29 February of such years might not exist in Proleptic
// Gregorian calendar. When this happens, we shift the timestamp `ts` by one day.
Try { ts.toLocalDateTime; ts }.getOrElse(new Timestamp(ts.getTime + MILLIS_PER_DAY))
},
getRandomTimestamp,
specialTs.map(java.sql.Timestamp.valueOf))
}
case CalendarIntervalType => Some(() => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.sql.sources

import java.io.File
import java.util.Locale

import scala.util.Random

Expand Down Expand Up @@ -160,7 +161,17 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
val dataGenerator = RandomDataGenerator.forType(
dataType = dataType,
nullable = true,
new Random(seed)
new Random(seed),
// TODO(SPARK-34440): Allow saving/loading datetime in ORC w/o rebasing
// The ORC datasource always performs datetime rebasing that can lead to
// shifting of the original dates/timestamps. For instance, 1582-10-06 is valid
// date in the Proleptic Gregorian calendar but it does not exist in the Julian
// calendar. The ORC datasource shifts the date to the next valid date 1582-10-15
// during rebasing of this date to the Julian calendar. Since the test compares
// the original date before saving and the date loaded back from the ORC files,
// we set `validJulianDatetime` to `true` to generate only Proleptic Gregorian
// dates that exist in the Julian calendar and will be not changed during rebase.
validJulianDatetime = dataSourceName.toLowerCase(Locale.ROOT).contains("orc")
).getOrElse {
fail(s"Failed to create data generator for schema $dataType")
}
Expand Down