Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ class UnivocityParser(
case NonFatal(e) =>
// If fails to parse, then tries the way used in 2.0 and 1.x for backwards
// compatibility.
val str = UTF8String.fromString(DateTimeUtils.cleanLegacyTimestampStr(datum))
val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum))
DateTimeUtils.stringToTimestamp(str, options.zoneId).getOrElse(throw e)
}
}
Expand All @@ -199,7 +199,7 @@ class UnivocityParser(
case NonFatal(e) =>
// If fails to parse, then tries the way used in 2.0 and 1.x for backwards
// compatibility.
val str = UTF8String.fromString(DateTimeUtils.cleanLegacyTimestampStr(datum))
val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum))
DateTimeUtils.stringToDate(str, options.zoneId).getOrElse(throw e)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ class JacksonParser(
case NonFatal(e) =>
// If fails to parse, then tries the way used in 2.0 and 1.x for backwards
// compatibility.
val str = UTF8String.fromString(DateTimeUtils.cleanLegacyTimestampStr(parser.getText))
val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(parser.getText))
DateTimeUtils.stringToTimestamp(str, options.zoneId).getOrElse(throw e)
}

Expand All @@ -255,7 +255,7 @@ class JacksonParser(
case NonFatal(e) =>
// If fails to parse, then tries the way used in 2.0 and 1.x for backwards
// compatibility.
val str = UTF8String.fromString(DateTimeUtils.cleanLegacyTimestampStr(parser.getText))
val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(parser.getText))
DateTimeUtils.stringToDate(str, options.zoneId).getOrElse {
// In Spark 1.5.0, we store the data as number of days since epoch in string.
// So, we just convert it to Int.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,20 +203,10 @@ object DateTimeUtils {
Math.multiplyExact(millis, MICROS_PER_MILLIS)
}

private final val gmtUtf8 = UTF8String.fromString("GMT")
// The method is called by JSON/CSV parser to clean up the legacy timestamp string by removing
// the "GMT" string.
def cleanLegacyTimestampStr(s: String): String = {
val indexOfGMT = s.indexOf("GMT")
if (indexOfGMT != -1) {
// ISO8601 with a weird time zone specifier (2000-01-01T00:00GMT+01:00)
val s0 = s.substring(0, indexOfGMT)
val s1 = s.substring(indexOfGMT + 3)
// Mapped to 2000-01-01T00:00+01:00
s0 + s1
} else {
s
}
}
// the "GMT" string. For example, it returns 2000-01-01T00:00+01:00 for 2000-01-01T00:00GMT+01:00.
def cleanLegacyTimestampStr(s: UTF8String): UTF8String = s.replace(gmtUtf8, UTF8String.EMPTY_UTF8)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't java.lang.String already have a replace method?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does, but look at how it is implemented: via regexp. @JoshRosen implemented a more efficient replace in UTF8String in #24707, which is why I used it here. I hope that seems reasonable.


/**
* Trims and parses a given UTF8 timestamp string to the corresponding [[Long]]
Expand Down