diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index 3898eca79478e..898b963fd0ab5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -186,7 +186,7 @@ class UnivocityParser( case NonFatal(e) => // If fails to parse, then tries the way used in 2.0 and 1.x for backwards // compatibility. - val str = UTF8String.fromString(DateTimeUtils.cleanLegacyTimestampStr(datum)) + val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum)) DateTimeUtils.stringToTimestamp(str, options.zoneId).getOrElse(throw e) } } @@ -199,7 +199,7 @@ class UnivocityParser( case NonFatal(e) => // If fails to parse, then tries the way used in 2.0 and 1.x for backwards // compatibility. - val str = UTF8String.fromString(DateTimeUtils.cleanLegacyTimestampStr(datum)) + val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum)) DateTimeUtils.stringToDate(str, options.zoneId).getOrElse(throw e) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index e038f777c7a41..7f69180e87e7e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -238,7 +238,7 @@ class JacksonParser( case NonFatal(e) => // If fails to parse, then tries the way used in 2.0 and 1.x for backwards // compatibility. - val str = UTF8String.fromString(DateTimeUtils.cleanLegacyTimestampStr(parser.getText)) + val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(parser.getText)) DateTimeUtils.stringToTimestamp(str, options.zoneId).getOrElse(throw e) } @@ -255,7 +255,7 @@ class JacksonParser( case NonFatal(e) => // If fails to parse, then tries the way used in 2.0 and 1.x for backwards // compatibility. - val str = UTF8String.fromString(DateTimeUtils.cleanLegacyTimestampStr(parser.getText)) + val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(parser.getText)) DateTimeUtils.stringToDate(str, options.zoneId).getOrElse { // In Spark 1.5.0, we store the data as number of days since epoch in string. // So, we just convert it to Int. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index c466a60259c7f..5c3d91c105094 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -203,20 +203,10 @@ object DateTimeUtils { Math.multiplyExact(millis, MICROS_PER_MILLIS) } + private final val gmtUtf8 = UTF8String.fromString("GMT") // The method is called by JSON/CSV parser to clean up the legacy timestamp string by removing - // the "GMT" string. - def cleanLegacyTimestampStr(s: String): String = { - val indexOfGMT = s.indexOf("GMT") - if (indexOfGMT != -1) { - // ISO8601 with a weird time zone specifier (2000-01-01T00:00GMT+01:00) - val s0 = s.substring(0, indexOfGMT) - val s1 = s.substring(indexOfGMT + 3) - // Mapped to 2000-01-01T00:00+01:00 - s0 + s1 - } else { - s - } - } + // the "GMT" string. For example, it returns 2000-01-01T00:00+01:00 for 2000-01-01T00:00GMT+01:00. + def cleanLegacyTimestampStr(s: UTF8String): UTF8String = s.replace(gmtUtf8, UTF8String.EMPTY_UTF8) /** * Trims and parses a given UTF8 timestamp string to the corresponding a corresponding [[Long]]