Skip to content

Commit d39f15e

Browse files
kevincox authored and rxin committed
[SPARK-9794] [SQL] Fix datetime parsing in SparkSQL.
This fixes https://issues.apache.org/jira/browse/SPARK-9794 by using a real ISO 8601 parser (courtesy of the XML component of the standard Java library).
cc: angelini
Author: Kevin Cox <[email protected]>
Closes #8396 from kevincox/kevincox-sql-time-parsing.
1 parent 896edb5 commit d39f15e

File tree

2 files changed

+42
-17
lines changed

2 files changed

+42
-17
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 10 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.util
2020
import java.sql.{Date, Timestamp}
2121
import java.text.{DateFormat, SimpleDateFormat}
2222
import java.util.{TimeZone, Calendar}
23+
import javax.xml.bind.DatatypeConverter;
2324

2425
import org.apache.spark.unsafe.types.UTF8String
2526

@@ -109,30 +110,22 @@ object DateTimeUtils {
109110
}
110111

111112
def stringToTime(s: String): java.util.Date = {
112-
if (!s.contains('T')) {
113+
var indexOfGMT = s.indexOf("GMT");
114+
if (indexOfGMT != -1) {
115+
// ISO8601 with a weird time zone specifier (2000-01-01T00:00GMT+01:00)
116+
val s0 = s.substring(0, indexOfGMT)
117+
val s1 = s.substring(indexOfGMT + 3)
118+
// Mapped to 2000-01-01T00:00+01:00
119+
stringToTime(s0 + s1)
120+
} else if (!s.contains('T')) {
113121
// JDBC escape string
114122
if (s.contains(' ')) {
115123
Timestamp.valueOf(s)
116124
} else {
117125
Date.valueOf(s)
118126
}
119-
} else if (s.endsWith("Z")) {
120-
// this is zero timezone of ISO8601
121-
stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
122-
} else if (s.indexOf("GMT") == -1) {
123-
// timezone with ISO8601
124-
val inset = "+00.00".length
125-
val s0 = s.substring(0, s.length - inset)
126-
val s1 = s.substring(s.length - inset, s.length)
127-
if (s0.substring(s0.lastIndexOf(':')).contains('.')) {
128-
stringToTime(s0 + "GMT" + s1)
129-
} else {
130-
stringToTime(s0 + ".0GMT" + s1)
131-
}
132127
} else {
133-
// ISO8601 with GMT insert
134-
val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSSz" )
135-
ISO8601GMT.parse(s)
128+
DatatypeConverter.parseDateTime(s).getTime()
136129
}
137130
}
138131

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,38 @@ class DateTimeUtilsSuite extends SparkFunSuite {
136136
assert(stringToDate(UTF8String.fromString("2015-031-8")).isEmpty)
137137
}
138138

139+
test("string to time") {
140+
// Tests with UTC.
141+
var c = Calendar.getInstance(TimeZone.getTimeZone("UTC"))
142+
c.set(Calendar.MILLISECOND, 0)
143+
144+
c.set(1900, 0, 1, 0, 0, 0)
145+
assert(stringToTime("1900-01-01T00:00:00GMT-00:00") === c.getTime())
146+
147+
c.set(2000, 11, 30, 10, 0, 0)
148+
assert(stringToTime("2000-12-30T10:00:00Z") === c.getTime())
149+
150+
// Tests with set time zone.
151+
c.setTimeZone(TimeZone.getTimeZone("GMT-04:00"))
152+
c.set(Calendar.MILLISECOND, 0)
153+
154+
c.set(1900, 0, 1, 0, 0, 0)
155+
assert(stringToTime("1900-01-01T00:00:00-04:00") === c.getTime())
156+
157+
c.set(1900, 0, 1, 0, 0, 0)
158+
assert(stringToTime("1900-01-01T00:00:00GMT-04:00") === c.getTime())
159+
160+
// Tests with local time zone.
161+
c.setTimeZone(TimeZone.getDefault())
162+
c.set(Calendar.MILLISECOND, 0)
163+
164+
c.set(2000, 11, 30, 0, 0, 0)
165+
assert(stringToTime("2000-12-30") === new Date(c.getTimeInMillis()))
166+
167+
c.set(2000, 11, 30, 10, 0, 0)
168+
assert(stringToTime("2000-12-30 10:00:00") === new Timestamp(c.getTimeInMillis()))
169+
}
170+
139171
test("string to timestamp") {
140172
var c = Calendar.getInstance()
141173
c.set(1969, 11, 31, 16, 0, 0)

0 commit comments

Comments
 (0)