Skip to content

Commit 01c9ff3

Browse files
committed
[SPARK-8995] support for time strings
1 parent 34ec573 commit 01c9ff3

File tree

2 files changed

+44
-5
lines changed

2 files changed

+44
-5
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,14 @@ object DateTimeUtils {
198198
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
199199
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
200200
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
201+
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
202+
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
203+
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
204+
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
205+
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
206+
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
207+
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
208+
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
201209
*/
202210
def stringToTimestamp(s: UTF8String): Long = {
203211
if (s == null) {
@@ -210,15 +218,24 @@ object DateTimeUtils {
210218
val bytes = s.getBytes
211219
var j = 0
212220
var digitsMilli = 0
221+
var justTime = false
213222
while (j < bytes.length) {
214223
val b = bytes(j)
215224
val parsedValue = b - '0'.toByte
216225
if (parsedValue < 0 || parsedValue > 9) {
217-
if (i < 2) {
226+
if (j == 0 && b == 'T') {
227+
justTime = true
228+
i += 3
229+
} else if (i < 2) {
218230
if (b == '-') {
219231
segments(i) = currentSegmentValue
220232
currentSegmentValue = 0
221233
i += 1
234+
} else if (i == 0 && b == ':') {
235+
justTime = true
236+
segments(3) = currentSegmentValue
237+
currentSegmentValue = 0
238+
i = 4
222239
} else {
223240
return null.asInstanceOf[Long]
224241
}
@@ -287,7 +304,7 @@ object DateTimeUtils {
287304
if (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 || segments(1) > 12 ||
288305
segments(2) < 1 || segments(2) > 31 || segments(3) < 0 || segments(3) > 23 ||
289306
segments(4) < 0 || segments(4) > 59 || segments(5) < 0 || segments(5) > 59 ||
290-
segments(6) < 0 || segments(6) > 999999 || segments(7) < 0 || segments(7) > 14 ||
307+
segments(6) < 0 || segments(6) > 999999 || segments(7) < 0 || segments(7) > 23 ||
291308
segments(8) < 0 || segments(8) > 59) {
292309
return null.asInstanceOf[Long]
293310
}
@@ -297,14 +314,20 @@ object DateTimeUtils {
297314
Calendar.getInstance(
298315
TimeZone.getTimeZone(f"GMT${timeZone.get.toChar}${segments(7)}%02d:${segments(8)}%02d"))
299316
}
300-
c.set(segments(0), segments(1) - 1, segments(2), segments(3), segments(4), segments(5))
317+
if (justTime) {
318+
c.set(Calendar.HOUR, segments(3))
319+
c.set(Calendar.MINUTE, segments(4))
320+
c.set(Calendar.SECOND, segments(5))
321+
} else {
322+
c.set(segments(0), segments(1) - 1, segments(2), segments(3), segments(4), segments(5))
323+
}
301324
c.set(Calendar.MILLISECOND, segments(6) / 1000)
302325
c.getTimeInMillis * 1000 + segments(6) % 1000
303326
}
304327

305328
/**
306-
* Parses a given UTF8 date string to the corresponding [[Date]] object. The format of the date
307-
* has to be one of the following:
329+
* Parses a given UTF8 date string to the corresponding number of days since 1970-01-01.
330+
* The format of the date has to be one of the following:
308331
* `yyyy`,
309332
* `yyyy-[m]m`
310333
* `yyyy-[m]m-[d]d`

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,22 @@ class DateTimeUtilsSuite extends SparkFunSuite {
246246
assert(DateTimeUtils.stringToTimestamp(
247247
UTF8String.fromString("2015-03-18T12:03:17.12312+7:30")) == c.getTimeInMillis * 1000 + 120)
248248

249+
c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30"))
250+
c.set(Calendar.HOUR, 18)
251+
c.set(Calendar.MINUTE, 12)
252+
c.set(Calendar.SECOND, 15)
253+
c.set(Calendar.MILLISECOND, 123)
254+
assert(DateTimeUtils.stringToTimestamp(
255+
UTF8String.fromString("T18:12:15.12312+7:30")) == c.getTimeInMillis * 1000 + 120)
256+
257+
c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30"))
258+
c.set(Calendar.HOUR, 18)
259+
c.set(Calendar.MINUTE, 12)
260+
c.set(Calendar.SECOND, 15)
261+
c.set(Calendar.MILLISECOND, 123)
262+
assert(DateTimeUtils.stringToTimestamp(
263+
UTF8String.fromString("18:12:15.12312+7:30")) == c.getTimeInMillis * 1000 + 120)
264+
249265
c = Calendar.getInstance()
250266
c.set(2011, 4, 6, 7, 8, 9)
251267
c.set(Calendar.MILLISECOND, 100)

0 commit comments

Comments
 (0)