Skip to content

Commit f27e56a

Browse files
committed
Change timestamp cast semantics. When cast to numeric types, return the unix time in seconds (instead of millis).
@marmbrus @chenghao-intel

Author: Reynold Xin <[email protected]>

Closes #352 from rxin/timestamp-cast and squashes the following commits:

18aacd3 [Reynold Xin] Fixed precision for double.
2adb235 [Reynold Xin] Change timestamp cast semantics. When cast to numeric types, return the unix time in seconds (instead of millis).
1 parent 31e6fff commit f27e56a

File tree

3 files changed

+40
-12
lines changed

3 files changed

+40
-12
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ package object dsl {
104104
implicit class DslSymbol(sym: Symbol) extends ImplicitAttribute { def s = sym.name }
105105
// TODO: add more implicit classes for literals?
106106
implicit class DslString(val s: String) extends ImplicitOperators {
107-
def expr: Expression = Literal(s)
107+
override def expr: Expression = Literal(s)
108108
def attr = analysis.UnresolvedAttribute(s)
109109
}
110110

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
8787

8888
private def decimalToTimestamp(d: BigDecimal) = {
8989
val seconds = d.longValue()
90-
val bd = (d - seconds) * (1000000000)
90+
val bd = (d - seconds) * 1000000000
9191
val nanos = bd.intValue()
9292

9393
// Convert to millis
@@ -96,18 +96,23 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
9696

9797
// remaining fractional portion as nanos
9898
t.setNanos(nanos)
99-
10099
t
101100
}
102101

103-
private def timestampToDouble(t: Timestamp) = (t.getSeconds() + t.getNanos().toDouble / 1000)
102+
// Timestamp to long, converting milliseconds to seconds
103+
private def timestampToLong(ts: Timestamp) = ts.getTime / 1000
104+
105+
private def timestampToDouble(ts: Timestamp) = {
106+
// Whole seconds since the Unix epoch (getTime is in milliseconds), plus the fractional second from getNanos.
107+
ts.getTime / 1000 + ts.getNanos.toDouble / 1000000000
108+
}
104109

105110
def castToLong: Any => Any = child.dataType match {
106111
case StringType => nullOrCast[String](_, s => try s.toLong catch {
107112
case _: NumberFormatException => null
108113
})
109114
case BooleanType => nullOrCast[Boolean](_, b => if(b) 1 else 0)
110-
case TimestampType => nullOrCast[Timestamp](_, t => timestampToDouble(t).toLong)
115+
case TimestampType => nullOrCast[Timestamp](_, t => timestampToLong(t))
111116
case DecimalType => nullOrCast[BigDecimal](_, _.toLong)
112117
case x: NumericType => b => x.numeric.asInstanceOf[Numeric[Any]].toLong(b)
113118
}
@@ -117,7 +122,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
117122
case _: NumberFormatException => null
118123
})
119124
case BooleanType => nullOrCast[Boolean](_, b => if(b) 1 else 0)
120-
case TimestampType => nullOrCast[Timestamp](_, t => timestampToDouble(t).toInt)
125+
case TimestampType => nullOrCast[Timestamp](_, t => timestampToLong(t).toInt)
121126
case DecimalType => nullOrCast[BigDecimal](_, _.toInt)
122127
case x: NumericType => b => x.numeric.asInstanceOf[Numeric[Any]].toInt(b)
123128
}
@@ -127,7 +132,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
127132
case _: NumberFormatException => null
128133
})
129134
case BooleanType => nullOrCast[Boolean](_, b => if(b) 1 else 0)
130-
case TimestampType => nullOrCast[Timestamp](_, t => timestampToDouble(t).toShort)
135+
case TimestampType => nullOrCast[Timestamp](_, t => timestampToLong(t).toShort)
131136
case DecimalType => nullOrCast[BigDecimal](_, _.toShort)
132137
case x: NumericType => b => x.numeric.asInstanceOf[Numeric[Any]].toInt(b).toShort
133138
}
@@ -137,7 +142,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
137142
case _: NumberFormatException => null
138143
})
139144
case BooleanType => nullOrCast[Boolean](_, b => if(b) 1 else 0)
140-
case TimestampType => nullOrCast[Timestamp](_, t => timestampToDouble(t).toByte)
145+
case TimestampType => nullOrCast[Timestamp](_, t => timestampToLong(t).toByte)
141146
case DecimalType => nullOrCast[BigDecimal](_, _.toByte)
142147
case x: NumericType => b => x.numeric.asInstanceOf[Numeric[Any]].toInt(b).toByte
143148
}
@@ -147,7 +152,9 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
147152
case _: NumberFormatException => null
148153
})
149154
case BooleanType => nullOrCast[Boolean](_, b => if(b) BigDecimal(1) else BigDecimal(0))
150-
case TimestampType => nullOrCast[Timestamp](_, t => BigDecimal(timestampToDouble(t)))
155+
case TimestampType =>
156+
// Note that we lose precision here because the timestamp is converted through a Double.
157+
nullOrCast[Timestamp](_, t => BigDecimal(timestampToDouble(t)))
151158
case x: NumericType => b => BigDecimal(x.numeric.asInstanceOf[Numeric[Any]].toDouble(b))
152159
}
153160

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,15 +201,14 @@ class ExpressionEvaluationSuite extends FunSuite {
201201

202202
val sts = "1970-01-01 00:00:01.0"
203203
val ts = Timestamp.valueOf(sts)
204-
204+
205205
checkEvaluation("abdef" cast StringType, "abdef")
206206
checkEvaluation("abdef" cast DecimalType, null)
207207
checkEvaluation("abdef" cast TimestampType, null)
208208
checkEvaluation("12.65" cast DecimalType, BigDecimal(12.65))
209209

210210
checkEvaluation(Literal(1) cast LongType, 1)
211211
checkEvaluation(Cast(Literal(1) cast TimestampType, LongType), 1)
212-
checkEvaluation(Cast(Literal(BigDecimal(1)) cast TimestampType, DecimalType), 1)
213212
checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
214213

215214
checkEvaluation(Cast(Literal(sts) cast TimestampType, StringType), sts)
@@ -240,12 +239,34 @@ class ExpressionEvaluationSuite extends FunSuite {
240239

241240
intercept[Exception] {evaluate(Literal(1) cast BinaryType, null)}
242241
}
243-
242+
244243
test("timestamp") {
245244
val ts1 = new Timestamp(12)
246245
val ts2 = new Timestamp(123)
247246
checkEvaluation(Literal("ab") < Literal("abc"), true)
248247
checkEvaluation(Literal(ts1) < Literal(ts2), true)
249248
}
249+
250+
test("timestamp casting") {
251+
val millis = 15 * 1000 + 2
252+
val ts = new Timestamp(millis)
253+
val ts1 = new Timestamp(15 * 1000) // a timestamp without the milliseconds part
254+
checkEvaluation(Cast(ts, ShortType), 15)
255+
checkEvaluation(Cast(ts, IntegerType), 15)
256+
checkEvaluation(Cast(ts, LongType), 15)
257+
checkEvaluation(Cast(ts, FloatType), 15.002f)
258+
checkEvaluation(Cast(ts, DoubleType), 15.002)
259+
checkEvaluation(Cast(Cast(ts, ShortType), TimestampType), ts1)
260+
checkEvaluation(Cast(Cast(ts, IntegerType), TimestampType), ts1)
261+
checkEvaluation(Cast(Cast(ts, LongType), TimestampType), ts1)
262+
checkEvaluation(Cast(Cast(millis.toFloat / 1000, TimestampType), FloatType),
263+
millis.toFloat / 1000)
264+
checkEvaluation(Cast(Cast(millis.toDouble / 1000, TimestampType), DoubleType),
265+
millis.toDouble / 1000)
266+
checkEvaluation(Cast(Literal(BigDecimal(1)) cast TimestampType, DecimalType), 1)
267+
268+
// A test for higher precision than millis
269+
checkEvaluation(Cast(Cast(0.00000001, TimestampType), DoubleType), 0.00000001)
270+
}
250271
}
251272

0 commit comments

Comments
 (0)