Skip to content

Commit f8f219f

Browse files
committed
revise according to Cheng Hao
1 parent 0e0a4f5 commit f8f219f

File tree

3 files changed

+46
-30
lines changed

3 files changed

+46
-30
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -220,36 +220,52 @@ trait HiveTypeCoercion {
220220
case a: BinaryArithmetic if a.right.dataType == StringType =>
221221
a.makeCopy(Array(a.left, Cast(a.right, DoubleType)))
222222

223+
// We should turn all timestamp/date/string comparisons into string comparisons,
224+
// even if both sides are of the same type, because Hive uses xxxWritable to compare.
223225
case p: BinaryPredicate if p.left.dataType == StringType
224226
&& p.right.dataType == DateType =>
225-
p.makeCopy(Array(Cast(p.left, DateType), p.right))
227+
p.makeCopy(Array(p.left, Cast(p.right, StringType)))
226228
case p: BinaryPredicate if p.left.dataType == DateType
227229
&& p.right.dataType == StringType =>
228-
p.makeCopy(Array(p.left, Cast(p.right, DateType)))
230+
p.makeCopy(Array(Cast(p.left, StringType), p.right))
229231
case p: BinaryPredicate if p.left.dataType == StringType
230232
&& p.right.dataType == TimestampType =>
231-
p.makeCopy(Array(Cast(p.left, TimestampType), p.right))
233+
p.makeCopy(Array(p.left, Cast(p.right, StringType)))
232234
case p: BinaryPredicate if p.left.dataType == TimestampType
233235
&& p.right.dataType == StringType =>
234-
p.makeCopy(Array(p.left, Cast(p.right, TimestampType)))
236+
p.makeCopy(Array(Cast(p.left, StringType), p.right))
235237
case p: BinaryPredicate if p.left.dataType == TimestampType
236238
&& p.right.dataType == DateType =>
237-
p.makeCopy(Array(Cast(p.left, DateType), p.right))
239+
p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType)))
238240
case p: BinaryPredicate if p.left.dataType == DateType
239241
&& p.right.dataType == TimestampType =>
240-
p.makeCopy(Array(p.left, Cast(p.right, DateType)))
242+
p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType)))
243+
// same type
244+
case p: BinaryPredicate if p.left.dataType == DateType
245+
&& p.right.dataType == DateType =>
246+
p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType)))
247+
case p: BinaryPredicate if p.left.dataType == TimestampType
248+
&& p.right.dataType == TimestampType =>
249+
p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType)))
241250

242251
case p: BinaryPredicate if p.left.dataType == StringType && p.right.dataType != StringType =>
243252
p.makeCopy(Array(Cast(p.left, DoubleType), p.right))
244253
case p: BinaryPredicate if p.left.dataType != StringType && p.right.dataType == StringType =>
245254
p.makeCopy(Array(p.left, Cast(p.right, DoubleType)))
246255

247256
case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == StringType) =>
248-
i.makeCopy(Array(a, b.map(Cast(_, DateType))))
257+
i.makeCopy(Array(Cast(a, StringType), b))
249258
case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == StringType) =>
250-
i.makeCopy(Array(a, b.map(Cast(_, TimestampType))))
259+
i.makeCopy(Array(Cast(a, StringType), b))
251260
case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == TimestampType) =>
252-
i.makeCopy(Array(a, b.map(Cast(_, DateType))))
261+
i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType))))
262+
case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == DateType) =>
263+
i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType))))
264+
case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == DateType) =>
265+
i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType))))
266+
case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == TimestampType) =>
267+
i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType))))
268+
253269

254270
case Sum(e) if e.dataType == StringType =>
255271
Sum(Cast(e, DoubleType))

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp}
2121
import java.text.{DateFormat, SimpleDateFormat}
2222

2323
import org.apache.spark.Logging
24+
import org.apache.spark.sql.catalyst.errors.TreeNodeException
2425
import org.apache.spark.sql.catalyst.types._
2526

2627
/** Cast the child expression to the target data type. */
@@ -101,7 +102,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
101102
case ByteType =>
102103
buildCast[Byte](_, b => new Timestamp(b))
103104
case DateType =>
104-
buildCast[Date](_, d => Timestamp.valueOf(dateToString(d) + " 00:00:00"))
105+
buildCast[Date](_, d => new Timestamp(d.getTime))
105106
// TimestampWritable.decimalToTimestamp
106107
case DecimalType =>
107108
buildCast[BigDecimal](_, d => decimalToTimestamp(d))
@@ -154,15 +155,16 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
154155
// DateConverter
155156
private[this] def castToDate: Any => Any = child.dataType match {
156157
case StringType =>
157-
buildCast[String](_, s => if (s.contains(" ")) {
158-
try castToDate(castToTimestamp(s))
159-
catch { case _: java.lang.IllegalArgumentException => null }
160-
} else {
158+
buildCast[String](_, s =>
161159
try Date.valueOf(s) catch { case _: java.lang.IllegalArgumentException => null }
162-
})
160+
)
163161
case TimestampType =>
164-
buildCast[Timestamp](_, t => Date.valueOf(timestampToDateString(t)))
165-
// TimestampWritable.decimalToDate
162+
// Discard any precision finer than one second, following Hive.
163+
// Timestamp.nanos ranges from 0 to 999,999,999, i.e. it is always less than one second.
164+
// Truncate to whole seconds using Long arithmetic: multiplying an Int second count by 1000
// overflows for any timestamp more than ~24.8 days (2,147,483 s) away from the epoch.
buildCast[Timestamp](_, t => new Date(Math.floor(t.getTime / 1000.0).toLong * 1000))
165+
// Hive reports this case as a SemanticException.
166+
// Results can never be compared when Hive fails with an exception, so we can return null.
167+
// NULL is more reasonable here, since the query itself is grammatically valid.
166168
case _ => _ => null
167169
}
168170

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -254,12 +254,12 @@ class ExpressionEvaluationSuite extends FunSuite {
254254

255255
val sd = "1970-01-01"
256256
val d = Date.valueOf(sd)
257-
val sts = sd + " 00:00:01.1"
258-
val ts = Timestamp.valueOf(sts)
257+
val sts = sd + " 00:00:02"
258+
val nts = sts + ".1"
259+
val ts = Timestamp.valueOf(nts)
259260

260261
checkEvaluation("abdef" cast StringType, "abdef")
261262
checkEvaluation("abdef" cast DecimalType, null)
262-
checkEvaluation("abdef" cast DateType, null)
263263
checkEvaluation("abdef" cast TimestampType, null)
264264
checkEvaluation("12.65" cast DecimalType, BigDecimal(12.65))
265265

@@ -271,19 +271,18 @@ class ExpressionEvaluationSuite extends FunSuite {
271271

272272
checkEvaluation(Cast(Literal(sd) cast DateType, StringType), sd)
273273
checkEvaluation(Cast(Literal(d) cast StringType, DateType), d)
274-
checkEvaluation(Cast(Literal(sts) cast TimestampType, StringType), sts)
274+
checkEvaluation(Cast(Literal(nts) cast TimestampType, StringType), nts)
275275
checkEvaluation(Cast(Literal(ts) cast StringType, TimestampType), ts)
276+
// convert everything to string type for checking
277+
checkEvaluation(
278+
Cast(Cast(Literal(nts) cast TimestampType, DateType), StringType), sd)
279+
checkEvaluation(
280+
Cast(Cast(Literal(ts) cast DateType, TimestampType), StringType), sts)
276281

277282
checkEvaluation(Cast("abdef" cast BinaryType, StringType), "abdef")
278283

279284
checkEvaluation(Cast(Cast(Cast(Cast(
280285
Cast("5" cast ByteType, ShortType), IntegerType), FloatType), DoubleType), LongType), 5)
281-
checkEvaluation(Cast(Cast(Cast(Cast(
282-
Cast("5" cast ByteType, DateType), DecimalType), LongType), StringType), ShortType), null)
283-
checkEvaluation(Cast(Cast(Cast(Cast(
284-
Cast("5" cast DateType, ByteType), DecimalType), LongType), StringType), ShortType), null)
285-
checkEvaluation(Cast(Cast(Cast(Cast(
286-
Cast("5" cast DecimalType, ByteType), DateType), LongType), StringType), ShortType), null)
287286
checkEvaluation(Cast(Cast(Cast(Cast(
288287
Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 5)
289288
checkEvaluation(Cast(Cast(Cast(Cast(
@@ -315,7 +314,6 @@ class ExpressionEvaluationSuite extends FunSuite {
315314
assert(("abcdef" cast StringType).nullable === false)
316315
assert(("abcdef" cast BinaryType).nullable === false)
317316
assert(("abcdef" cast BooleanType).nullable === false)
318-
assert(("abcdef" cast DateType).nullable === true)
319317
assert(("abcdef" cast TimestampType).nullable === true)
320318
assert(("abcdef" cast LongType).nullable === true)
321319
assert(("abcdef" cast IntegerType).nullable === true)
@@ -329,8 +327,8 @@ class ExpressionEvaluationSuite extends FunSuite {
329327
}
330328

331329
test("date") {
332-
val d1 = new Date(12)
333-
val d2 = new Date(123)
330+
val d1 = Date.valueOf("1970-01-01")
331+
val d2 = Date.valueOf("1970-01-02")
334332
checkEvaluation(Literal(d1) < Literal(d2), true)
335333
}
336334

0 commit comments

Comments
 (0)