From 826570f29cf1ab81f283d412a1d2b65203aa3470 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 24 Jul 2019 16:02:52 +0800 Subject: [PATCH 1/4] Disallow upcasting complex data types to string type --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 3 ++- .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 969128838eba4..85bad74850dc1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -131,7 +131,8 @@ object Cast { case (from: DecimalType, to: NumericType) if from.isTighterThan(to) => true case (f, t) if legalNumericPrecedence(f, t) => true case (DateType, TimestampType) => true - case (_, StringType) => true + case (_: AtomicType, StringType) => true + case (_: CalendarIntervalType, StringType) => true // Spark supports casting between long and timestamp, please see `longToTimestamp` and // `timestampToLong` for details. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 4d667fd61ae01..400259bcfaa78 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1002,6 +1002,11 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + numericTypes.foreach { dt => + makeComplexTypes(dt, true).foreach { complexType => + assert(!Cast.canUpCast(dt, StringType)) + } + } } test("SPARK-27671: cast from nested null type in struct") { From ec517a510c905c24f88b2925ab020267152ced45 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 24 Jul 2019 23:26:00 +0800 Subject: [PATCH 2/4] add more test cases --- .../encoders/EncoderResolutionSuite.scala | 37 +++++++++++++++++++ .../spark/sql/UserDefinedTypeSuite.scala | 5 +++ 2 files changed, 42 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index da1b695919dec..259b512c0a1e9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -196,6 +196,43 @@ class EncoderResolutionSuite extends PlanTest { encoder.resolveAndBind(attrs) } + test("SPARK-28497: complex type is not compatible with string encoder schema") { + val encoder = ExpressionEncoder[String] + + { + val attrs = Seq('a.struct('x.long)) + assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == + s""" + |Cannot up cast `a` from struct to string. + |The type path of the target object is: + |- root class: "java.lang.String" + |You can either add an explicit cast to the input data or choose a higher precision type + """.stripMargin.trim + " of the field in the target object") + } + + { + val attrs = Seq('a.array(StringType)) + assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == + s""" + |Cannot up cast `a` from array to string. + |The type path of the target object is: + |- root class: "java.lang.String" + |You can either add an explicit cast to the input data or choose a higher precision type + """.stripMargin.trim + " of the field in the target object") + } + + { + val attrs = Seq('a.map(StringType, StringType)) + assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == + s""" + |Cannot up cast `a` from map to string. + |The type path of the target object is: + |- root class: "java.lang.String" + |You can either add an explicit cast to the input data or choose a higher precision type + """.stripMargin.trim + " of the field in the target object") + } + } + test("throw exception if real type is not compatible with encoder schema") { val msg1 = intercept[AnalysisException] { ExpressionEncoder[StringIntClass].resolveAndBind(Seq('a.string, 'b.long)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index 6628d36ffc702..49f0000212554 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -272,4 +272,9 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT val ret = Cast(Literal(data, udt), StringType, None) checkEvaluation(ret, "(1.0, 3.0, 5.0, 7.0, 9.0)") } + + test("SPARK-28497 Can't up cast UserDefinedType to string") { + val udt = new TestUDT.MyDenseVectorUDT() + assert(!Cast.canUpCast(udt, StringType)) + } } From f4fa932dbfa2f5b39f61477905626a7cef627672 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 24 Jul 2019 23:27:19 +0800 Subject: [PATCH 3/4] fix --- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 400259bcfaa78..44825c79781d9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1004,7 +1004,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } numericTypes.foreach { dt => makeComplexTypes(dt, true).foreach { complexType => - assert(!Cast.canUpCast(dt, StringType)) + assert(!Cast.canUpCast(complexType, StringType)) } } } From 6c590a0ea0a83bf39825e345341fc8815ed32f3f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 25 Jul 2019 15:03:05 +0800 Subject: [PATCH 4/4] update test case --- .../encoders/EncoderResolutionSuite.scala | 28 ++----------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index 259b512c0a1e9..53cb8bce0a52d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -199,33 +199,11 @@ class EncoderResolutionSuite extends PlanTest { test("SPARK-28497: complex type is not compatible with string encoder schema") { val encoder = ExpressionEncoder[String] - { - val attrs = Seq('a.struct('x.long)) - assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - s""" - |Cannot up cast `a` from struct to string. - |The type path of the target object is: - |- root class: "java.lang.String" - |You can either add an explicit cast to the input data or choose a higher precision type - """.stripMargin.trim + " of the field in the target object") - } - - { - val attrs = Seq('a.array(StringType)) - assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - s""" - |Cannot up cast `a` from array to string. - |The type path of the target object is: - |- root class: "java.lang.String" - |You can either add an explicit cast to the input data or choose a higher precision type - """.stripMargin.trim + " of the field in the target object") - } - - { - val attrs = Seq('a.map(StringType, StringType)) + Seq('a.struct('x.long), 'a.array(StringType), 'a.map(StringType, StringType)).foreach { attr => + val attrs = Seq(attr) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == s""" - |Cannot up cast `a` from map to string. + |Cannot up cast `a` from ${attr.dataType.catalogString} to string. |The type path of the target object is: |- root class: "java.lang.String" |You can either add an explicit cast to the input data or choose a higher precision type