-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-34856][SQL] ANSI mode: Allow casting complex types as string type #31954
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -686,6 +686,117 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { | |
| checkEvaluation(cast(value, DoubleType), Double.NaN) | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-22825 Cast array to string") { | ||
| val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType) | ||
| checkEvaluation(ret1, "[1, 2, 3, 4, 5]") | ||
| val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType) | ||
| checkEvaluation(ret2, "[ab, cde, f]") | ||
| Seq(false, true).foreach { omitNull => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { | ||
| val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType) | ||
| checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]") | ||
| } | ||
| } | ||
| val ret4 = | ||
| cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType) | ||
| checkEvaluation(ret4, "[ab, cde, f]") | ||
| val ret5 = cast( | ||
| Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)), | ||
| StringType) | ||
| checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]") | ||
| val ret6 = cast( | ||
| Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00") | ||
| .map(Timestamp.valueOf)), | ||
| StringType) | ||
| checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]") | ||
| val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType) | ||
| checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]") | ||
| val ret8 = cast( | ||
| Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))), | ||
| StringType) | ||
| checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") | ||
| } | ||
|
|
||
| test("SPARK-33291: Cast array with null elements to string") { | ||
| Seq(false, true).foreach { omitNull => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { | ||
| val ret1 = cast(Literal.create(Array(null, null)), StringType) | ||
| checkEvaluation( | ||
| ret1, | ||
| s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-22973 Cast map to string") { | ||
| Seq( | ||
| false -> ("{", "}"), | ||
| true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { | ||
| val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType) | ||
| checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb") | ||
| val ret2 = cast( | ||
| Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)), | ||
| StringType) | ||
| checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb") | ||
| val ret3 = cast( | ||
| Literal.create(Map( | ||
| 1 -> Date.valueOf("2014-12-03"), | ||
| 2 -> Date.valueOf("2014-12-04"), | ||
| 3 -> Date.valueOf("2014-12-05"))), | ||
| StringType) | ||
| checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb") | ||
| val ret4 = cast( | ||
| Literal.create(Map( | ||
| 1 -> Timestamp.valueOf("2014-12-03 13:01:00"), | ||
| 2 -> Timestamp.valueOf("2014-12-04 15:05:00"))), | ||
| StringType) | ||
| checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb") | ||
| val ret5 = cast( | ||
| Literal.create(Map( | ||
| 1 -> Array(1, 2, 3), | ||
| 2 -> Array(4, 5, 6))), | ||
| StringType) | ||
| checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-22981 Cast struct to string") { | ||
| Seq( | ||
| false -> ("{", "}"), | ||
| true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { | ||
| val ret1 = cast(Literal.create((1, "a", 0.1)), StringType) | ||
| checkEvaluation(ret1, s"${lb}1, a, 0.1$rb") | ||
| val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType) | ||
| checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb") | ||
| val ret3 = cast(Literal.create( | ||
| (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType) | ||
| checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb") | ||
| val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType) | ||
| checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb") | ||
| val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType) | ||
| checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb") | ||
| val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType) | ||
| checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-33291: Cast struct with null elements to string") { | ||
| Seq( | ||
| false -> ("{", "}"), | ||
| true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { | ||
| val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType) | ||
| checkEvaluation( | ||
| ret1, | ||
| s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb") | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| abstract class AnsiCastSuiteBase extends CastSuiteBase { | ||
|
|
@@ -851,12 +962,6 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { | |
| assert(cast(booleanLiteral, DateType).checkInputDataTypes().isFailure) | ||
| } | ||
|
|
||
| test("ANSI mode: disallow casting complex types as String type") { | ||
|
||
| verifyCastFailure(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)) | ||
| verifyCastFailure(cast(Literal.create(Map(1 -> "a")), StringType)) | ||
| verifyCastFailure(cast(Literal.create((1, "a", 0.1)), StringType)) | ||
| } | ||
|
|
||
| test("cast from invalid string to numeric should throw NumberFormatException") { | ||
| // cast to IntegerType | ||
| Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType => | ||
|
|
@@ -1569,117 +1674,6 @@ class CastSuite extends CastSuiteBase { | |
| checkEvaluation(cast("abcd", DecimalType(38, 1)), null) | ||
| } | ||
|
|
||
| test("SPARK-22825 Cast array to string") { | ||
| val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType) | ||
| checkEvaluation(ret1, "[1, 2, 3, 4, 5]") | ||
| val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType) | ||
| checkEvaluation(ret2, "[ab, cde, f]") | ||
| Seq(false, true).foreach { omitNull => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { | ||
| val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType) | ||
| checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]") | ||
| } | ||
| } | ||
| val ret4 = | ||
| cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType) | ||
| checkEvaluation(ret4, "[ab, cde, f]") | ||
| val ret5 = cast( | ||
| Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)), | ||
| StringType) | ||
| checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]") | ||
| val ret6 = cast( | ||
| Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00") | ||
| .map(Timestamp.valueOf)), | ||
| StringType) | ||
| checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]") | ||
| val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType) | ||
| checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]") | ||
| val ret8 = cast( | ||
| Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))), | ||
| StringType) | ||
| checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") | ||
| } | ||
|
|
||
| test("SPARK-33291: Cast array with null elements to string") { | ||
| Seq(false, true).foreach { omitNull => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { | ||
| val ret1 = cast(Literal.create(Array(null, null)), StringType) | ||
| checkEvaluation( | ||
| ret1, | ||
| s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-22973 Cast map to string") { | ||
| Seq( | ||
| false -> ("{", "}"), | ||
| true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { | ||
| val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType) | ||
| checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb") | ||
| val ret2 = cast( | ||
| Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)), | ||
| StringType) | ||
| checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb") | ||
| val ret3 = cast( | ||
| Literal.create(Map( | ||
| 1 -> Date.valueOf("2014-12-03"), | ||
| 2 -> Date.valueOf("2014-12-04"), | ||
| 3 -> Date.valueOf("2014-12-05"))), | ||
| StringType) | ||
| checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb") | ||
| val ret4 = cast( | ||
| Literal.create(Map( | ||
| 1 -> Timestamp.valueOf("2014-12-03 13:01:00"), | ||
| 2 -> Timestamp.valueOf("2014-12-04 15:05:00"))), | ||
| StringType) | ||
| checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb") | ||
| val ret5 = cast( | ||
| Literal.create(Map( | ||
| 1 -> Array(1, 2, 3), | ||
| 2 -> Array(4, 5, 6))), | ||
| StringType) | ||
| checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-22981 Cast struct to string") { | ||
| Seq( | ||
| false -> ("{", "}"), | ||
| true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { | ||
| val ret1 = cast(Literal.create((1, "a", 0.1)), StringType) | ||
| checkEvaluation(ret1, s"${lb}1, a, 0.1$rb") | ||
| val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType) | ||
| checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb") | ||
| val ret3 = cast(Literal.create( | ||
| (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType) | ||
| checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb") | ||
| val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType) | ||
| checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb") | ||
| val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType) | ||
| checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb") | ||
| val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType) | ||
| checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-33291: Cast struct with null elements to string") { | ||
| Seq( | ||
| false -> ("{", "}"), | ||
| true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => | ||
| withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { | ||
| val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType) | ||
| checkEvaluation( | ||
| ret1, | ||
| s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| test("data type casting II") { | ||
| checkEvaluation( | ||
| cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this affect the upcoming year-month and day-time interval types?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cloud-fan. Usually, we take an explicit allow-list approach for types. Is this change okay?
If this PR aims for `complex type` only, why don't we add them explicitly instead of doing this widely?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, cc @MaxGekk
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`df.show` needs to cast the column to string. I think we need to support casting from all the data types here; otherwise `df.show` may still be broken in some cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So far, we don't support such casting. I opened the JIRAs for that: SPARK-34667 and SPARK-34668