diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 3de9f5a951fe..4c5419961ee9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -620,6 +620,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String // We can return what the children return. Same thing should happen in the codegen path. if (DataType.equalsStructurally(from, to)) { identity + } else if (from == NullType) { + // According to `canCast`, NullType can be cast to any type. + // For primitive types, we don't reach here because of the guard in `nullSafeEval`. + // But for nested types like struct, we might reach here for a nested null type field. + // We won't actually call the returned function; it is just a placeholder. + _ => throw new SparkException(s"should not directly cast from NullType to $to.") } else { to match { case dt if dt == from => identity[Any] diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 1b7f2581f895..f6a1d00c519c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -990,4 +990,19 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-27671: cast from nested null type in struct") { + import DataTypeTestUtils._ + + atomicTypes.foreach { atomicType => + val struct = Literal.create( + InternalRow(null), + StructType(Seq(StructField("a", NullType, nullable = true)))) + + val ret = cast(struct, StructType(Seq( + StructField("a", atomicType, nullable = true)))) + assert(ret.resolved) + 
 checkEvaluation(ret, InternalRow(null)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 8c859f7dba5e..dd3740d24f72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2157,4 +2157,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { |*(1) Range (0, 10, step=1, splits=2)""".stripMargin)) } } + + test("SPARK-27671: Fix analysis exception when casting null in nested field in struct") { + val df = sql("SELECT * FROM VALUES (('a', (10, null))), (('b', (10, 50))), " + + "(('c', null)) AS tab(x, y)") + checkAnswer(df, Row("a", Row(10, null)) :: Row("b", Row(10, 50)) :: Row("c", null) :: Nil) + + val cast = sql("SELECT cast(struct(1, null) AS struct<a:int,b:int>)") + checkAnswer(cast, Row(Row(1, null)) :: Nil) + } }