diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index d03ca663e8e3..bfc0f72f3151 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,6 +217,8 @@ license: | For example `SELECT timestamp 'tomorrow';`. - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. + + - Since Spark 3.0, when `array` function is called without parameters, it returns an empty array with `NullType` data type. In Spark version 2.4 and earlier, the data type of the result is `StringType`. ## Upgrading from Spark SQL 2.4 to 2.4.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 3f722e8537c3..8e00e32a719e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -47,7 +47,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { override def dataType: ArrayType = { ArrayType( TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(children.map(_.dataType)) - .getOrElse(StringType), + .getOrElse(NullType), containsNull = children.exists(_.nullable)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 06484908f5e7..1dc7d34b5227 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -3400,12 +3400,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).foreach(assertValuesDoNotChangeAfterCoalesceOrUnion(_)) } - test("SPARK-21281 use string types by default if array and map have no argument") { + test("SPARK-21281 use string types by default if map have no argument") { val ds = spark.range(1) var expectedSchema = new StructType() - .add("x", ArrayType(StringType, containsNull = false), nullable = false) - assert(ds.select(array().as("x")).schema == expectedSchema) - expectedSchema = new StructType() .add("x", MapType(StringType, StringType, valueContainsNull = false), nullable = false) assert(ds.select(map().as("x")).schema == expectedSchema) } @@ -3463,6 +3460,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.select("x").filter("exists(i, x -> x % d == 0)"), Seq(Row(1))) } + + test("SPARK-29462: Use null type by default if array have no argument") { + val ds = spark.range(1) + var expectedSchema = new StructType() + .add("x", ArrayType(NullType, containsNull = false), nullable = false) + assert(ds.select(array().as("x")).schema == expectedSchema) + } } object DataFrameFunctionsSuite {