diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
index 1a3a7207c6ca9..a6d67f4683c15 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
@@ -356,7 +356,7 @@ private[sql] object CatalogV2Util {
     }
     if (containsNullType(dt)) {
       throw new AnalysisException(
-        s"Cannot create tables with ${NullType.simpleString} type.")
+        s"Cannot create tables/views with ${NullType.simpleString} type.")
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index bc3f38a35834d..3701cf42914f5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -529,6 +529,9 @@ class ResolveSessionCatalog(
         partitionSpec)

     case AlterViewAsStatement(name, originalText, query) =>
+      if (query.resolved) {
+        assertNoNullTypeInSchema(query.schema)
+      }
       val viewName = parseTempViewOrV1Table(name, "ALTER VIEW QUERY")
       AlterViewAsCommand(
         viewName.asTableIdentifier,
@@ -538,7 +541,9 @@ class ResolveSessionCatalog(
     case CreateViewStatement(
       tbl, userSpecifiedColumns, comment, properties,
       originalText, child, allowExisting, replace, viewType) =>
-
+      if (child.resolved) {
+        assertNoNullTypeInSchema(child.schema)
+      }
       val v1TableName = if (viewType != PersistedView) {
         // temp view doesn't belong to any catalog and we shouldn't resolve catalog in the name.
         tbl
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 23f1d6c983413..6586a22b90396 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View}
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper
+import org.apache.spark.sql.connector.catalog.CatalogV2Util.assertNoNullTypeInSchema
 import org.apache.spark.sql.internal.StaticSQLConf
 import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType}
 import org.apache.spark.sql.util.SchemaUtils
@@ -96,6 +97,7 @@ case class CreateViewCommand(
     qe.assertAnalyzed()
     val analyzedPlan = qe.analyzed

+    assertNoNullTypeInSchema(analyzedPlan.schema)
     if (userSpecifiedColumns.nonEmpty &&
         userSpecifiedColumns.length != analyzedPlan.output.length) {
       throw new AnalysisException(s"The number of columns produced by the SELECT clause " +
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql
index a8af1db77563c..c79c31921ce14 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql
@@ -115,9 +115,9 @@ SELECT

 -- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY)
 CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES
-  (TRUE, null, FALSE, null),
-  (FALSE, TRUE, null, null),
-  (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4);
+  (TRUE, null, FALSE),
+  (FALSE, TRUE, null),
+  (null, TRUE, FALSE) AS bool_test(b1, b2, b3);

 -- empty case
 SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0;
@@ -126,7 +126,6 @@ SELECT
   BOOL_AND(b1) AS f1,
   BOOL_AND(b2) AS t2,
   BOOL_AND(b3) AS f3,
-  BOOL_AND(b4) AS n4,
   BOOL_AND(NOT b2) AS f5,
   BOOL_AND(NOT b3) AS t6
 FROM bool_test;
@@ -135,7 +134,6 @@ SELECT
   EVERY(b1) AS f1,
   EVERY(b2) AS t2,
   EVERY(b3) AS f3,
-  EVERY(b4) AS n4,
   EVERY(NOT b2) AS f5,
   EVERY(NOT b3) AS t6
 FROM bool_test;
@@ -144,7 +142,6 @@ SELECT
   BOOL_OR(b1) AS t1,
   BOOL_OR(b2) AS t2,
   BOOL_OR(b3) AS f3,
-  BOOL_OR(b4) AS n4,
   BOOL_OR(NOT b2) AS f5,
   BOOL_OR(NOT b3) AS t6
 FROM bool_test;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql
index b4054850062b7..936234fdddb02 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql
@@ -103,9 +103,9 @@ SELECT

 -- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY)
 CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES
-  (TRUE, null, FALSE, null),
-  (FALSE, TRUE, null, null),
-  (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4);
+  (TRUE, null, FALSE),
+  (FALSE, TRUE, null),
+  (null, TRUE, FALSE) AS bool_test(b1, b2, b3);

 -- empty case
 SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0;
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out
index 6633bf5d114ed..ff401f111a879 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out
@@ -135,9 +135,9 @@ true false true false true true true true true

 -- !query
 CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES
-  (TRUE, null, FALSE, null),
-  (FALSE, TRUE, null, null),
-  (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4)
+  (TRUE, null, FALSE),
+  (FALSE, TRUE, null),
+  (null, TRUE, FALSE) AS bool_test(b1, b2, b3)
 -- !query schema
 struct<>
 -- !query output
@@ -157,14 +157,13 @@ SELECT
   BOOL_AND(b1) AS f1,
   BOOL_AND(b2) AS t2,
   BOOL_AND(b3) AS f3,
-  BOOL_AND(b4) AS n4,
   BOOL_AND(NOT b2) AS f5,
   BOOL_AND(NOT b3) AS t6
 FROM bool_test
 -- !query schema
-struct<f1:boolean,t2:boolean,f3:boolean,n4:boolean,f5:boolean,t6:boolean>
+struct<f1:boolean,t2:boolean,f3:boolean,f5:boolean,t6:boolean>
 -- !query output
-false true false NULL false true
+false true false false true


 -- !query
@@ -172,14 +171,13 @@ SELECT
   EVERY(b1) AS f1,
   EVERY(b2) AS t2,
   EVERY(b3) AS f3,
-  EVERY(b4) AS n4,
   EVERY(NOT b2) AS f5,
   EVERY(NOT b3) AS t6
 FROM bool_test
 -- !query schema
-struct<f1:boolean,t2:boolean,f3:boolean,n4:boolean,f5:boolean,t6:boolean>
+struct<f1:boolean,t2:boolean,f3:boolean,f5:boolean,t6:boolean>
 -- !query output
-false true false NULL false true
+false true false false true


 -- !query
@@ -187,14 +185,13 @@ SELECT
   BOOL_OR(b1) AS t1,
   BOOL_OR(b2) AS t2,
   BOOL_OR(b3) AS f3,
-  BOOL_OR(b4) AS n4,
   BOOL_OR(NOT b2) AS f5,
   BOOL_OR(NOT b3) AS t6
 FROM bool_test
 -- !query schema
-struct<t1:boolean,t2:boolean,f3:boolean,n4:boolean,f5:boolean,t6:boolean>
+struct<t1:boolean,t2:boolean,f3:boolean,f5:boolean,t6:boolean>
 -- !query output
-true true false NULL false true
+true true false false true


 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out
index d4941d0a0b768..ebf95b03fe892 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out
@@ -96,9 +96,9 @@ true false true false true true true true true

 -- !query
 CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES
-  (TRUE, null, FALSE, null),
-  (FALSE, TRUE, null, null),
-  (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4)
+  (TRUE, null, FALSE),
+  (FALSE, TRUE, null),
+  (null, TRUE, FALSE) AS bool_test(b1, b2, b3)
 -- !query schema
 struct<>
 -- !query output
@@ -118,14 +118,13 @@ SELECT
   BOOL_AND(b1) AS f1,
   BOOL_AND(b2) AS t2,
   BOOL_AND(b3) AS f3,
-  BOOL_AND(b4) AS n4,
   BOOL_AND(NOT b2) AS f5,
   BOOL_AND(NOT b3) AS t6
 FROM bool_test
 -- !query schema
-struct<f1:boolean,t2:boolean,f3:boolean,n4:boolean,f5:boolean,t6:boolean>
+struct<f1:boolean,t2:boolean,f3:boolean,f5:boolean,t6:boolean>
 -- !query output
-false true false NULL false true
+false true false false true


 -- !query
@@ -133,14 +132,13 @@ SELECT
   EVERY(b1) AS f1,
   EVERY(b2) AS t2,
   EVERY(b3) AS f3,
-  EVERY(b4) AS n4,
   EVERY(NOT b2) AS f5,
   EVERY(NOT b3) AS t6
 FROM bool_test
 -- !query schema
-struct<f1:boolean,t2:boolean,f3:boolean,n4:boolean,f5:boolean,t6:boolean>
+struct<f1:boolean,t2:boolean,f3:boolean,f5:boolean,t6:boolean>
 -- !query output
-false true false NULL false true
+false true false false true


 -- !query
@@ -148,14 +146,13 @@ SELECT
   BOOL_OR(b1) AS t1,
   BOOL_OR(b2) AS t2,
   BOOL_OR(b3) AS f3,
-  BOOL_OR(b4) AS n4,
   BOOL_OR(NOT b2) AS f5,
   BOOL_OR(NOT b3) AS t6
 FROM bool_test
 -- !query schema
-struct<t1:boolean,t2:boolean,f3:boolean,n4:boolean,f5:boolean,t6:boolean>
+struct<t1:boolean,t2:boolean,f3:boolean,f5:boolean,t6:boolean>
 -- !query output
-true true false NULL false true
+true true false false true


 -- !query
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
index d428b7ebc0e91..8790d05b6c7b7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
@@ -118,7 +118,6 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper {
   }

   val simpleTypes =
-    NullType ::
     BooleanType ::
     ByteType ::
     ShortType ::
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index 575efec364812..b43e51fb6b25c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 import org.apache.spark.sql.internal.SQLConf.MAX_NESTED_VIEW_DEPTH
 import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
+import org.apache.spark.sql.types.NullType

 class SimpleSQLViewSuite extends SQLViewSuite with SharedSparkSession

@@ -735,4 +736,30 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
       }
     }
   }
+
+  test("SPARK-32356: Forbid create view with null type in sql side") {
+    // test sql
+    val sql1 = "create view v as select null as c"
+    val sql2 = "alter view v as select null as c"
+    val sql3 = "create temporary view v as select null as c"
+    val sql4 = "create global temporary view v as select null as c"
+    Seq(sql1, sql2, sql3, sql4).foreach { input =>
+      val msg = intercept[AnalysisException] {
+        sql(input)
+      }.getMessage
+      assert(msg.contains(s"Cannot create tables/views with ${NullType.simpleString} type."))
+    }
+  }
+
+  test("SPARK-32356: Forbid create view with null type in dataset side") {
+    // test df.createTempView
+    val msg = intercept[AnalysisException] {
+      sql("select null as c").createTempView("null_type_view")
+    }.getMessage
+    assert(msg.contains(s"Cannot create tables/views with ${NullType.simpleString} type."))
+    val msg2 = intercept[AnalysisException] {
+      sql("select null as c").createGlobalTempView("null_type_view")
+    }.getMessage
+    assert(msg2.contains(s"Cannot create tables/views with ${NullType.simpleString} type."))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
index 18f29f7b90ad5..b2808b33962fa 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala
@@ -106,15 +106,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSparkSession {
   }

   test("non-primitive type with nullability:true") {
-    val schemaNull = StructType(Seq(StructField("col", NullType, true)))
-    val rddNull = spark.sparkContext.parallelize((1 to 10).map(i => Row(null)))
-    cachePrimitiveTest(spark.createDataFrame(rddNull, schemaNull), "Null")
-
     tesNonPrimitiveType(true)
   }

   test("non-primitive type with nullability:false") {
-    tesNonPrimitiveType(false)
+    tesNonPrimitiveType(false)
   }

   test("simple columnar query") {
@@ -248,7 +244,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSparkSession {
       StructField("f1", FloatType, true) ::
       StructField("f2", ArrayType(BooleanType), true) :: Nil)
     val dataTypes =
-      Seq(StringType, BinaryType, NullType, BooleanType,
+      Seq(StringType, BinaryType, BooleanType,
        ByteType, ShortType, IntegerType, LongType,
        FloatType, DoubleType, DecimalType(25, 5), DecimalType(6, 5),
        DateType, TimestampType, ArrayType(IntegerType), struct)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index fbd1fc1ea98df..c9e4145c7ccec 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2314,6 +2314,7 @@ class HiveDDLSuite
   }

   test("SPARK-20680: do not support for null column datatype") {
+    val errMsg = s"Cannot create tables/views with ${NullType.simpleString} type."
     withTable("t") {
       withView("tabNullType") {
         hiveClient.runSqlHive("CREATE TABLE t (t1 int)")
@@ -2331,17 +2332,17 @@ class HiveDDLSuite
      val e1 = intercept[AnalysisException] {
        spark.sql("CREATE TABLE t1 USING PARQUET AS SELECT null as null_col")
      }.getMessage
-      assert(e1.contains("Cannot create tables with null type"))
+      assert(e1.contains(errMsg))

      val e2 = intercept[AnalysisException] {
        spark.sql("CREATE TABLE t2 AS SELECT null as null_col")
      }.getMessage
-      assert(e2.contains("Cannot create tables with null type"))
+      assert(e2.contains(errMsg))

      val e3 = intercept[AnalysisException] {
        spark.sql("CREATE TABLE t3 STORED AS PARQUET AS SELECT null as null_col")
      }.getMessage
-      assert(e3.contains("Cannot create tables with null type"))
+      assert(e3.contains(errMsg))
    }

    // Forbid Replace table AS SELECT with null type
@@ -2350,7 +2351,7 @@ class HiveDDLSuite
      val e = intercept[AnalysisException] {
        spark.sql(s"CREATE OR REPLACE TABLE t USING $v2Source AS SELECT null as null_col")
      }.getMessage
-      assert(e.contains("Cannot create tables with null type"))
+      assert(e.contains(errMsg))
    }

    // Forbid creating table with VOID type in Spark
@@ -2358,19 +2359,19 @@ class HiveDDLSuite
      val e1 = intercept[AnalysisException] {
        spark.sql(s"CREATE TABLE t1 (v VOID) USING PARQUET")
      }.getMessage
-      assert(e1.contains("Cannot create tables with null type"))
+      assert(e1.contains(errMsg))
      val e2 = intercept[AnalysisException] {
        spark.sql(s"CREATE TABLE t2 (v VOID) USING hive")
      }.getMessage
-      assert(e2.contains("Cannot create tables with null type"))
+      assert(e2.contains(errMsg))
      val e3 = intercept[AnalysisException] {
        spark.sql(s"CREATE TABLE t3 (v VOID)")
      }.getMessage
-      assert(e3.contains("Cannot create tables with null type"))
+      assert(e3.contains(errMsg))
      val e4 = intercept[AnalysisException] {
        spark.sql(s"CREATE TABLE t4 (v VOID) STORED AS PARQUET")
      }.getMessage
-      assert(e4.contains("Cannot create tables with null type"))
+      assert(e4.contains(errMsg))
    }

    // Forbid Replace table with VOID type
@@ -2379,7 +2380,7 @@ class HiveDDLSuite
      val e = intercept[AnalysisException] {
        spark.sql(s"CREATE OR REPLACE TABLE t (v VOID) USING $v2Source")
      }.getMessage
-      assert(e.contains("Cannot create tables with null type"))
+      assert(e.contains(errMsg))
    }

    // Make sure spark.catalog.createTable with null type will fail
@@ -2416,7 +2417,7 @@ class HiveDDLSuite
          schema = schema,
          options = Map("fileFormat" -> "parquet"))
      }.getMessage
-      assert(e.contains("Cannot create tables with null type"))
+      assert(e.contains(s"Cannot create tables/views with ${NullType.simpleString} type."))
    }
  }

@@ -2429,7 +2430,7 @@ class HiveDDLSuite
          schema = schema,
          options = Map.empty[String, String])
      }.getMessage
-      assert(e.contains("Cannot create tables with null type"))
+      assert(e.contains(s"Cannot create tables/views with ${NullType.simpleString} type."))
    }
  }
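Taken together, the patch extends the existing CTAS-time NullType check (`assertNoNullTypeInSchema`) to view creation and alteration. Below is a minimal sketch of the resulting behavior, assuming a build with this patch applied and a local `SparkSession`; the object name `NullTypeViewDemo` and the view name `v` are illustrative, not part of the patch:

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

// Illustrative driver, not part of the patch.
object NullTypeViewDemo extends App {
  val spark = SparkSession.builder()
    .master("local[1]")
    .appName("SPARK-32356 demo")
    .getOrCreate()

  // With the patch applied, a view whose schema would contain NullType
  // (here, a bare null literal column) is rejected at analysis time.
  try {
    spark.sql("CREATE TEMPORARY VIEW v AS SELECT null AS c")
  } catch {
    case e: AnalysisException =>
      // Expected to contain: "Cannot create tables/views with null type."
      println(e.getMessage)
  }

  // Casting the literal to a concrete type keeps the view creatable.
  spark.sql("CREATE TEMPORARY VIEW v AS SELECT CAST(null AS STRING) AS c")
  spark.sql("SELECT * FROM v").show()

  spark.stop()
}
```

The same check fires for `Dataset.createTempView` and `createGlobalTempView`, since `CreateViewCommand` now calls `assertNoNullTypeInSchema` on the analyzed plan, and for `ALTER VIEW ... AS` via the new guard in `ResolveSessionCatalog`.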