From 933e661bf6fdae8e13278e300ce3cbea8e9387e0 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 8 Jul 2020 22:49:17 +0900 Subject: [PATCH 1/2] [SPARK-20680][SQL][FOLLOW-UP] Revert NullType.simpleString from 'unknown' to 'null' --- python/pyspark/sql/types.py | 3 -- .../sql/connector/catalog/CatalogV2Util.scala | 2 +- .../org/apache/spark/sql/types/NullType.scala | 4 --- .../sql-functions/sql-expression-schema.md | 2 +- .../sql-tests/results/ansi/literals.sql.out | 2 +- .../sql-tests/results/inline-table.sql.out | 2 +- .../sql-tests/results/literals.sql.out | 2 +- .../sql-tests/results/misc-functions.sql.out | 2 +- .../results/postgreSQL/select.sql.out | 4 +-- .../sql-compatibility-functions.sql.out | 6 ++-- .../results/udf/udf-inline-table.sql.out | 2 +- .../spark/sql/FileBasedDataSourceSuite.scala | 2 +- .../sql/hive/execution/HiveDDLSuite.scala | 32 +++++++++---------- 13 files changed, 29 insertions(+), 36 deletions(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index ddd13ca3a01be..320a68dffe7a3 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -116,9 +116,6 @@ class NullType(DataType): __metaclass__ = DataTypeSingleton - def simpleString(self): - return 'unknown' - class AtomicType(DataType): """An internal type used to represent everything that is not diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index d130a13282cc8..1a3a7207c6ca9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -356,7 +356,7 @@ private[sql] object CatalogV2Util { } if (containsNullType(dt)) { throw new AnalysisException( - "Cannot create tables with unknown type.") + s"Cannot create tables with ${NullType.simpleString} type.") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala index 6c9a1d69ca681..14097a5280d50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala @@ -32,10 +32,6 @@ class NullType private() extends DataType { override def defaultSize: Int = 1 private[spark] override def asNullable: NullType = this - - // "null" is mainly used to represent a literal in Spark, - // it's better to avoid using it for data types. - override def simpleString: String = "unknown" } /** diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index c39adac4ac680..8898a11ec08fb 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -34,7 +34,7 @@ | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct | | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct | | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct | -| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | +| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct | | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct | | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct | diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out index 02747718c91df..f6720f6c5faa4 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out @@ -5,7 +5,7 @@ -- !query select null, Null, nUll -- !query schema -struct +struct -- !query output NULL NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index 2dd6960682740..9943b93c431df 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -49,7 +49,7 @@ two 2 -- !query select * from values ("one", null), ("two", null) as data(a, b) -- !query schema -struct +struct -- !query output one NULL two NULL diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index 02747718c91df..f6720f6c5faa4 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -5,7 +5,7 @@ -- !query select null, Null, nUll -- !query schema -struct +struct -- !query output NULL NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out index 8d34bf293ef2b..bd8ffb82ee129 100644 --- a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -7,7 +7,7 @@ select typeof(null) -- !query schema struct -- !query output -unknown +null -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out index 8b32bd6ce1995..1e59036b979b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out @@ -308,7 +308,7 @@ struct<1:int> -- !query select foo.* from (select null) as foo -- !query schema -struct +struct -- !query output NULL @@ -316,7 +316,7 @@ NULL -- !query select foo.* from (select 'xyzzy',1,null) as foo -- !query schema -struct +struct -- !query output xyzzy 1 NULL diff --git a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out index b905f9e038619..26a44a85841e0 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out @@ -5,7 +5,7 @@ -- !query SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null) -- !query schema -struct +struct -- !query output x y NULL @@ -21,7 +21,7 @@ NULL x -- !query SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null) -- !query schema -struct +struct -- !query output x y NULL @@ -29,7 +29,7 @@ x y NULL -- !query SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null) -- !query schema -struct +struct -- !query output y x NULL diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out index 0680a873fbf8f..d78d347bc9802 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out @@ -49,7 +49,7 @@ two 2 -- !query select udf(a), b from values ("one", null), ("two", null) as data(a, b) -- !query schema -struct +struct -- !query output one NULL two NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index daa262d581cb0..231a8f2aa7ddd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -405,7 +405,7 @@ class FileBasedDataSourceSuite extends QueryTest "" } def errorMessage(format: String): String = { - s"$format data source does not support unknown data type." + s"$format data source does not support null data type." } withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> useV1List) { withTempDir { dir => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 774fb5b4b9ad5..c9c71200ccc06 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2310,44 +2310,44 @@ class HiveDDLSuite } } - test("SPARK-20680: Spark-sql do not support for unknown column datatype") { + test("SPARK-20680: do not support for null column datatype") { withTable("t") { - withView("tabUnknownType") { + withView("tabNullType") { hiveClient.runSqlHive("CREATE TABLE t (t1 int)") hiveClient.runSqlHive("INSERT INTO t VALUES (3)") - hiveClient.runSqlHive("CREATE VIEW tabUnknownType AS SELECT NULL AS col FROM t") - checkAnswer(spark.table("tabUnknownType"), Row(null)) + hiveClient.runSqlHive("CREATE VIEW tabNullType AS SELECT NULL AS col FROM t") + checkAnswer(spark.table("tabNullType"), Row(null)) // No exception shows - val desc = spark.sql("DESC tabUnknownType").collect().toSeq + val desc = spark.sql("DESC tabNullType").collect().toSeq assert(desc.contains(Row("col", NullType.simpleString, null))) } } - // Forbid CTAS with unknown type + // Forbid CTAS with null type withTable("t1", "t2", "t3") { val e1 = intercept[AnalysisException] { spark.sql("CREATE TABLE t1 USING PARQUET AS SELECT null as null_col") }.getMessage - assert(e1.contains("Cannot create tables with unknown type")) + assert(e1.contains("Cannot create tables with null type")) val e2 = intercept[AnalysisException] { spark.sql("CREATE TABLE t2 AS SELECT null as null_col") }.getMessage - assert(e2.contains("Cannot create tables with unknown type")) + assert(e2.contains("Cannot create tables with null type")) val e3 = intercept[AnalysisException] { spark.sql("CREATE TABLE t3 STORED AS PARQUET AS SELECT null as null_col") }.getMessage - assert(e3.contains("Cannot create tables with unknown type")) + assert(e3.contains("Cannot create tables with null type")) } - // Forbid Replace table AS SELECT with unknown type + // Forbid Replace table AS SELECT with null type withTable("t") { val v2Source = classOf[FakeV2Provider].getName val e = intercept[AnalysisException] { spark.sql(s"CREATE OR REPLACE TABLE t USING $v2Source AS SELECT null as null_col") }.getMessage - assert(e.contains("Cannot create tables with unknown type")) + assert(e.contains("Cannot create tables with null type")) } // Forbid creating table with VOID type in Spark @@ -2355,19 +2355,19 @@ class HiveDDLSuite val e1 = intercept[AnalysisException] { spark.sql(s"CREATE TABLE t1 (v VOID) USING PARQUET") }.getMessage - assert(e1.contains("Cannot create tables with unknown type")) + assert(e1.contains("Cannot create tables with null type")) val e2 = intercept[AnalysisException] { spark.sql(s"CREATE TABLE t2 (v VOID) USING hive") }.getMessage - assert(e2.contains("Cannot create tables with unknown type")) + assert(e2.contains("Cannot create tables with null type")) val e3 = intercept[AnalysisException] { spark.sql(s"CREATE TABLE t3 (v VOID)") }.getMessage - assert(e3.contains("Cannot create tables with unknown type")) + assert(e3.contains("Cannot create tables with null type")) val e4 = intercept[AnalysisException] { spark.sql(s"CREATE TABLE t4 (v VOID) STORED AS PARQUET") }.getMessage - assert(e4.contains("Cannot create tables with unknown type")) + assert(e4.contains("Cannot create tables with null type")) } // Forbid Replace table with VOID type @@ -2376,7 +2376,7 @@ class HiveDDLSuite val e = intercept[AnalysisException] { spark.sql(s"CREATE OR REPLACE TABLE t (v VOID) USING $v2Source") }.getMessage - assert(e.contains("Cannot create tables with unknown type")) + assert(e.contains("Cannot create tables with null type")) } // Make sure spark.catalog.createTable with null type will fail From 474cc702794644a5d70ef9e097b982e15152465c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 9 Jul 2020 12:00:28 +0900 Subject: [PATCH 2/2] Missing reverts in tests --- .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 4 ++-- .../org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index c9c71200ccc06..6b1c30deec352 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2413,7 +2413,7 @@ class HiveDDLSuite schema = schema, options = Map("fileFormat" -> "parquet")) }.getMessage - assert(e.contains("Cannot create tables with unknown type")) + assert(e.contains("Cannot create tables with null type")) } } @@ -2426,7 +2426,7 @@ class HiveDDLSuite schema = schema, options = Map.empty[String, String]) }.getMessage - assert(e.contains("Cannot create tables with unknown type")) + assert(e.contains("Cannot create tables with null type")) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 61c48c6f9c115..91fd8a47339fc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -121,7 +121,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { msg = intercept[AnalysisException] { sql("select null").write.mode("overwrite").orc(orcDir) }.getMessage - assert(msg.contains("ORC data source does not support unknown data type.")) + assert(msg.contains("ORC data source does not support null data type.")) msg = intercept[AnalysisException] { spark.udf.register("testType", () => new IntervalData())