Summary (free text placed before the first "diff --git" header, where patch
tools conventionally skip leading commentary):

* DescribeTableExec (DESCRIBE TABLE for v2 tables)
  - addTableDetails: emits a "Type" row, EXTERNAL when the table properties
    contain TableCatalog.PROP_EXTERNAL and MANAGED otherwise, and no longer
    prints PROP_EXTERNAL among the reserved properties.
  - addSchema: a column without a comment now yields a null comment cell
    instead of an empty string.
  - addPartitioning: prints nothing for a non-partitioned table. When every
    partition transform is an IdentityTransform it prints
    "# Partition Information", a header row built from the output attribute
    names, and one name/type/comment row per partition column (nested names
    joined with "." and quoted if needed). Otherwise it keeps the
    "# Partitioning" / "Part N" rows.
* Tests
  - command.DescribeTableSuiteBase: gains the non-partitioned and partitioned
    DESCRIBE TABLE tests.
  - v1: drops its copy of the non-partitioned test, adds getProvider()
    (derived from defaultUsing), renames one test, and uses getProvider() for
    the expected "Provider" row.
  - v2: drops its copy of the non-partitioned test, adds a test for a table
    partitioned by nested columns, and updates the EXTENDED expectations.
  - hive: uses getProvider() for the expected "Provider" row.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Leading whitespace was rebuilt from Scala
conventions; confirm context lines against the target tree before applying.
NOTE(review): the field list of the struct column in the nested-columns test
(`struct<id:INT, a:BIGINT>` / `struct<id:int,a:bigint>`) was missing in that
copy and is restored from the partition rows of the same test; confirm the
exact spelling.

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
index 6cca0a772222..acb861d7679d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -21,8 +21,11 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.catalog.CatalogTableType
 import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table}
+import org.apache.spark.sql.catalyst.util.quoteIfNeeded
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog}
+import org.apache.spark.sql.connector.expressions.IdentityTransform
 
 case class DescribeTableExec(
     output: Seq[Attribute],
@@ -45,11 +48,19 @@ case class DescribeTableExec(
     rows += toCatalystRow("# Detailed Table Information", "", "")
     rows += toCatalystRow("Name", table.name(), "")
 
-    CatalogV2Util.TABLE_RESERVED_PROPERTIES.foreach(propKey => {
-      if (table.properties.containsKey(propKey)) {
-        rows += toCatalystRow(propKey.capitalize, table.properties.get(propKey), "")
-      }
-    })
+    val tableType = if (table.properties().containsKey(TableCatalog.PROP_EXTERNAL)) {
+      CatalogTableType.EXTERNAL.name
+    } else {
+      CatalogTableType.MANAGED.name
+    }
+    rows += toCatalystRow("Type", tableType, "")
+    CatalogV2Util.TABLE_RESERVED_PROPERTIES
+      .filterNot(_ == TableCatalog.PROP_EXTERNAL)
+      .foreach(propKey => {
+        if (table.properties.containsKey(propKey)) {
+          rows += toCatalystRow(propKey.capitalize, table.properties.get(propKey), "")
+        }
+      })
     val properties =
       conf.redactOptions(table.properties.asScala.toMap).toList
         .filter(kv => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(kv._1))
@@ -62,7 +73,7 @@ case class DescribeTableExec(
   private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = {
     rows ++= table.schema.map{ column =>
       toCatalystRow(
-        column.name, column.dataType.simpleString, column.getComment().getOrElse(""))
+        column.name, column.dataType.simpleString, column.getComment().orNull)
     }
   }
 
@@ -80,13 +91,31 @@ case class DescribeTableExec(
   }
 
   private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = {
-    rows += emptyRow()
-    rows += toCatalystRow("# Partitioning", "", "")
-    if (table.partitioning.isEmpty) {
-      rows += toCatalystRow("Not partitioned", "", "")
-    } else {
-      rows ++= table.partitioning.zipWithIndex.map {
-        case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
+    if (table.partitioning.nonEmpty) {
+      val partitionColumnsOnly = table.partitioning.forall(t => t.isInstanceOf[IdentityTransform])
+      if (partitionColumnsOnly) {
+        rows += toCatalystRow("# Partition Information", "", "")
+        rows += toCatalystRow(s"# ${output(0).name}", output(1).name, output(2).name)
+        rows ++= table.partitioning
+          .map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
+          .map { fieldNames =>
+            val nestedField = table.schema.findNestedField(fieldNames)
+            assert(nestedField.isDefined,
+              s"Not found the partition column ${fieldNames.map(quoteIfNeeded).mkString(".")} " +
+              s"in the table schema ${table.schema().catalogString}.")
+            nestedField.get
+          }.map { case (path, field) =>
+            toCatalystRow(
+              (path :+ field.name).map(quoteIfNeeded(_)).mkString("."),
+              field.dataType.simpleString,
+              field.getComment().orNull)
+          }
+      } else {
+        rows += emptyRow()
+        rows += toCatalystRow("# Partitioning", "", "")
+        rows ++= table.partitioning.zipWithIndex.map {
+          case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
+        }
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala
index 0cf062fb34e6..7ecc38848cf2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution.command
 
-import org.apache.spark.sql.{AnalysisException, QueryTest}
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
 import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType, StructType}
 
 /**
@@ -44,6 +44,42 @@ trait DescribeTableSuiteBase extends QueryTest with DDLCommandTestUtils {
     }
   }
 
+  test("DESCRIBE TABLE of a non-partitioned table") {
+    withNamespaceAndTable("ns", "table") { tbl =>
+      spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
+      val descriptionDf = spark.sql(s"DESCRIBE TABLE $tbl")
+      assert(descriptionDf.schema.map(field => (field.name, field.dataType)) ===
+        Seq(
+          ("col_name", StringType),
+          ("data_type", StringType),
+          ("comment", StringType)))
+      QueryTest.checkAnswer(
+        descriptionDf,
+        Seq(
+          Row("data", "string", null),
+          Row("id", "bigint", null)))
+    }
+  }
+
+  test("DESCRIBE TABLE of a partitioned table") {
+    withNamespaceAndTable("ns", "table") { tbl =>
+      spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing PARTITIONED BY (id)")
+      val descriptionDf = spark.sql(s"DESCRIBE TABLE $tbl")
+      assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === Seq(
+        ("col_name", StringType),
+        ("data_type", StringType),
+        ("comment", StringType)))
+      QueryTest.checkAnswer(
+        descriptionDf.filter("col_name != 'Created Time'"),
+        Seq(
+          Row("data", "string", null),
+          Row("id", "bigint", null),
+          Row("# Partition Information", "", ""),
+          Row("# col_name", "data_type", "comment"),
+          Row("id", "bigint", null)))
+    }
+  }
+
   test("SPARK-34561: drop/add columns to a dataset of `DESCRIBE TABLE`") {
     withNamespaceAndTable("ns", "table") { tbl =>
       sql(s"CREATE TABLE $tbl (c0 INT) $defaultUsing")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
index 01b7aefdd786..9ea0d4bacd29 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.execution.command.v1
 
+import java.util.Locale
+
 import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
 import org.apache.spark.sql.execution.command
 import org.apache.spark.sql.types.StringType
@@ -33,24 +35,9 @@ import org.apache.spark.sql.types.StringType
 trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
   with command.TestsV1AndV2Commands {
 
-  test("DESCRIBE TABLE with non-'partitioned-by' clause") {
-    withNamespaceAndTable("ns", "table") { tbl =>
-      spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
-      val descriptionDf = spark.sql(s"DESCRIBE TABLE $tbl")
-      assert(descriptionDf.schema.map(field => (field.name, field.dataType)) ===
-        Seq(
-          ("col_name", StringType),
-          ("data_type", StringType),
-          ("comment", StringType)))
-      QueryTest.checkAnswer(
-        descriptionDf,
-        Seq(
-          Row("data", "string", null),
-          Row("id", "bigint", null)))
-    }
-  }
+  def getProvider(): String = defaultUsing.stripPrefix("USING").trim.toLowerCase(Locale.ROOT)
 
-  test("Describing a partition is not supported") {
+  test("Describing of a non-existent partition") {
     withNamespaceAndTable("ns", "table") { tbl =>
       spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
         "PARTITIONED BY (id)")
@@ -96,7 +83,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
           Row("Last Access", "UNKNOWN", ""),
           Row("Created By", "Spark 3.4.0-SNAPSHOT", ""),
           Row("Type", "EXTERNAL", ""),
-          Row("Provider", "parquet", ""),
+          Row("Provider", getProvider(), ""),
           Row("Comment", "this is a test table", ""),
           Row("Table Properties", "[bar=baz]", ""),
           Row("Location", "file:/tmp/testcat/table_name", ""),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
index ee614b87718c..b09abec6bc33 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
@@ -28,26 +28,6 @@ import org.apache.spark.util.Utils
  */
 class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuiteBase {
 
-  test("DESCRIBE TABLE with non-'partitioned-by' clause") {
-    withNamespaceAndTable("ns", "table") { tbl =>
-      spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
-      val descriptionDf = spark.sql(s"DESCRIBE TABLE $tbl")
-      assert(descriptionDf.schema.map(field => (field.name, field.dataType)) ===
-        Seq(
-          ("col_name", StringType),
-          ("data_type", StringType),
-          ("comment", StringType)))
-      QueryTest.checkAnswer(
-        descriptionDf,
-        Seq(
-          Row("data", "string", ""),
-          Row("id", "bigint", ""),
-          Row("", "", ""),
-          Row("# Partitioning", "", ""),
-          Row("Not partitioned", "", "")))
-    }
-  }
-
   test("Describing a partition is not supported") {
     withNamespaceAndTable("ns", "table") { tbl =>
       spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
@@ -59,6 +39,23 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
     }
   }
 
+  test("DESCRIBE TABLE of a partitioned table by nested columns") {
+    withNamespaceAndTable("ns", "table") { tbl =>
+      sql(s"CREATE TABLE $tbl (s struct<id:INT, a:BIGINT>, data string) " +
+        s"$defaultUsing PARTITIONED BY (s.id, s.a)")
+      val descriptionDf = sql(s"DESCRIBE TABLE $tbl")
+      QueryTest.checkAnswer(
+        descriptionDf.filter("col_name != 'Created Time'"),
+        Seq(
+          Row("data", "string", null),
+          Row("s", "struct<id:int,a:bigint>", null),
+          Row("# Partition Information", "", ""),
+          Row("# col_name", "data_type", "comment"),
+          Row("s.id", "int", null),
+          Row("s.a", "bigint", null)))
+    }
+  }
+
   test("DESCRIBE TABLE EXTENDED of a partitioned table") {
     withNamespaceAndTable("ns", "table") { tbl =>
       spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing" +
@@ -74,11 +71,11 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
       QueryTest.checkAnswer(
         descriptionDf,
         Seq(
-          Row("id", "bigint", ""),
-          Row("data", "string", ""),
-          Row("", "", ""),
-          Row("# Partitioning", "", ""),
-          Row("Part 0", "id", ""),
+          Row("id", "bigint", null),
+          Row("data", "string", null),
+          Row("# Partition Information", "", ""),
+          Row("# col_name", "data_type", "comment"),
+          Row("id", "bigint", null),
           Row("", "", ""),
           Row("# Metadata Columns", "", ""),
           Row("index", "int", "Metadata column used to conflict with a data column"),
@@ -86,6 +83,7 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
           Row("", "", ""),
           Row("# Detailed Table Information", "", ""),
           Row("Name", tbl, ""),
+          Row("Type", "MANAGED", ""),
           Row("Comment", "this is a test table", ""),
           Row("Location", "file:/tmp/testcat/table_name", ""),
           Row("Provider", "_", ""),
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeTableSuite.scala
index 455a2c8a307e..783f12dd81d6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeTableSuite.scala
@@ -72,7 +72,7 @@ class DescribeTableSuite extends v1.DescribeTableSuiteBase with CommandSuiteBase
           Row("Last Access", "UNKNOWN", ""),
           Row("Created By", "Spark 3.4.0-SNAPSHOT", ""),
           Row("Type", "EXTERNAL", ""),
-          Row("Provider", "hive", ""),
+          Row("Provider", getProvider(), ""),
           Row("Comment", "this is a test table", ""),
           Row("Location", "file:/tmp/testcat/table_name", ""),
           Row("Serde Library", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", ""),