apache · MaxGekk · Jun 21, 2022 · Jun 21, 2022 · Jun 21, 2022 · Jun 22, 2022
diff --git a/...core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/...core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -21,8 +21,11 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.catalog.CatalogTableType
 import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table}
+import org.apache.spark.sql.catalyst.util.quoteIfNeeded
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog}
+import org.apache.spark.sql.connector.expressions.IdentityTransform
 
 case class DescribeTableExec(
     output: Seq[Attribute],
@@ -45,11 +48,19 @@ case class DescribeTableExec(
     rows += toCatalystRow("# Detailed Table Information", "", "")
     rows += toCatalystRow("Name", table.name(), "")
 
-    CatalogV2Util.TABLE_RESERVED_PROPERTIES.foreach(propKey => {
-      if (table.properties.containsKey(propKey)) {
-        rows += toCatalystRow(propKey.capitalize, table.properties.get(propKey), "")
-      }
-    })
+    val tableType = if (table.properties().containsKey(TableCatalog.PROP_EXTERNAL)) {
+      CatalogTableType.EXTERNAL.name
+    } else {
+      CatalogTableType.MANAGED.name
+    }
+    rows += toCatalystRow("Type", tableType, "")
+    CatalogV2Util.TABLE_RESERVED_PROPERTIES
+      .filterNot(_ == TableCatalog.PROP_EXTERNAL)
+      .foreach(propKey => {
+        if (table.properties.containsKey(propKey)) {
+          rows += toCatalystRow(propKey.capitalize, table.properties.get(propKey), "")
+        }
+      })
     val properties =
       conf.redactOptions(table.properties.asScala.toMap).toList
         .filter(kv => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(kv._1))
@@ -62,7 +73,7 @@ case class DescribeTableExec(
   private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = {
     rows ++= table.schema.map{ column =>
       toCatalystRow(
-        column.name, column.dataType.simpleString, column.getComment().getOrElse(""))
+        column.name, column.dataType.simpleString, column.getComment().orNull)
     }
   }
 
@@ -80,13 +91,31 @@ case class DescribeTableExec(
   }
 
   private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = {
-    rows += emptyRow()
-    rows += toCatalystRow("# Partitioning", "", "")
-    if (table.partitioning.isEmpty) {
-      rows += toCatalystRow("Not partitioned", "", "")
-    } else {
-      rows ++= table.partitioning.zipWithIndex.map {
-        case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
+    // Appends the partitioning section to `rows`; a non-partitioned table adds nothing.
+    if (table.partitioning.nonEmpty) {
+      val identityRefs = table.partitioning.collect { case t: IdentityTransform => t.ref }
+      if (identityRefs.length == table.partitioning.length) {
+        // Only identity transforms: list the partition columns under a repeated header.
+        rows += toCatalystRow("# Partition Information", "", "")
+        rows += toCatalystRow(s"# ${output(0).name}", output(1).name, output(2).name)
+        rows ++= identityRefs.map(_.fieldNames()).map { fieldNames =>
+          val nestedField = table.schema.findNestedField(fieldNames)
+          assert(nestedField.isDefined,
+            s"Not found the partition column ${fieldNames.map(quoteIfNeeded).mkString(".")} " +
+            s"in the table schema ${table.schema().catalogString}.")
+          val (path, field) = nestedField.get
+          toCatalystRow(
+            (path :+ field.name).map(quoteIfNeeded(_)).mkString("."),
+            field.dataType.simpleString,
+            field.getComment().orNull)
+        }
+      } else {
+        // At least one non-identity transform: describe every transform by its position.
+        rows += emptyRow()
+        rows += toCatalystRow("# Partitioning", "", "")
+        rows ++= table.partitioning.zipWithIndex.map {
+          case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
+        }
       }
     }
   }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution.command
 
-import org.apache.spark.sql.{AnalysisException, QueryTest}
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
 import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType, StructType}
 
 /**
@@ -44,6 +44,42 @@ trait DescribeTableSuiteBase extends QueryTest with DDLCommandTestUtils {
     }
   }
 
+  test("DESCRIBE TABLE of a non-partitioned table") {
+    // A table created without PARTITIONED BY is described by its columns only.
+    withNamespaceAndTable("ns", "table") { tbl =>
+      spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
+      val described = spark.sql(s"DESCRIBE TABLE $tbl")
+
+      // The output schema is the fixed triple of string columns.
+      val expectedSchema = Seq("col_name", "data_type", "comment").map(_ -> StringType)
+      val actualSchema = described.schema.map(f => (f.name, f.dataType))
+      assert(actualSchema === expectedSchema)
+
+      // Only the two table columns are listed; a missing column comment shows up as NULL.
+      val expectedRows = Seq(Row("data", "string", null), Row("id", "bigint", null))
+      QueryTest.checkAnswer(described, expectedRows)
+    }
+  }
+
+  test("DESCRIBE TABLE of a partitioned table") {
+    // Partitioning by a plain column must add a "# Partition Information" section that
+    // repeats the output header and then lists the partition column.
+    withNamespaceAndTable("ns", "table") { tbl =>
+      spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing PARTITIONED BY (id)")
+      val described = spark.sql(s"DESCRIBE TABLE $tbl")
+      val expectedSchema = Seq("col_name", "data_type", "comment").map(_ -> StringType)
+      assert(described.schema.map(f => (f.name, f.dataType)) === expectedSchema)
+      // Any 'Created Time' row is filtered out before comparing.
+      val expectedRows = Seq(
+        Row("data", "string", null),
+        Row("id", "bigint", null),
+        Row("# Partition Information", "", ""),
+        Row("# col_name", "data_type", "comment"),
+        Row("id", "bigint", null))
+      QueryTest.checkAnswer(described.filter("col_name != 'Created Time'"), expectedRows)
+    }
+  }
+
   test("SPARK-34561: drop/add columns to a dataset of `DESCRIBE TABLE`") {
     withNamespaceAndTable("ns", "table") { tbl =>
       sql(s"CREATE TABLE $tbl (c0 INT) $defaultUsing")

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.execution.command.v1
 
+import java.util.Locale
+
 import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
 import org.apache.spark.sql.execution.command
 import org.apache.spark.sql.types.StringType
@@ -33,24 +35,9 @@ import org.apache.spark.sql.types.StringType
 trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
   with command.TestsV1AndV2Commands {
 
-  test("DESCRIBE TABLE with non-'partitioned-by' clause") {
-    withNamespaceAndTable("ns", "table") { tbl =>
-      spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
-      val descriptionDf = spark.sql(s"DESCRIBE TABLE $tbl")
-      assert(descriptionDf.schema.map(field => (field.name, field.dataType)) ===
-        Seq(
-          ("col_name", StringType),
-          ("data_type", StringType),
-          ("comment", StringType)))
-      QueryTest.checkAnswer(
-        descriptionDf,
-        Seq(
-          Row("data", "string", null),
-          Row("id", "bigint", null)))
-    }
-  }
+  def getProvider(): String = defaultUsing.stripPrefix("USING").trim.toLowerCase(Locale.ROOT)
 
-  test("Describing a partition is not supported") {
+  test("Describing of a non-existent partition") {
     withNamespaceAndTable("ns", "table") { tbl =>
       spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
         "PARTITIONED BY (id)")
@@ -96,7 +83,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
           Row("Last Access", "UNKNOWN", ""),
           Row("Created By", "Spark 3.4.0-SNAPSHOT", ""),
           Row("Type", "EXTERNAL", ""),
-          Row("Provider", "parquet", ""),
+          Row("Provider", getProvider(), ""),
           Row("Comment", "this is a test table", ""),
           Row("Table Properties", "[bar=baz]", ""),
           Row("Location", "file:/tmp/testcat/table_name", ""),

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
@@ -28,26 +28,6 @@ import org.apache.spark.util.Utils
  */
 class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuiteBase {
 
-  test("DESCRIBE TABLE with non-'partitioned-by' clause") {
-    withNamespaceAndTable("ns", "table") { tbl =>
-      spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
-      val descriptionDf = spark.sql(s"DESCRIBE TABLE $tbl")
-      assert(descriptionDf.schema.map(field => (field.name, field.dataType)) ===
-        Seq(
-          ("col_name", StringType),
-          ("data_type", StringType),
-          ("comment", StringType)))
-      QueryTest.checkAnswer(
-        descriptionDf,
-        Seq(
-          Row("data", "string", ""),
-          Row("id", "bigint", ""),
-          Row("", "", ""),
-          Row("# Partitioning", "", ""),
-          Row("Not partitioned", "", "")))
-    }
-  }
-
   test("Describing a partition is not supported") {
     withNamespaceAndTable("ns", "table") { tbl =>
       spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
@@ -59,6 +39,23 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
     }
   }
 
+  test("DESCRIBE TABLE of a partitioned table by nested columns") {
+    // Partition columns nested inside a struct are expected under their dotted path.
+    withNamespaceAndTable("ns", "table") { tbl =>
+      sql(s"CREATE TABLE $tbl (s struct<id:INT, a:BIGINT>, data string) " +
+        s"$defaultUsing PARTITIONED BY (s.id, s.a)")
+      val expectedRows = Seq(
+        Row("data", "string", null),
+        Row("s", "struct<id:int,a:bigint>", null),
+        Row("# Partition Information", "", ""),
+        Row("# col_name", "data_type", "comment"),
+        Row("s.id", "int", null),
+        Row("s.a", "bigint", null))
+      val described = sql(s"DESCRIBE TABLE $tbl").filter("col_name != 'Created Time'")
+      QueryTest.checkAnswer(described, expectedRows)
+    }
+  }
+
   test("DESCRIBE TABLE EXTENDED of a partitioned table") {
     withNamespaceAndTable("ns", "table") { tbl =>
       spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing" +
@@ -74,18 +71,19 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
       QueryTest.checkAnswer(
         descriptionDf,
         Seq(
-          Row("id", "bigint", ""),
-          Row("data", "string", ""),
-          Row("", "", ""),
-          Row("# Partitioning", "", ""),
-          Row("Part 0", "id", ""),
+          Row("id", "bigint", null),
+          Row("data", "string", null),
+          Row("# Partition Information", "", ""),
+          Row("# col_name", "data_type", "comment"),
+          Row("id", "bigint", null),
           Row("", "", ""),
           Row("# Metadata Columns", "", ""),
           Row("index", "int", "Metadata column used to conflict with a data column"),
           Row("_partition", "string", "Partition key used to store the row"),
           Row("", "", ""),
           Row("# Detailed Table Information", "", ""),
           Row("Name", tbl, ""),
+          Row("Type", "MANAGED", ""),
           Row("Comment", "this is a test table", ""),
           Row("Location", "file:/tmp/testcat/table_name", ""),
           Row("Provider", "_", ""),

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeTableSuite.scala
@@ -72,7 +72,7 @@ class DescribeTableSuite extends v1.DescribeTableSuiteBase with CommandSuiteBase
           Row("Last Access", "UNKNOWN", ""),
           Row("Created By", "Spark 3.4.0-SNAPSHOT", ""),
           Row("Type", "EXTERNAL", ""),
-          Row("Provider", "hive", ""),
+          Row("Provider", getProvider(), ""),
           Row("Comment", "this is a test table", ""),
           Row("Location", "file:/tmp/testcat/table_name", ""),
           Row("Serde Library", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", ""),