Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@ import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table}
import org.apache.spark.sql.catalyst.util.quoteIfNeeded
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog}
import org.apache.spark.sql.connector.expressions.IdentityTransform

case class DescribeTableExec(
output: Seq[Attribute],
Expand All @@ -45,11 +48,19 @@ case class DescribeTableExec(
rows += toCatalystRow("# Detailed Table Information", "", "")
rows += toCatalystRow("Name", table.name(), "")

CatalogV2Util.TABLE_RESERVED_PROPERTIES.foreach(propKey => {
if (table.properties.containsKey(propKey)) {
rows += toCatalystRow(propKey.capitalize, table.properties.get(propKey), "")
}
})
val tableType = if (table.properties().containsKey(TableCatalog.PROP_EXTERNAL)) {
CatalogTableType.EXTERNAL.name
} else {
CatalogTableType.MANAGED.name
}
rows += toCatalystRow("Type", tableType, "")
CatalogV2Util.TABLE_RESERVED_PROPERTIES
.filterNot(_ == TableCatalog.PROP_EXTERNAL)
.foreach(propKey => {
if (table.properties.containsKey(propKey)) {
rows += toCatalystRow(propKey.capitalize, table.properties.get(propKey), "")
}
})
val properties =
conf.redactOptions(table.properties.asScala.toMap).toList
.filter(kv => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(kv._1))
Expand All @@ -62,7 +73,7 @@ case class DescribeTableExec(
private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = {
rows ++= table.schema.map{ column =>
toCatalystRow(
column.name, column.dataType.simpleString, column.getComment().getOrElse(""))
column.name, column.dataType.simpleString, column.getComment().orNull)
}
}

Expand All @@ -80,13 +91,31 @@ case class DescribeTableExec(
}

private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = {
rows += emptyRow()
rows += toCatalystRow("# Partitioning", "", "")
if (table.partitioning.isEmpty) {
rows += toCatalystRow("Not partitioned", "", "")
} else {
rows ++= table.partitioning.zipWithIndex.map {
case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
if (table.partitioning.nonEmpty) {
val partitionColumnsOnly = table.partitioning.forall(t => t.isInstanceOf[IdentityTransform])
if (partitionColumnsOnly) {
rows += toCatalystRow("# Partition Information", "", "")
rows += toCatalystRow(s"# ${output(0).name}", output(1).name, output(2).name)
rows ++= table.partitioning
.map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
.map { fieldNames =>
val nestedField = table.schema.findNestedField(fieldNames)
assert(nestedField.isDefined,
s"Not found the partition column ${fieldNames.map(quoteIfNeeded).mkString(".")} " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we can reuse MultipartIdentifierHelper.quoted

s"in the table schema ${table.schema().catalogString}.")
nestedField.get
}.map { case (path, field) =>
toCatalystRow(
(path :+ field.name).map(quoteIfNeeded(_)).mkString("."),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

field.dataType.simpleString,
field.getComment().orNull)
}
} else {
rows += emptyRow()
rows += toCatalystRow("# Partitioning", "", "")
rows ++= table.partitioning.zipWithIndex.map {
case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{AnalysisException, QueryTest}
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType, StructType}

/**
Expand All @@ -44,6 +44,42 @@ trait DescribeTableSuiteBase extends QueryTest with DDLCommandTestUtils {
}
}

// Verifies the output schema and the rows returned by DESCRIBE TABLE for a table
// that was created without a PARTITIONED BY clause.
test("DESCRIBE TABLE of a non-partitioned table") {
  withNamespaceAndTable("ns", "table") { tbl =>
    spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
    val described = spark.sql(s"DESCRIBE TABLE $tbl")

    // The result is expected to expose three string-typed columns.
    val actualSchema = described.schema.map(f => (f.name, f.dataType))
    val expectedSchema = Seq(
      ("col_name", StringType),
      ("data_type", StringType),
      ("comment", StringType))
    assert(actualSchema === expectedSchema)

    // Only the two data columns are expected; columns without a comment yield null.
    val expectedRows = Seq(
      Row("data", "string", null),
      Row("id", "bigint", null))
    QueryTest.checkAnswer(described, expectedRows)
  }
}

// Verifies the output schema and the rows returned by DESCRIBE TABLE for a table
// partitioned by one of its top-level columns.
test("DESCRIBE TABLE of a partitioned table") {
  withNamespaceAndTable("ns", "table") { tbl =>
    spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing PARTITIONED BY (id)")
    val described = spark.sql(s"DESCRIBE TABLE $tbl")

    // The result is expected to expose three string-typed columns.
    val actualSchema = described.schema.map(f => (f.name, f.dataType))
    val expectedSchema = Seq(
      ("col_name", StringType),
      ("data_type", StringType),
      ("comment", StringType))
    assert(actualSchema === expectedSchema)

    // Data columns first, then the partition section with its own header rows.
    val expectedRows = Seq(
      Row("data", "string", null),
      Row("id", "bigint", null),
      Row("# Partition Information", "", ""),
      Row("# col_name", "data_type", "comment"),
      Row("id", "bigint", null))
    // Rows named 'Created Time' are filtered out before the comparison.
    val comparable = described.filter("col_name != 'Created Time'")
    QueryTest.checkAnswer(comparable, expectedRows)
  }
}

test("SPARK-34561: drop/add columns to a dataset of `DESCRIBE TABLE`") {
withNamespaceAndTable("ns", "table") { tbl =>
sql(s"CREATE TABLE $tbl (c0 INT) $defaultUsing")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.execution.command.v1

import java.util.Locale

import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.execution.command
import org.apache.spark.sql.types.StringType
Expand All @@ -33,24 +35,9 @@ import org.apache.spark.sql.types.StringType
trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
with command.TestsV1AndV2Commands {

// Creates a table without a PARTITIONED BY clause, runs DESCRIBE TABLE on it, and checks
// that the result has the three string columns (col_name, data_type, comment) and contains
// one row per data column with a null comment.
test("DESCRIBE TABLE with non-'partitioned-by' clause") {
withNamespaceAndTable("ns", "table") { tbl =>
spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
val descriptionDf = spark.sql(s"DESCRIBE TABLE $tbl")
// Compare (name, type) pairs of the result schema against the expected layout.
assert(descriptionDf.schema.map(field => (field.name, field.dataType)) ===
Seq(
("col_name", StringType),
("data_type", StringType),
("comment", StringType)))
QueryTest.checkAnswer(
descriptionDf,
Seq(
Row("data", "string", null),
Row("id", "bigint", null)))
}
}
/**
 * Derives the provider name from `defaultUsing`: drops a leading "USING" prefix,
 * trims surrounding whitespace and lower-cases the remainder with the ROOT locale.
 */
def getProvider(): String = {
  val withoutKeyword = defaultUsing.stripPrefix("USING")
  val providerName = withoutKeyword.trim
  providerName.toLowerCase(Locale.ROOT)
}

test("Describing a partition is not supported") {
test("Describing of a non-existent partition") {
withNamespaceAndTable("ns", "table") { tbl =>
spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
"PARTITIONED BY (id)")
Expand Down Expand Up @@ -96,7 +83,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
Row("Last Access", "UNKNOWN", ""),
Row("Created By", "Spark 3.4.0-SNAPSHOT", ""),
Row("Type", "EXTERNAL", ""),
Row("Provider", "parquet", ""),
Row("Provider", getProvider(), ""),
Row("Comment", "this is a test table", ""),
Row("Table Properties", "[bar=baz]", ""),
Row("Location", "file:/tmp/testcat/table_name", ""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,6 @@ import org.apache.spark.util.Utils
*/
class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuiteBase {

// Creates a table without a PARTITIONED BY clause, runs DESCRIBE TABLE on it, and checks
// that the result has the three string columns (col_name, data_type, comment). The expected
// rows are the two data columns with empty-string comments, an empty separator row, a
// "# Partitioning" header row and a "Not partitioned" row.
test("DESCRIBE TABLE with non-'partitioned-by' clause") {
withNamespaceAndTable("ns", "table") { tbl =>
spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
val descriptionDf = spark.sql(s"DESCRIBE TABLE $tbl")
// Compare (name, type) pairs of the result schema against the expected layout.
assert(descriptionDf.schema.map(field => (field.name, field.dataType)) ===
Seq(
("col_name", StringType),
("data_type", StringType),
("comment", StringType)))
QueryTest.checkAnswer(
descriptionDf,
Seq(
Row("data", "string", ""),
Row("id", "bigint", ""),
Row("", "", ""),
Row("# Partitioning", "", ""),
Row("Not partitioned", "", "")))
}
}

test("Describing a partition is not supported") {
withNamespaceAndTable("ns", "table") { tbl =>
spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
Expand All @@ -59,6 +39,23 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
}
}

// Verifies the rows returned by DESCRIBE TABLE when the table is partitioned by
// fields nested inside a struct column.
test("DESCRIBE TABLE of a partitioned table by nested columns") {
  withNamespaceAndTable("ns", "table") { tbl =>
    val createStmt = s"CREATE TABLE $tbl (s struct<id:INT, a:BIGINT>, data string) " +
      s"$defaultUsing PARTITIONED BY (s.id, s.a)"
    sql(createStmt)
    val described = sql(s"DESCRIBE TABLE $tbl")

    // Data columns first, then the partition section listing each nested field
    // by its dotted path.
    val expectedRows = Seq(
      Row("data", "string", null),
      Row("s", "struct<id:int,a:bigint>", null),
      Row("# Partition Information", "", ""),
      Row("# col_name", "data_type", "comment"),
      Row("s.id", "int", null),
      Row("s.a", "bigint", null))
    // Rows named 'Created Time' are filtered out before the comparison.
    val comparable = described.filter("col_name != 'Created Time'")
    QueryTest.checkAnswer(comparable, expectedRows)
  }
}

test("DESCRIBE TABLE EXTENDED of a partitioned table") {
withNamespaceAndTable("ns", "table") { tbl =>
spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing" +
Expand All @@ -74,18 +71,19 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
QueryTest.checkAnswer(
descriptionDf,
Seq(
Row("id", "bigint", ""),
Row("data", "string", ""),
Row("", "", ""),
Row("# Partitioning", "", ""),
Row("Part 0", "id", ""),
Row("id", "bigint", null),
Row("data", "string", null),
Row("# Partition Information", "", ""),
Row("# col_name", "data_type", "comment"),
Row("id", "bigint", null),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of curiosity: what is the difference between the v1 and v2 DESC TABLE output for this test, "DESCRIBE TABLE EXTENDED of a partitioned table"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v2 (after the PR):

+----------------------------+----------------------------+---------------------------------------------------+
|col_name                    |data_type                   |comment                                            |
+----------------------------+----------------------------+---------------------------------------------------+
|id                          |bigint                      |null                                               |
|data                        |string                      |null                                               |
|# Partition Information     |                            |                                                   |
|# col_name                  |data_type                   |comment                                            |
|id                          |bigint                      |null                                               |
|                            |                            |                                                   |
|# Metadata Columns          |                            |                                                   |
|index                       |int                         |Metadata column used to conflict with a data column|
|_partition                  |string                      |Partition key used to store the row                |
|                            |                            |                                                   |
|# Detailed Table Information|                            |                                                   |
|Name                        |test_catalog.ns.table       |                                                   |
|Comment                     |this is a test table        |                                                   |
|Location                    |file:/tmp/testcat/table_name|                                                   |
|Provider                    |_                           |                                                   |
|Owner                       |maximgekk                   |                                                   |
|Table Properties            |[bar=baz]                   |                                                   |
+----------------------------+----------------------------+---------------------------------------------------+

v1 in memory:

+----------------------------+----------------------------+-------+
|col_name                    |data_type                   |comment|
+----------------------------+----------------------------+-------+
|data                        |string                      |null   |
|id                          |bigint                      |null   |
|# Partition Information     |                            |       |
|# col_name                  |data_type                   |comment|
|id                          |bigint                      |null   |
|                            |                            |       |
|# Detailed Table Information|                            |       |
|Database                    |ns                          |       |
|Table                       |table                       |       |
|Created Time                |Wed Jun 22 09:37:48 PDT 2022|       |
|Last Access                 |UNKNOWN                     |       |
|Created By                  |Spark 3.4.0-SNAPSHOT        |       |
|Type                        |EXTERNAL                    |       |
|Provider                    |parquet                     |       |
|Comment                     |this is a test table        |       |
|Table Properties            |[bar=baz]                   |       |
|Location                    |file:/tmp/testcat/table_name|       |
|Partition Provider          |Catalog                     |       |
+----------------------------+----------------------------+-------+

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v1 (hive):

+----------------------------+----------------------------------------------------------+-------+
|col_name                    |data_type                                                 |comment|
+----------------------------+----------------------------------------------------------+-------+
|data                        |string                                                    |null   |
|id                          |bigint                                                    |null   |
|# Partition Information     |                                                          |       |
|# col_name                  |data_type                                                 |comment|
|id                          |bigint                                                    |null   |
|                            |                                                          |       |
|# Detailed Table Information|                                                          |       |
|Database                    |ns                                                        |       |
|Table                       |table                                                     |       |
|Owner                       |maximgekk                                                 |       |
|Created Time                |Wed Jun 22 09:39:42 PDT 2022                              |       |
|Last Access                 |UNKNOWN                                                   |       |
|Created By                  |Spark 3.4.0-SNAPSHOT                                      |       |
|Type                        |EXTERNAL                                                  |       |
|Provider                    |hive                                                      |       |
|Comment                     |this is a test table                                      |       |
|Table Properties            |[transient_lastDdlTime=1655915982]                        |       |
|Location                    |file:/tmp/testcat/table_name                              |       |
|Serde Library               |org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe        |       |
|InputFormat                 |org.apache.hadoop.mapred.TextInputFormat                  |       |
|OutputFormat                |org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat|       |
|Storage Properties          |[serialization.format=1]                                  |       |
|Partition Provider          |Catalog                                                   |       |
+----------------------------+----------------------------------------------------------+-------+

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Can we at least include the table type in the v2 command? It only requires checking whether the table has the reserved EXTERNAL table property.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Row("", "", ""),
Row("# Metadata Columns", "", ""),
Row("index", "int", "Metadata column used to conflict with a data column"),
Row("_partition", "string", "Partition key used to store the row"),
Row("", "", ""),
Row("# Detailed Table Information", "", ""),
Row("Name", tbl, ""),
Row("Type", "MANAGED", ""),
Row("Comment", "this is a test table", ""),
Row("Location", "file:/tmp/testcat/table_name", ""),
Row("Provider", "_", ""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class DescribeTableSuite extends v1.DescribeTableSuiteBase with CommandSuiteBase
Row("Last Access", "UNKNOWN", ""),
Row("Created By", "Spark 3.4.0-SNAPSHOT", ""),
Row("Type", "EXTERNAL", ""),
Row("Provider", "hive", ""),
Row("Provider", getProvider(), ""),
Row("Comment", "this is a test table", ""),
Row("Location", "file:/tmp/testcat/table_name", ""),
Row("Serde Library", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", ""),
Expand Down