Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import scala.collection.mutable.ArrayBuffer
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table}
import org.apache.spark.sql.connector.expressions.IdentityTransform

case class DescribeTableExec(
output: Seq[Attribute],
Expand Down Expand Up @@ -62,7 +63,7 @@ case class DescribeTableExec(
private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = {
rows ++= table.schema.map{ column =>
toCatalystRow(
column.name, column.dataType.simpleString, column.getComment().getOrElse(""))
column.name, column.dataType.simpleString, column.getComment().orNull)
}
}

Expand All @@ -80,13 +81,25 @@ case class DescribeTableExec(
}

private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = {
rows += emptyRow()
rows += toCatalystRow("# Partitioning", "", "")
if (table.partitioning.isEmpty) {
rows += toCatalystRow("Not partitioned", "", "")
} else {
rows ++= table.partitioning.zipWithIndex.map {
case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
if (table.partitioning.nonEmpty) {
val partitionColumnsOnly = table.partitioning.forall(t => t.isInstanceOf[IdentityTransform])
if (partitionColumnsOnly) {
rows += toCatalystRow("# Partition Information", "", "")
rows += toCatalystRow(s"# ${output(0).name}", output(1).name, output(2).name)
val nameToField = table.schema.map(f => (f.name, f)).toMap
rows ++= table.partitioning
.map(_.asInstanceOf[IdentityTransform])
.flatMap(_.ref.fieldNames())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's trickier here, as the reference can be a nested field, e.g. `a.b`. We can still keep the output v1-compatible, but the code to find its type and comment will be a bit more complicated, as we need to use `StructType.findNestedField`.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a test for the v2 implementation: partitioning by nested columns. Note that v1 doesn't support partitioning by nested columns. I also fixed the v2 implementation so that it passes the new test.

.map { name =>
val field = nameToField(name)
toCatalystRow(name, field.dataType.simpleString, field.getComment().orNull)
}
} else {
rows += emptyRow()
rows += toCatalystRow("# Partitioning", "", "")
rows ++= table.partitioning.zipWithIndex.map {
case (transform, index) => toCatalystRow(s"Part $index", transform.describe(), "")
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{AnalysisException, QueryTest}
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType, StructType}

/**
Expand All @@ -44,6 +44,42 @@ trait DescribeTableSuiteBase extends QueryTest with DDLCommandTestUtils {
}
}

// Creates a two-column table without a PARTITIONED BY clause and checks that
// DESCRIBE TABLE returns three string columns (col_name, data_type, comment)
// with exactly the rows for `data` and `id`, each having a null comment.
test("DESCRIBE TABLE of a non-partitioned table") {
  withNamespaceAndTable("ns", "table") { tbl =>
    spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
    val described = spark.sql(s"DESCRIBE TABLE $tbl")

    // Compare the result schema as (column name, data type) pairs.
    val actualColumns = described.schema.map(f => (f.name, f.dataType))
    val expectedColumns = Seq("col_name", "data_type", "comment").map(_ -> StringType)
    assert(actualColumns === expectedColumns)

    val expectedRows = Seq(
      Row("data", "string", null),
      Row("id", "bigint", null))
    QueryTest.checkAnswer(described, expectedRows)
  }
}

// Creates a table partitioned by `id` and checks that DESCRIBE TABLE lists the
// table columns followed by a "# Partition Information" section that repeats
// the partition column with its type and (null) comment.
test("DESCRIBE TABLE of a partitioned table") {
  withNamespaceAndTable("ns", "table") { tbl =>
    val createStmt =
      s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing PARTITIONED BY (id)"
    spark.sql(createStmt)
    val described = spark.sql(s"DESCRIBE TABLE $tbl")

    // Compare the result schema as (column name, data type) pairs.
    val expectedColumns = Seq(
      "col_name" -> StringType,
      "data_type" -> StringType,
      "comment" -> StringType)
    assert(described.schema.map(f => (f.name, f.dataType)) === expectedColumns)

    // Any 'Created Time' row is dropped before the comparison.
    val withoutCreatedTime = described.filter("col_name != 'Created Time'")
    val columnRows = Seq(
      Row("data", "string", null),
      Row("id", "bigint", null))
    val partitionRows = Seq(
      Row("# Partition Information", "", ""),
      Row("# col_name", "data_type", "comment"),
      Row("id", "bigint", null))
    QueryTest.checkAnswer(withoutCreatedTime, columnRows ++ partitionRows)
  }
}

test("SPARK-34561: drop/add columns to a dataset of `DESCRIBE TABLE`") {
withNamespaceAndTable("ns", "table") { tbl =>
sql(s"CREATE TABLE $tbl (c0 INT) $defaultUsing")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.execution.command.v1

import java.util.Locale

import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.execution.command
import org.apache.spark.sql.types.StringType
Expand All @@ -33,24 +35,9 @@ import org.apache.spark.sql.types.StringType
trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
with command.TestsV1AndV2Commands {

// Describes a table created without PARTITIONED BY and verifies both the
// result schema (three string columns) and the two column rows, whose
// comments are null.
test("DESCRIBE TABLE with non-'partitioned-by' clause") {
  withNamespaceAndTable("ns", "table") { tbl =>
    spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
    val result = spark.sql(s"DESCRIBE TABLE $tbl")

    // Schema check: (name, type) pairs of the DESCRIBE output.
    val schemaPairs = result.schema.map { field =>
      (field.name, field.dataType)
    }
    val wantedPairs = Seq(
      ("col_name", StringType),
      ("data_type", StringType),
      ("comment", StringType))
    assert(schemaPairs === wantedPairs)

    // Content check: one row per table column.
    val wantedRows = Seq(
      Row("data", "string", null),
      Row("id", "bigint", null))
    QueryTest.checkAnswer(result, wantedRows)
  }
}
/**
 * Returns the provider name derived from `defaultUsing`: a leading "USING" is removed,
 * surrounding whitespace is trimmed, and the result is lower-cased with `Locale.ROOT`.
 */
def getProvider(): String = {
  val withoutKeyword = defaultUsing.stripPrefix("USING")
  withoutKeyword.trim.toLowerCase(Locale.ROOT)
}

test("Describing a partition is not supported") {
test("Describing of a non-existent partition") {
withNamespaceAndTable("ns", "table") { tbl =>
spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
"PARTITIONED BY (id)")
Expand Down Expand Up @@ -96,7 +83,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase {
Row("Last Access", "UNKNOWN", ""),
Row("Created By", "Spark 3.4.0-SNAPSHOT", ""),
Row("Type", "EXTERNAL", ""),
Row("Provider", "parquet", ""),
Row("Provider", getProvider(), ""),
Row("Comment", "this is a test table", ""),
Row("Table Properties", "[bar=baz]", ""),
Row("Location", "file:/tmp/testcat/table_name", ""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,6 @@ import org.apache.spark.util.Utils
*/
class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuiteBase {

// Describes a table created without PARTITIONED BY and verifies the result
// schema plus the expected rows: the two columns with empty-string comments,
// a blank separator row, and a "# Partitioning" section reporting
// "Not partitioned".
test("DESCRIBE TABLE with non-'partitioned-by' clause") {
  withNamespaceAndTable("ns", "table") { tbl =>
    spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing")
    val described = spark.sql(s"DESCRIBE TABLE $tbl")

    // Schema check: (name, type) pairs of the DESCRIBE output.
    val expectedColumns = Seq("col_name", "data_type", "comment").map(_ -> StringType)
    assert(described.schema.map(f => (f.name, f.dataType)) === expectedColumns)

    val columnRows = Seq(
      Row("data", "string", ""),
      Row("id", "bigint", ""))
    val partitioningRows = Seq(
      Row("", "", ""),
      Row("# Partitioning", "", ""),
      Row("Not partitioned", "", ""))
    QueryTest.checkAnswer(described, columnRows ++ partitioningRows)
  }
}

test("Describing a partition is not supported") {
withNamespaceAndTable("ns", "table") { tbl =>
spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " +
Expand All @@ -74,11 +54,11 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase with CommandSuit
QueryTest.checkAnswer(
descriptionDf,
Seq(
Row("id", "bigint", ""),
Row("data", "string", ""),
Row("", "", ""),
Row("# Partitioning", "", ""),
Row("Part 0", "id", ""),
Row("id", "bigint", null),
Row("data", "string", null),
Row("# Partition Information", "", ""),
Row("# col_name", "data_type", "comment"),
Row("id", "bigint", null),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of curiosity: what's the difference between the v1 and v2 DESC TABLE output for this test, "DESCRIBE TABLE EXTENDED of a partitioned table"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v2 (after the PR):

+----------------------------+----------------------------+---------------------------------------------------+
|col_name                    |data_type                   |comment                                            |
+----------------------------+----------------------------+---------------------------------------------------+
|id                          |bigint                      |null                                               |
|data                        |string                      |null                                               |
|# Partition Information     |                            |                                                   |
|# col_name                  |data_type                   |comment                                            |
|id                          |bigint                      |null                                               |
|                            |                            |                                                   |
|# Metadata Columns          |                            |                                                   |
|index                       |int                         |Metadata column used to conflict with a data column|
|_partition                  |string                      |Partition key used to store the row                |
|                            |                            |                                                   |
|# Detailed Table Information|                            |                                                   |
|Name                        |test_catalog.ns.table       |                                                   |
|Comment                     |this is a test table        |                                                   |
|Location                    |file:/tmp/testcat/table_name|                                                   |
|Provider                    |_                           |                                                   |
|Owner                       |maximgekk                   |                                                   |
|Table Properties            |[bar=baz]                   |                                                   |
+----------------------------+----------------------------+---------------------------------------------------+

v1 in memory:

+----------------------------+----------------------------+-------+
|col_name                    |data_type                   |comment|
+----------------------------+----------------------------+-------+
|data                        |string                      |null   |
|id                          |bigint                      |null   |
|# Partition Information     |                            |       |
|# col_name                  |data_type                   |comment|
|id                          |bigint                      |null   |
|                            |                            |       |
|# Detailed Table Information|                            |       |
|Database                    |ns                          |       |
|Table                       |table                       |       |
|Created Time                |Wed Jun 22 09:37:48 PDT 2022|       |
|Last Access                 |UNKNOWN                     |       |
|Created By                  |Spark 3.4.0-SNAPSHOT        |       |
|Type                        |EXTERNAL                    |       |
|Provider                    |parquet                     |       |
|Comment                     |this is a test table        |       |
|Table Properties            |[bar=baz]                   |       |
|Location                    |file:/tmp/testcat/table_name|       |
|Partition Provider          |Catalog                     |       |
+----------------------------+----------------------------+-------+

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v1 (hive):

+----------------------------+----------------------------------------------------------+-------+
|col_name                    |data_type                                                 |comment|
+----------------------------+----------------------------------------------------------+-------+
|data                        |string                                                    |null   |
|id                          |bigint                                                    |null   |
|# Partition Information     |                                                          |       |
|# col_name                  |data_type                                                 |comment|
|id                          |bigint                                                    |null   |
|                            |                                                          |       |
|# Detailed Table Information|                                                          |       |
|Database                    |ns                                                        |       |
|Table                       |table                                                     |       |
|Owner                       |maximgekk                                                 |       |
|Created Time                |Wed Jun 22 09:39:42 PDT 2022                              |       |
|Last Access                 |UNKNOWN                                                   |       |
|Created By                  |Spark 3.4.0-SNAPSHOT                                      |       |
|Type                        |EXTERNAL                                                  |       |
|Provider                    |hive                                                      |       |
|Comment                     |this is a test table                                      |       |
|Table Properties            |[transient_lastDdlTime=1655915982]                        |       |
|Location                    |file:/tmp/testcat/table_name                              |       |
|Serde Library               |org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe        |       |
|InputFormat                 |org.apache.hadoop.mapred.TextInputFormat                  |       |
|OutputFormat                |org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat|       |
|Storage Properties          |[serialization.format=1]                                  |       |
|Partition Provider          |Catalog                                                   |       |
+----------------------------+----------------------------------------------------------+-------+

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Can we at least include the table type in the v2 command? It's simply a matter of checking whether the table has the reserved EXTERNAL table property.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Row("", "", ""),
Row("# Metadata Columns", "", ""),
Row("index", "int", "Metadata column used to conflict with a data column"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class DescribeTableSuite extends v1.DescribeTableSuiteBase with CommandSuiteBase
Row("Last Access", "UNKNOWN", ""),
Row("Created By", "Spark 3.4.0-SNAPSHOT", ""),
Row("Type", "EXTERNAL", ""),
Row("Provider", "hive", ""),
Row("Provider", getProvider(), ""),
Row("Comment", "this is a test table", ""),
Row("Location", "file:/tmp/testcat/table_name", ""),
Row("Serde Library", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", ""),
Expand Down