[SPARK-14127][SQL] Native "DESC [EXTENDED | FORMATTED] &lt;table&gt;" DDL command #12844
Changes from all commits: 89e0aea, 718da25, 9194fe1, a66885a, 18b9bb5, b5dbc15
```diff
@@ -240,10 +240,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
   override def visitDescribeTable(ctx: DescribeTableContext): LogicalPlan = withOrigin(ctx) {
     // FORMATTED and columns are not supported. Return null and let the parser decide what to do
     // with this (create an exception or pass it on to a different system).
-    if (ctx.describeColName != null || ctx.FORMATTED != null || ctx.partitionSpec != null) {
+    if (ctx.describeColName != null || ctx.partitionSpec != null) {
       null
     } else {
-      DescribeTableCommand(visitTableIdentifier(ctx.tableIdentifier), ctx.EXTENDED != null)
+      DescribeTableCommand(
+        visitTableIdentifier(ctx.tableIdentifier),
+        ctx.EXTENDED != null,
+        ctx.FORMATTED() != null)
     }
   }
```
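As a usage sketch (not part of the PR), all three variants can be exercised end to end. The table name and schema below are invented, and a Hive-enabled session is assumed because a plain CREATE TABLE statement creates a Hive table:

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical driver exercising the three DESCRIBE variants handled by the
// new native command.
object DescribeTableDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("describe-table-demo")
      .enableHiveSupport()
      .getOrCreate()

    spark.sql(
      "CREATE TABLE IF NOT EXISTS demo_tab (id INT COMMENT 'row id') " +
        "PARTITIONED BY (ds STRING)")

    spark.sql("DESCRIBE demo_tab").show(truncate = false)           // columns + partition info
    spark.sql("DESCRIBE EXTENDED demo_tab").show(truncate = false)  // + detailed table information
    spark.sql("DESCRIBE FORMATTED demo_tab").show(truncate = false) // + Hive-style formatted sections

    spark.stop()
  }
}
```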
```diff
@@ -867,6 +870,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
         // Note: Keep this unspecified because we use the presence of the serde to decide
         // whether to convert a table created by CTAS to a datasource table.
         serde = None,
+        compressed = false,
         serdeProperties = Map())
     }
     val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat)
```
```diff
@@ -878,6 +882,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
       inputFormat = fileStorage.inputFormat.orElse(defaultStorage.inputFormat),
       outputFormat = fileStorage.outputFormat.orElse(defaultStorage.outputFormat),
       serde = rowStorage.serde.orElse(fileStorage.serde).orElse(defaultStorage.serde),
+      compressed = false,
       serdeProperties = rowStorage.serdeProperties ++ fileStorage.serdeProperties)

     // TODO support the sql text - have a proper location for this!
```
```diff
@@ -931,7 +936,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
   }

   /** Empty storage format for default values and copies. */
-  private val EmptyStorageFormat = CatalogStorageFormat(None, None, None, None, Map.empty)
+  private val EmptyStorageFormat = CatalogStorageFormat(None, None, None, None, false, Map.empty)

   /**
    * Create a [[CatalogStorageFormat]].
```
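The extra `false` corresponds to a new `compressed: Boolean` field on `CatalogStorageFormat`. A sketch of the resulting shape, with field names inferred from the named arguments and accessors used elsewhere in this diff (`serde = None`, `compressed = false`, `table.storage.locationUri`, `table.storage.serdeProperties`):

```scala
// Inferred shape of CatalogStorageFormat after this PR; field order matches
// the positional construction above and the pattern match further down.
case class CatalogStorageFormat(
    locationUri: Option[String],
    inputFormat: Option[String],
    outputFormat: Option[String],
    serde: Option[String],
    compressed: Boolean, // new: whether the underlying storage is compressed
    serdeProperties: Map[String, String])
```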
```diff
@@ -1012,6 +1017,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
         entry("field.delim", ctx.fieldsTerminatedBy) ++
         entry("serialization.format", ctx.fieldsTerminatedBy) ++
         entry("escape.delim", ctx.escapedBy) ++
+        // The following typo is inherited from Hive...
         entry("colelction.delim", ctx.collectionItemsTerminatedBy) ++
         entry("mapkey.delim", ctx.keysTerminatedBy) ++
         Option(ctx.linesSeparatedBy).toSeq.map { token =>
```

Contributor (on the added comment): +1 for this comment. Totally got me the last time.
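To make the inherited quirk concrete: Hive reads the collection-items delimiter from a property key that is literally misspelled, so Spark has to emit the same misspelled key for compatibility. A small illustration (the delimiter value is invented):

```scala
object CollectionDelimQuirk extends App {
  // For ROW FORMAT DELIMITED ... COLLECTION ITEMS TERMINATED BY ',' the serde
  // property key must use Hive's historical misspelling.
  val serdeProps = Map("colelction.delim" -> ",") // sic: "colelction", not "collection"
  require(!serdeProps.contains("collection.delim")) // the correct spelling is never set
  println(serdeProps)
}
```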
```diff
@@ -1151,7 +1157,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {

       case c: RowFormatSerdeContext =>
         // Use a serde format.
-        val CatalogStorageFormat(None, None, None, Some(name), props) = visitRowFormatSerde(c)
+        val CatalogStorageFormat(None, None, None, Some(name), _, props) = visitRowFormatSerde(c)

         // SPARK-10310: Special cases LazySimpleSerDe
         val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") {
```
```diff
@@ -19,16 +19,17 @@ package org.apache.spark.sql.execution.command

 import java.io.File
 import java.net.URI
+import java.util.Date

 import scala.collection.mutable.ArrayBuffer

 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogRelation, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan, UnaryNode}
-import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType}
+import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType, StructType}
 import org.apache.spark.util.Utils

 case class CreateTableAsSelectLogicalPlan(
```
```diff
@@ -270,10 +271,10 @@ case class LoadData(
 /**
  * Command that looks like
  * {{{
- *   DESCRIBE (EXTENDED) table_name;
+ *   DESCRIBE [EXTENDED|FORMATTED] table_name;
  * }}}
  */
-case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean)
+case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isFormatted: Boolean)
   extends RunnableCommand {

   override val output: Seq[Attribute] = Seq(
```
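For reference, the three SQL forms map onto the new flags as follows; a sketch built from the signature above (the table name is invented, and the parser never sets both flags at once):

```scala
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.DescribeTableCommand

// Illustrative flag combinations for the three variants:
val t = TableIdentifier("demo_tab")
val plain     = DescribeTableCommand(t, isExtended = false, isFormatted = false) // DESCRIBE demo_tab
val extended  = DescribeTableCommand(t, isExtended = true,  isFormatted = false) // DESCRIBE EXTENDED demo_tab
val formatted = DescribeTableCommand(t, isExtended = false, isFormatted = true)  // DESCRIBE FORMATTED demo_tab
```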
```diff
@@ -290,29 +291,92 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean)
     val result = new ArrayBuffer[Row]
     sparkSession.sessionState.catalog.lookupRelation(table) match {
       case catalogRelation: CatalogRelation =>
-        catalogRelation.catalogTable.schema.foreach { column =>
-          result += Row(column.name, column.dataType, column.comment.orNull)
-        }
-
-        if (catalogRelation.catalogTable.partitionColumns.nonEmpty) {
-          result += Row("# Partition Information", "", "")
-          result += Row(s"# ${output(0).name}", output(1).name, output(2).name)
-
-          catalogRelation.catalogTable.partitionColumns.foreach { col =>
-            result += Row(col.name, col.dataType, col.comment.orNull)
-          }
+        if (isExtended) {
+          describeExtended(catalogRelation, result)
+        } else if (isFormatted) {
+          describeFormatted(catalogRelation, result)
+        } else {
+          describe(catalogRelation, result)
         }

       case relation =>
-        relation.schema.fields.foreach { field =>
-          val comment =
-            if (field.metadata.contains("comment")) field.metadata.getString("comment") else ""
-          result += Row(field.name, field.dataType.simpleString, comment)
-        }
+        describeSchema(relation.schema, result)
     }

     result
   }

+  // Shows data columns and partitioned columns (if any)
+  private def describe(relation: CatalogRelation, buffer: ArrayBuffer[Row]): Unit = {
+    describeSchema(relation.catalogTable.schema, buffer)
+
+    if (relation.catalogTable.partitionColumns.nonEmpty) {
+      append(buffer, "# Partition Information", "", "")
+      append(buffer, s"# ${output(0).name}", output(1).name, output(2).name)
+      describeSchema(relation.catalogTable.partitionColumns, buffer)
+    }
+  }
+
+  private def describeExtended(relation: CatalogRelation, buffer: ArrayBuffer[Row]): Unit = {
+    describe(relation, buffer)
+
+    append(buffer, "", "", "")
+    append(buffer, "# Detailed Table Information", relation.catalogTable.toString, "")
```
Member (on the "# Detailed Table Information" line): @liancheng To improve the output of […], I checked what Hive did for the command […]. Basically, in the implementation of […]
```diff
+  }
+
+  private def describeFormatted(relation: CatalogRelation, buffer: ArrayBuffer[Row]): Unit = {
```
Contributor (on `describeFormatted`): We could also use some sort of an […].

Contributor (author): Makes sense. Actually, there were several times that I forgot to add the trailing empty string(s) while working on this PR. Thanks!
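A sketch of the kind of helper the review may be hinting at (hypothetical, not in the patch): defaulting the trailing columns of the `append` method defined at the end of this hunk, so section headers no longer need explicit empty strings.

```scala
// Hypothetical variant of the patch's append helper with defaulted columns:
private def append(
    buffer: ArrayBuffer[Row],
    column: String,
    dataType: String = "",
    comment: String = ""): Unit = {
  buffer += Row(column, dataType, comment)
}

// Call sites could then shrink from
//   append(buffer, "# Storage Information", "", "")
// to
//   append(buffer, "# Storage Information")
```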
```diff
+    describe(relation, buffer)
+
+    val table = relation.catalogTable
+
+    append(buffer, "", "", "")
+    append(buffer, "# Detailed Table Information", "", "")
+    append(buffer, "Database:", table.database, "")
+    append(buffer, "Owner:", table.owner, "")
+    append(buffer, "Create Time:", new Date(table.createTime).toString, "")
+    append(buffer, "Last Access Time:", new Date(table.lastAccessTime).toString, "")
+    append(buffer, "Location:", table.storage.locationUri.getOrElse(""), "")
+    append(buffer, "Table Type:", table.tableType.name, "")
+
+    append(buffer, "Table Parameters:", "", "")
+    table.properties.foreach { case (key, value) =>
+      append(buffer, s"  $key", value, "")
+    }
+
+    append(buffer, "", "", "")
+    append(buffer, "# Storage Information", "", "")
+    table.storage.serde.foreach(serdeLib => append(buffer, "SerDe Library:", serdeLib, ""))
+    table.storage.inputFormat.foreach(format => append(buffer, "InputFormat:", format, ""))
+    table.storage.outputFormat.foreach(format => append(buffer, "OutputFormat:", format, ""))
+    append(buffer, "Compressed:", if (table.storage.compressed) "Yes" else "No", "")
+    append(buffer, "Num Buckets:", table.numBuckets.toString, "")
+    append(buffer, "Bucket Columns:", table.bucketColumnNames.mkString("[", ", ", "]"), "")
+    append(buffer, "Sort Columns:", table.sortColumnNames.mkString("[", ", ", "]"), "")
+
+    append(buffer, "Storage Desc Parameters:", "", "")
+    table.storage.serdeProperties.foreach { case (key, value) =>
+      append(buffer, s"  $key", value, "")
+    }
+  }
+
+  private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = {
+    schema.foreach { column =>
+      val comment =
+        if (column.metadata.contains("comment")) column.metadata.getString("comment") else ""
```
Contributor: Just an idea: so 9/10 times I use […].

Contributor (author): I think the metadata is heavily used in ML code. Another thing is that the data type API has been public for a long time; we probably don't want to change it unless there are particularly good reasons.

Contributor: I am not saying that we should (re)move it. I am only suggesting that it might be easier if we had such an accessor in StructField.
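A sketch of the suggested accessor (hypothetical; this PR does not add it): an enrichment on `StructField` that wraps the metadata lookup done by hand above.

```scala
import org.apache.spark.sql.types.StructField

object StructFieldSyntax {
  // Hypothetical convenience accessor: column comments live in field.metadata,
  // so this simply wraps the lookup that describeSchema performs inline.
  implicit class StructFieldCommentOps(val field: StructField) extends AnyVal {
    def getComment: Option[String] =
      if (field.metadata.contains("comment")) Some(field.metadata.getString("comment"))
      else None
  }
}
```

With such an accessor in scope, the inline `val comment = ...` above would collapse to `column.getComment.getOrElse("")`.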
```diff
+      append(buffer, column.name, column.dataType.simpleString, comment)
+    }
+  }
+
+  private def describeSchema(schema: Seq[CatalogColumn], buffer: ArrayBuffer[Row]): Unit = {
+    schema.foreach { column =>
+      append(buffer, column.name, column.dataType.toLowerCase, column.comment.orNull)
+    }
+  }
+
+  private def append(
+      buffer: ArrayBuffer[Row], column: String, dataType: String, comment: String): Unit = {
+    buffer += Row(column, dataType, comment)
+  }
 }
```
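Putting the pieces together, the rows emitted for DESCRIBE FORMATTED follow the section layout coded above. An abbreviated illustration of the three-column result (all values invented; elided sections marked with "..."):

```
id                            int        row id
ds                            string
# Partition Information
# col_name                    data_type  comment
ds                            string

# Detailed Table Information
Database:                     default
Owner:                        alice
...
# Storage Information
SerDe Library:                org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Compressed:                   No
Num Buckets:                  0
Bucket Columns:               []
Sort Columns:                 []
...
```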
Contributor (on `table.storage.compressed`): Is this ever true? If it isn't, we could leave it out.

Contributor: Nvm. Hive can pass compressed tables.