-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-2177][SQL] describe table result contains only one column #1118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
bb8bbef
725e88c
342fdf7
74bd1d4
366f891
83adb2f
f1a417e
440c5af
9787fff
8003cf3
6387217
656b068
e7c4e72
b9b9aa5
fd2534c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,3 +60,16 @@ case class ExplainCommand(plan: LogicalPlan) extends Command { | |
| * Returned for the "CACHE TABLE tableName" and "UNCACHE TABLE tableName" command. | ||
| */ | ||
| case class CacheCommand(tableName: String, doCache: Boolean) extends Command | ||
|
|
||
/**
 * Returned for the "DESCRIBE [FORMATTED | EXTENDED] tableName" command.
 *
 * @param table the relation to describe.
 * @param isFormatted true when the FORMATTED keyword was present in the statement.
 * @param isExtended true when the EXTENDED keyword was present in the statement.
 */
case class DescribeCommand(
    table: LogicalPlan,
    isFormatted: Boolean,
    isExtended: Boolean) extends Command {
  // Each result row is a (name, type, comment) triple of strings. Only `name` is always
  // populated: the "# Partition Information" header row has neither a type nor a comment, and
  // a column without a comment is reported with a null comment, so `type` and `comment` have
  // to be declared nullable.
  override def output = Seq(
    BoundReference(0, AttributeReference("name", StringType, nullable = false)()),
    BoundReference(1, AttributeReference("type", StringType, nullable = true)()),
    BoundReference(2, AttributeReference("comment", StringType, nullable = true)()))
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,7 +52,6 @@ private[hive] case class AddFile(filePath: String) extends Command | |
| private[hive] object HiveQl { | ||
| protected val nativeCommands = Seq( | ||
| "TOK_DESCFUNCTION", | ||
| "TOK_DESCTABLE", | ||
| "TOK_DESCDATABASE", | ||
| "TOK_SHOW_TABLESTATUS", | ||
| "TOK_SHOWDATABASES", | ||
|
|
@@ -120,6 +119,12 @@ private[hive] object HiveQl { | |
| "TOK_SWITCHDATABASE" | ||
| ) | ||
|
|
||
| // Commands that we do not need to explain. | ||
| protected val noExplainCommands = Seq( | ||
| "TOK_CREATETABLE", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does CTAS fall in here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. noExplainCommands is for those commands that we do not need to explain. For example, we basically do nothing for "EXPLAIN CTAS". A regular CTAS query will not fall in here. |
||
| "TOK_DESCTABLE" | ||
| ) ++ nativeCommands | ||
|
|
||
| /** | ||
| * A set of implicit transformations that allow Hive ASTNodes to be rewritten by transformations | ||
| * similar to [[catalyst.trees.TreeNode]]. | ||
|
|
@@ -362,13 +367,19 @@ private[hive] object HiveQl { | |
| } | ||
| } | ||
|
|
||
/**
 * Splits the children of a table-name node into an optional database name and a table name.
 *
 * Every child must be a leaf token; its text is passed through `cleanIdentifier`. One child
 * is read as a bare table name, two children as `database` followed by `table`. There is no
 * case for any other shape, so such input fails with a `MatchError`.
 */
protected def extractDbNameTableName(tableNameParts: Node): (Option[String], String) = {
  val identifiers = tableNameParts.getChildren.map {
    case Token(part, Nil) => cleanIdentifier(part)
  }

  identifiers match {
    case Seq(tableOnly) => (None, tableOnly)
    case Seq(databaseName, table) => (Some(databaseName), table)
  }
}
|
|
||
| protected def nodeToPlan(node: Node): LogicalPlan = node match { | ||
| // Just fake explain for any of the native commands. | ||
| case Token("TOK_EXPLAIN", explainArgs) if nativeCommands contains explainArgs.head.getText => | ||
| ExplainCommand(NoRelation) | ||
| // Create tables aren't native commands due to CTAS queries, but we still don't need to | ||
| // explain them. | ||
| case Token("TOK_EXPLAIN", explainArgs) if explainArgs.head.getText == "TOK_CREATETABLE" => | ||
| case Token("TOK_EXPLAIN", explainArgs) if noExplainCommands contains explainArgs.head.getText => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Avoid infix `contains` here, i.e. write noExplainCommands.contains(explainArgs.head.getText) |
||
| ExplainCommand(NoRelation) | ||
| case Token("TOK_EXPLAIN", explainArgs) => | ||
| // Ignore FORMATTED if present. | ||
|
|
@@ -377,6 +388,34 @@ private[hive] object HiveQl { | |
| // TODO: support EXTENDED? | ||
| ExplainCommand(nodeToPlan(query)) | ||
|
|
||
| case Token("TOK_DESCTABLE", describeArgs) => | ||
| // Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL | ||
| val Some(tableType) :: formatted :: extended :: _ :: Nil = | ||
| getClauses(Seq("TOK_TABTYPE", "FORMATTED", "EXTENDED", "PRETTY"), describeArgs) | ||
| // TODO: support PRETTY? | ||
| tableType match { | ||
| case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => { | ||
| nameParts.head match { | ||
| case Token(".", dbName :: tableName :: Nil) => | ||
| // It is describing a table with the format like "describe db.table". | ||
| val (db, tableName) = extractDbNameTableName(nameParts.head) | ||
| DescribeCommand( | ||
| UnresolvedRelation(db, tableName, None), formatted.isDefined, extended.isDefined) | ||
| case Token(".", dbName :: tableName :: colName :: Nil) => | ||
| // It is describing a column with the format like "describe db.table column". | ||
| NativePlaceholder | ||
| case tableName => | ||
| // It is describing a table with the format like "describe table". | ||
| DescribeCommand( | ||
| UnresolvedRelation(None, tableName.getText, None), | ||
| formatted.isDefined, | ||
| extended.isDefined) | ||
| } | ||
| } | ||
| // All other cases. | ||
| case _ => NativePlaceholder | ||
| } | ||
|
|
||
| case Token("TOK_CREATETABLE", children) | ||
| if children.collect { case t@Token("TOK_QUERY", _) => t }.nonEmpty => | ||
| // TODO: Parse other clauses. | ||
|
|
@@ -414,11 +453,8 @@ private[hive] object HiveQl { | |
| s"Unhandled clauses: ${notImplemented.flatten.map(dumpTree(_)).mkString("\n")}") | ||
| } | ||
|
|
||
| val (db, tableName) = | ||
| tableNameParts.getChildren.map{ case Token(part, Nil) => cleanIdentifier(part)} match { | ||
| case Seq(tableOnly) => (None, tableOnly) | ||
| case Seq(databaseName, table) => (Some(databaseName), table) | ||
| } | ||
| val (db, tableName) = extractDbNameTableName(tableNameParts) | ||
|
|
||
| InsertIntoCreatedTable(db, tableName, nodeToPlan(query)) | ||
|
|
||
| // If it's not a "CREATE TABLE AS" like above then just pass it back to hive as a native command. | ||
|
|
@@ -736,11 +772,7 @@ private[hive] object HiveQl { | |
| val Some(tableNameParts) :: partitionClause :: Nil = | ||
| getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs) | ||
|
|
||
| val (db, tableName) = | ||
| tableNameParts.getChildren.map{ case Token(part, Nil) => cleanIdentifier(part)} match { | ||
| case Seq(tableOnly) => (None, tableOnly) | ||
| case Seq(databaseName, table) => (Some(databaseName), table) | ||
| } | ||
| val (db, tableName) = extractDbNameTableName(tableNameParts) | ||
|
|
||
| val partitionKeys = partitionClause.map(_.getChildren.map { | ||
| // Parse partitions. We also make keys case insensitive. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,7 @@ | |
|
|
||
| package org.apache.spark.sql.hive | ||
|
|
||
| import org.apache.spark.sql.SQLContext | ||
| import org.apache.spark.sql.{SQLContext} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No need to change this import. |
||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.planning._ | ||
| import org.apache.spark.sql.catalyst.plans._ | ||
|
|
@@ -81,6 +81,20 @@ private[hive] trait HiveStrategies { | |
/**
 * Plans native Hive commands and DESCRIBE commands; every other plan yields no physical plan.
 *
 * For DESCRIBE, the target table is analyzed first. A `MetastoreRelation` is described with
 * `DescribeHiveTableCommand`; any other relation falls back to the generic `DescribeCommand`,
 * where the FORMATTED and EXTENDED flags are only logged and otherwise ignored.
 */
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
  case logical.NativeCommand(sql) =>
    NativeCommand(sql, plan.output)(context) :: Nil

  case describe: logical.DescribeCommand =>
    context.executePlan(describe.table).analyzed match {
      case hiveTable: MetastoreRelation =>
        DescribeHiveTableCommand(
          hiveTable, describe.output, describe.isFormatted, describe.isExtended)(context) :: Nil

      case other: LogicalPlan =>
        if (describe.isFormatted) {
          logger.info("Formatted is ignored because it is not defined for non-Hive tables.")
        }
        if (describe.isExtended) {
          logger.info("Extended is ignored because it is not defined for non-Hive tables.")
        }
        DescribeCommand(planLater(other), describe.output)(context) :: Nil
    }

  case _ => Nil
}
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,8 +19,10 @@ package org.apache.spark.sql.hive.execution | |
|
|
||
| import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar} | ||
| import org.apache.hadoop.hive.conf.HiveConf | ||
| import org.apache.hadoop.hive.metastore.api.FieldSchema | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The `api` import should go after MetaStoreUtils, since `api` is a package. |
||
| import org.apache.hadoop.hive.metastore.MetaStoreUtils | ||
| import org.apache.hadoop.hive.ql.Context | ||
| import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This import should also go after the next line. |
||
| import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Hive} | ||
| import org.apache.hadoop.hive.ql.plan.{TableDesc, FileSinkDesc} | ||
| import org.apache.hadoop.hive.serde.serdeConstants | ||
|
|
@@ -452,3 +454,48 @@ case class NativeCommand( | |
|
|
||
| override def otherCopyArgs = context :: Nil | ||
| } | ||
|
|
||
/**
 * :: DeveloperApi ::
 * Produces the rows of a DESCRIBE statement for a table stored in the Hive metastore.
 *
 * Every row is a (name, type, comment) triple. The result lists the regular columns first.
 * If the table has partition columns, they follow, then a "# Partition Information" header
 * row, then the partition columns once more. FORMATTED and EXTENDED each append one extra row.
 *
 * @param table the metastore relation to describe.
 * @param output the output attributes of this command.
 * @param isFormatted when true, appends a row holding Hive's formatted table information.
 * @param isExtended when true, appends a "Detailed Table Information" row.
 */
@DeveloperApi
case class DescribeHiveTableCommand(
    table: MetastoreRelation,
    output: Seq[Attribute],
    isFormatted: Boolean,
    isExtended: Boolean)(
    @transient context: HiveContext)
  extends LeafNode with Command {

  override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = {
    def describeField(field: FieldSchema): (String, String, String) =
      (field.getName, field.getType, field.getComment)

    val cols: Seq[FieldSchema] = table.hiveQlTable.getCols
    val parCols: Seq[FieldSchema] = table.hiveQlTable.getPartCols
    val columnInfo = cols.map(describeField)
    val partColumnInfo = parCols.map(describeField)

    // Emit the partition section only for partitioned tables. Without this guard an
    // unpartitioned table would end with a dangling "# Partition Information" header row.
    val partitionPart: Seq[(String, String, String)] =
      if (partColumnInfo.isEmpty) {
        Nil
      } else {
        val header: (String, String, String) = ("# Partition Information", null, null)
        partColumnInfo ++ Seq(header) ++ partColumnInfo
      }

    val formattedPart: Seq[(String, String, String)] =
      if (isFormatted) {
        (MetaDataFormatUtils.getTableInformation(table.hiveQlTable), null, null) :: Nil
      } else {
        Nil
      }

    val extendedPart: Seq[(String, String, String)] =
      if (isExtended) {
        ("Detailed Table Information", table.hiveQlTable.getTTable.toString, null) :: Nil
      } else {
        Nil
      }

    // Trying to mimic the format of Hive's output. But not 100% the same.
    columnInfo ++ partitionPart ++ formattedPart ++ extendedPart
  }

  override def execute(): RDD[Row] = {
    val rows = sideEffectResult.map {
      case (name, dataType, comment) => new GenericRow(Array[Any](name, dataType, comment))
    }
    // The result is returned as a single-partition RDD.
    context.sparkContext.parallelize(rows, 1)
  }

  override def otherCopyArgs = context :: Nil
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -234,13 +234,6 @@ class HiveQuerySuite extends HiveComparisonTest { | |
| .map(_.getString(0)) | ||
| .contains(tableName)) | ||
|
|
||
| assertResult(Array(Array("key", "int", "None"), Array("value", "string", "None"))) { | ||
| hql(s"DESCRIBE $tableName") | ||
| .select('result) | ||
| .collect() | ||
| .map(_.getString(0).split("\t").map(_.trim)) | ||
| } | ||
|
|
||
| assert(isExplanation(hql(s"EXPLAIN SELECT key, COUNT(*) FROM $tableName GROUP BY key"))) | ||
|
|
||
| TestHive.reset() | ||
|
|
@@ -257,6 +250,88 @@ class HiveQuerySuite extends HiveComparisonTest { | |
| assert(Try(q0.count()).isSuccess) | ||
| } | ||
|
|
||
| test("Describe commands") { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To be consistent, either use a lowercase "d" or uppercase the whole word DESCRIBE. |
||
| hql(s"CREATE TABLE test_describe_commands (key INT, value STRING) PARTITIONED BY (dt STRING)") | ||
|
|
||
| hql( | ||
| """FROM src INSERT OVERWRITE TABLE test_describe_commands PARTITION (dt='2008-06-08') | ||
| |SELECT key, value | ||
| """.stripMargin) | ||
|
|
||
| // Describe a table | ||
| assertResult( | ||
| Array( | ||
| Array("key", "int", null), | ||
| Array("value", "string", null), | ||
| Array("dt", "string", null), | ||
| Array("# Partition Information", null, null), | ||
| Array("dt", "string", null)) | ||
| ) { | ||
| hql("DESCRIBE test_describe_commands") | ||
| .select('name, 'type, 'comment) | ||
| .collect() | ||
| } | ||
|
|
||
| // Describe a table with keyword FORMATTED | ||
| // We only check the number of rows returned, not their content. | ||
| assertResult(6) { | ||
| hql("DESCRIBE FORMATTED test_describe_commands").count() | ||
| } | ||
|
|
||
| // Describe a table with keyword EXTENDED | ||
| assertResult(6) { | ||
| hql("DESCRIBE EXTENDED test_describe_commands").count() | ||
| } | ||
|
|
||
| // Describe a table with a fully qualified table name | ||
| assertResult( | ||
| Array( | ||
| Array("key", "int", null), | ||
| Array("value", "string", null), | ||
| Array("dt", "string", null), | ||
| Array("# Partition Information", null, null), | ||
| Array("dt", "string", null)) | ||
| ) { | ||
| hql("DESCRIBE default.test_describe_commands") | ||
| .select('name, 'type, 'comment) | ||
| .collect() | ||
| } | ||
|
|
||
| // Describe a column is a native command | ||
| assertResult(Array(Array("value", "string", "from deserializer"))) { | ||
| hql("DESCRIBE test_describe_commands value") | ||
| .select('result) | ||
| .collect() | ||
| .map(_.getString(0).split("\t").map(_.trim)) | ||
| } | ||
|
|
||
| // Describe a column of a fully qualified table is a native command | ||
| assertResult(Array(Array("value", "string", "from deserializer"))) { | ||
| hql("DESCRIBE default.test_describe_commands value") | ||
| .select('result) | ||
| .collect() | ||
| .map(_.getString(0).split("\t").map(_.trim)) | ||
| } | ||
|
|
||
| // Describe a partition is a native command | ||
| assertResult( | ||
| Array( | ||
| Array("key", "int", "None"), | ||
| Array("value", "string", "None"), | ||
| Array("dt", "string", "None"), | ||
| Array("", "", ""), | ||
| Array("# Partition Information", "", ""), | ||
| Array("# col_name", "data_type", "comment"), | ||
| Array("", "", ""), | ||
| Array("dt", "string", "None")) | ||
| ) { | ||
| hql("DESCRIBE test_describe_commands PARTITION (dt='2008-06-08')") | ||
| .select('result) | ||
| .collect() | ||
| .map(_.getString(0).split("\t").map(_.trim)) | ||
| } | ||
| } | ||
|
|
||
| test("parse HQL set commands") { | ||
| // Adapted from its SQL counterpart. | ||
| val testKey = "spark.sql.key.usedfortestonly" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove this block