@@ -20,15 +20,17 @@ package org.apache.spark.sql.execution.command
 import java.io.File
 import java.net.URI
 
+import scala.collection.mutable.ArrayBuffer
+
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, ExternalCatalog}
-import org.apache.spark.sql.catalyst.catalog.ExternalCatalog.TablePartitionSpec
-import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan, UnaryNode}
-import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType}
+import org.apache.spark.sql.execution.datasources.PartitioningUtils
+import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
 case class CreateTableAsSelectLogicalPlan(
@@ -274,6 +276,7 @@ case class LoadData(
  * {{{
  * DESCRIBE [EXTENDED|FORMATTED] [db_name.]table_name [column_name] [PARTITION partition_spec]
  * }}}
+ * Note: the FORMATTED option is not supported.
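+ *
+ * For example, describing one partition of a hypothetical partitioned table `page_view`:
+ * {{{
+ *   DESCRIBE EXTENDED page_view PARTITION (ds='2008-08-08')
+ * }}}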
  * @param table table to be described.
  * @param partSpec If specified, the specified partition is described. It is effective only
  *                 when the table is a Hive table.
@@ -289,7 +292,7 @@ case class DescribeTableCommand(
     isExtended: Boolean)
   extends RunnableCommand {
 
-  override val output: Seq[Attribute] = Seq(
+  override val output: Seq[Attribute] = Seq(
     // Column names are based on Hive.
     AttributeReference("col_name", StringType, nullable = false,
       new MetadataBuilder().putString("comment", "name of the column").build())(),
@@ -299,28 +302,161 @@ case class DescribeTableCommand(
       new MetadataBuilder().putString("comment", "comment of the column").build())()
   )
 
-  override def run(sparkSession: SparkSession): Seq[Row] = {
-    val catalog = sparkSession.sessionState.catalog
-    // Check to make sure supplied partition are valid partition columns.
-    if (partSpec.isDefined && !catalog.isTemporaryTable(table)) {
-      val tab = catalog.getTableMetadata(table)
-      val badColumns = partSpec.get.keySet.filterNot(tab.partitionColumns.map(_.name).contains)
-      if (badColumns.nonEmpty) {
-        throw new AnalysisException(
-          s"Non-partitioned column(s) [${badColumns.mkString(", ")}] are" +
-            s" specified for DESCRIBE command")
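+  // Renders catalog columns as small "(name: ..., type: ..., comment: ...)" records; used
+  // below when printing bucket and sort columns in the extended storage description.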
+  private def formatColumns(cols: Seq[CatalogColumn]): String = {
+    cols.map { col =>
+      s"""
+         |${col.getClass.getSimpleName}
+         |(name: ${col.name}
+         |type: ${col.dataType}
+         |comment: ${col.comment.orNull})
+       """.stripMargin
+    }.mkString(", ")
+  }
+
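+  // Renders a property map as "{k1=v1, k2=v2}" for the extended output.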
+  private def formatProperties(props: Map[String, String]): String = {
+    props.map {
+      case (k, v) => s"$k=$v"
+    }.mkString("{", ", ", "}")
+  }
+
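+  // Returns the partition's values in partition-column order, escaped the same way they
+  // would appear in a partition path.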
+  private def getPartValues(part: CatalogTablePartition, cols: Seq[String]): String = {
+    cols.map { name =>
+      PartitioningUtils.escapePathName(part.spec(name))
+    }.mkString(", ")
+  }
+
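+  // Resolves a dot-separated column path against the table schema ("col.field" for structs,
+  // "col.$key$"/"col.$value$" for map keys and values, "col.$value$" for array elements)
+  // and describes the type it resolves to; a struct leaf expands to one row per field.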
+  private def descColPath(table: CatalogTable, colPath: String): Array[Row] = {
+    val names = colPath.split("\\.")
+    val lastName = names(names.length - 1)
+    val fields = table.schema.map { c =>
+      StructField(c.name, CatalystSqlParser.parseDataType(c.dataType), c.nullable)
+    }
+    var dataType: DataType = StructType(fields)
+    for (i <- names.indices) {
+      dataType match {
+        case s: StructType =>
+          try {
+            dataType = s.apply(names(i)).dataType
+          } catch {
+            case _: Exception =>
+              throw new AnalysisException(s"Column name/path: ${colPath} does not exist.")
+          }
+        case m: MapType if names(i) == "$key$" => dataType = m.keyType
+        case m: MapType if names(i) == "$value$" => dataType = m.valueType
+        case a: ArrayType if names(i) == "$value$" => dataType = a.elementType
+        case _ => throw new AnalysisException(s"Column name/path: ${colPath} does not exist.")
       }
     }
 
-    val results =
-      sparkSession.sessionState.catalog.describeTable(table, partSpec, colPath, isExtended, output)
-    val rows = results.map { case (name, dataType, comment) =>
-      Row(name, dataType, comment)
+    val result: Seq[Row] = dataType match {
+      case s: StructType =>
+        s.map { f =>
+          Row(f.name, f.dataType.simpleString, "from deserializer")
+        }
+      case d: DataType => Seq(Row(lastName, dataType.simpleString, "from deserializer"))
     }
-    rows
+    result.toArray
+  }
+
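+  // Builds the single-line storage descriptor (location, input/output formats, serde,
+  // bucketing and sort columns) embedded in the extended table/partition output.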
+  private def descStorageFormat(
+      table: CatalogTable,
+      storage: CatalogStorageFormat): String = {
+    // TODO - check with Lian - compress, skewedInfo and storedAsSubDirectories from
+    // StorageDesc are not available, so they are dropped from the output.
+    val storageLocationStr =
+      s"""
+         |${storage.getClass.getSimpleName}(location: ${storage.locationUri.orNull},
+         |inputFormat: ${storage.inputFormat.orNull},
+         |outputFormat: ${storage.outputFormat.orNull},
+         |numBuckets: ${table.numBuckets},
+         |serializationLib: ${storage.serde.orNull},
+         |parameters: ${formatProperties(storage.serdeProperties)},
+         |bucketCols: [${formatColumns(table.bucketColumns)}],
+         |sortCols: [${formatColumns(table.sortColumns)}])
+       """.stripMargin.replaceAll("\n", " ").trim
+    storageLocationStr
+  }
+
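+  // Builds the "Detailed Partition Information" string for DESCRIBE EXTENDED, loosely
+  // following Hive's output for a partition.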
+  private def descPartExtended(table: CatalogTable, part: CatalogTablePartition): String = {
+    val result = StringBuilder.newBuilder
+    val clsName = part.getClass.getSimpleName
+    result ++= s"${clsName}(values: [${getPartValues(part, table.partitionColumnNames)}], "
+    result ++= s"dbName: ${table.database}, "
+    // TODO - check with Lian - no owner info available.
+    result ++= s"createTime: ${table.createTime}, "
+    result ++= s"lastAccessTime: ${table.lastAccessTime}, "
+    // TODO - check with Lian - no retention info available.
+
+    result ++= s"sd: ${descStorageFormat(table, part.storage)}, "
+    // TODO - check with Lian - Hive prints the partition keys here. Since the partitioning
+    // keys and the schema are already output at the start, they are not repeated here.
+    result ++= s"parameters: ${formatProperties(table.properties)}, "
+    result ++= s"viewOriginalText: ${table.viewOriginalText.orNull}, "
+    result ++= s"viewExpandedText: ${table.viewText.orNull}, "
+    result ++= s"tableType: ${table.tableType})"
+    result.toString
   }
-}
 
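+  // Builds the "Detailed Table Information" string for DESCRIBE EXTENDED, loosely
+  // following Hive's output for a table.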
+  private def descTableExtended(table: CatalogTable): String = {
+    val result = StringBuilder.newBuilder
+    result ++= s"${table.getClass.getSimpleName}(tableName: ${table.identifier.table}, "
+    result ++= s"dbName: ${table.database}, "
+    // TODO - check with Lian - no owner info available.
+    result ++= s"createTime: ${table.createTime}, "
+    result ++= s"lastAccessTime: ${table.lastAccessTime}, "
+    // TODO - check with Lian - no retention info available.
+
+    result ++= s"sd: ${descStorageFormat(table, table.storage)}, "
+    // TODO - check with Lian - Hive prints the partition keys here. Since the partitioning
+    // keys and the schema are already output, they are not repeated here.
+    result ++= s"parameters: ${formatProperties(table.properties)}, "
+    result ++= s"viewOriginalText: ${table.viewOriginalText.orNull}, "
+    result ++= s"viewExpandedText: ${table.viewText.orNull}, "
+    result ++= s"tableType: ${table.tableType})"
+    result.toString
+  }
+
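+  // Catalog (Hive) relations are described from their catalog metadata, including partition
+  // columns and, for EXTENDED, a detailed table/partition string; any other relation simply
+  // lists its schema fields.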
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    val result = new ArrayBuffer[Row]
+    val catalog = sparkSession.sessionState.catalog
+    catalog.lookupRelation(table) match {
+      case catalogRelation: CatalogRelation =>
+        val tab = catalogRelation.catalogTable
+        val part = partSpec.flatMap(p => Option(catalog.getPartition(table, p)))
+        if (colPath.nonEmpty) {
+          result ++= descColPath(tab, colPath.get)
+        } else {
+          tab.schema.foreach { column =>
+            result += Row(column.name, column.dataType, column.comment.orNull)
+          }
+          if (tab.partitionColumns.nonEmpty) {
+            result += Row("# Partition Information", "", "")
+            result += Row(s"# ${output(0).name}", output(1).name, output(2).name)
+
+            tab.partitionColumns.foreach { col =>
+              result += Row(col.name, col.dataType, col.comment.orNull)
+            }
+          }
+          if (isExtended) {
+            if (partSpec.isEmpty) {
+              result += Row("Detailed Table Information", descTableExtended(tab), "")
+            } else {
+              result +=
+                Row("Detailed Partition Information", descPartExtended(tab, part.get), "")
+            }
+          }
+        }
+
+      case relation =>
+        relation.schema.fields.foreach { field =>
+          val comment =
+            if (field.metadata.contains("comment")) field.metadata.getString("comment") else ""
+          result += Row(field.name, field.dataType.simpleString, comment)
+        }
+    }
+
+    result
+  }
+}
 
 /**
  * A command for users to get tables in the given database.