Skip to content

Commit 8003cf3

Browse files
committed
Generate strings with the format like Hive for unit tests.
1 parent 9787fff commit 8003cf3

File tree

8 files changed

+96
-53
lines changed

8 files changed

+96
-53
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,15 @@ case class CacheCommand(tableName: String, doCache: Boolean) extends Command
6868
/**
6969
* Returned for the "DESCRIBE tableName" command.
7070
* @param table The table to be described.
71-
* @param isFormatted True if "DESCRIBE FORMATTED" is used. Otherwise, false.
72-
* It is effective only when the table is a Hive table.
7371
* @param isExtended True if "DESCRIBE EXTENDED" is used. Otherwise, false.
7472
* It is effective only when the table is a Hive table.
7573
*/
7674
case class DescribeCommand(
7775
table: LogicalPlan,
78-
isFormatted: Boolean,
7976
isExtended: Boolean) extends Command {
8077
override def output = Seq(
81-
BoundReference(0, AttributeReference("name", StringType, nullable = false)()),
82-
BoundReference(1, AttributeReference("type", StringType, nullable = false)()),
78+
// Column names are based on Hive.
79+
BoundReference(0, AttributeReference("col_name", StringType, nullable = false)()),
80+
BoundReference(1, AttributeReference("data_type", StringType, nullable = false)()),
8381
BoundReference(2, AttributeReference("comment", StringType, nullable = false)()))
8482
}

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
3838
import org.apache.spark.sql.catalyst.types._
3939
import org.apache.spark.sql.execution.QueryExecutionException
4040
import org.apache.spark.sql.execution.{Command => PhysicalCommand}
41+
import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand
4142

4243
/**
4344
* Starts up an instance of hive where metadata is stored locally. An in-process metadata data is
@@ -291,6 +292,10 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
291292
* execution is simply passed back to Hive.
292293
*/
293294
def stringResult(): Seq[String] = executedPlan match {
295+
case describeHiveTableCommand: DescribeHiveTableCommand =>
296+
// If it is a describe command for a Hive table, we want to have the output format
297+
// be similar to Hive's.
298+
describeHiveTableCommand.hiveString
294299
case command: PhysicalCommand =>
295300
command.sideEffectResult.map(_.toString)
296301

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -391,30 +391,34 @@ private[hive] object HiveQl {
391391

392392
case Token("TOK_DESCTABLE", describeArgs) =>
393393
// Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
394-
val Some(tableType) :: formatted :: extended :: _ :: Nil =
394+
val Some(tableType) :: formatted :: extended :: pretty :: Nil =
395395
getClauses(Seq("TOK_TABTYPE", "FORMATTED", "EXTENDED", "PRETTY"), describeArgs)
396-
// TODO: support PRETTY?
397-
tableType match {
398-
case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => {
399-
nameParts.head match {
400-
case Token(".", dbName :: tableName :: Nil) =>
401-
// It is describing a table with the format like "describe db.table".
402-
val (db, tableName) = extractDbNameTableName(nameParts.head)
403-
DescribeCommand(
404-
UnresolvedRelation(db, tableName, None), formatted.isDefined, extended.isDefined)
405-
case Token(".", dbName :: tableName :: colName :: Nil) =>
406-
// It is describing a column with the format like "describe db.table column".
407-
NativePlaceholder
408-
case tableName =>
409-
// It is describing a table with the format like "describe table".
410-
DescribeCommand(
411-
UnresolvedRelation(None, tableName.getText, None),
412-
formatted.isDefined,
413-
extended.isDefined)
396+
if (formatted.isDefined || pretty.isDefined) {
397+
// FORMATTED and PRETTY are not supported and this statement will be treated as
398+
// a Hive native command.
399+
NativePlaceholder
400+
} else {
401+
tableType match {
402+
case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => {
403+
nameParts.head match {
404+
case Token(".", dbName :: tableName :: Nil) =>
405+
// It is describing a table with the format like "describe db.table".
406+
val (db, tableName) = extractDbNameTableName(nameParts.head)
407+
DescribeCommand(
408+
UnresolvedRelation(db, tableName, None), extended.isDefined)
409+
case Token(".", dbName :: tableName :: colName :: Nil) =>
410+
// It is describing a column with the format like "describe db.table column".
411+
NativePlaceholder
412+
case tableName =>
413+
// It is describing a table with the format like "describe table".
414+
DescribeCommand(
415+
UnresolvedRelation(None, tableName.getText, None),
416+
extended.isDefined)
417+
}
414418
}
419+
// All other cases.
420+
case _ => NativePlaceholder
415421
}
416-
// All other cases.
417-
case _ => NativePlaceholder
418422
}
419423

420424
case Token("TOK_CREATETABLE", children)

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ private[hive] trait HiveStrategies {
8686
resolvedTable match {
8787
case t: MetastoreRelation =>
8888
Seq(DescribeHiveTableCommand(
89-
t, describe.output, describe.isFormatted, describe.isExtended)(context))
89+
t, describe.output, describe.isExtended)(context))
9090
case o: LogicalPlan =>
9191
Seq(DescribeCommand(planLater(o), describe.output)(context))
9292
}

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -462,32 +462,43 @@ case class NativeCommand(
462462
case class DescribeHiveTableCommand(
463463
table: MetastoreRelation,
464464
output: Seq[Attribute],
465-
isFormatted: Boolean,
466465
isExtended: Boolean)(
467466
@transient context: HiveContext)
468467
extends LeafNode with Command {
469468

470-
override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = {
471-
val cols: Seq[FieldSchema] = table.hiveQlTable.getCols
472-
val parCols: Seq[FieldSchema] = table.hiveQlTable.getPartCols
473-
val columnInfo = cols.map(field => (field.getName, field.getType, field.getComment))
474-
val partColumnInfo = parCols.map(field => (field.getName, field.getType, field.getComment))
469+
// Strings formatted like Hive's output. They are used for result comparison in our unit tests.
470+
lazy val hiveString: Seq[String] = {
471+
val alignment = 20
472+
val delim = "\t"
475473

476-
val formattedPart = if (isFormatted) {
477-
(MetaDataFormatUtils.getTableInformation(table.hiveQlTable), null, null) :: Nil
478-
} else {
479-
Nil
474+
sideEffectResult.map {
475+
case (name, dataType, comment) =>
476+
String.format("%-" + alignment + "s", name) + delim +
477+
String.format("%-" + alignment + "s", dataType) + delim +
478+
String.format("%-" + alignment + "s", Option(comment).getOrElse("None"))
480479
}
480+
}
481481

482-
val extendedPart = if (isExtended) {
483-
("Detailed Table Information", table.hiveQlTable.getTTable.toString, null) :: Nil
484-
} else {
485-
Nil
482+
override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = {
483+
// Trying to mimic the format of Hive's output. But not exactly the same.
484+
var results: Seq[(String, String, String)] = Nil
485+
486+
val columns: Seq[FieldSchema] = table.hiveQlTable.getCols
487+
val partitionColumns: Seq[FieldSchema] = table.hiveQlTable.getPartCols
488+
results ++= columns.map(field => (field.getName, field.getType, field.getComment))
489+
if (!partitionColumns.isEmpty) {
490+
val partColumnInfo =
491+
partitionColumns.map(field => (field.getName, field.getType, field.getComment))
492+
results ++=
493+
partColumnInfo ++ Seq(("# Partition Information", "", "")) ++
494+
Seq((s"# ${output.get(0).name}", output.get(1).name, output.get(2).name)) ++ partColumnInfo
495+
}
496+
497+
if (isExtended) {
498+
results ++= Seq(("Detailed Table Information", table.hiveQlTable.getTTable.toString, ""))
486499
}
487500

488-
// Trying to mimic the format of Hive's output. But not 100% the same.
489-
columnInfo ++ partColumnInfo ++ Seq(("# Partition Information", null, null)) ++
490-
partColumnInfo ++ formattedPart ++ extendedPart
501+
results
491502
}
492503

493504
override def execute(): RDD[Row] = {

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ abstract class HiveComparisonTest
144144
case _: SetCommand => Seq("0")
145145
case _: LogicalNativeCommand => answer.filterNot(nonDeterministicLine).filterNot(_ == "")
146146
case _: ExplainCommand => answer
147+
case _: DescribeCommand =>
148+
answer.filterNot(
149+
r => nonDeterministicLine(r) || ignoredLine(r)).map(_.trim).filterNot(
150+
r => r == "" || r == "\n")
147151
case plan => if (isSorted(plan)) answer else answer.sorted
148152
}
149153
orderedAnswer.map(cleanPaths)
@@ -169,6 +173,16 @@ abstract class HiveComparisonTest
169173
protected def nonDeterministicLine(line: String) =
170174
nonDeterministicLineIndicators.exists(line contains _)
171175

176+
// This list contains indicators for those lines which do not have actual results and we
177+
// want to ignore.
178+
lazy val ignoredLineIndicators = Seq(
179+
"# Partition Information",
180+
"# col_name"
181+
)
182+
183+
protected def ignoredLine(line: String) =
184+
ignoredLineIndicators.exists(line contains _)
185+
172186
/**
173187
* Removes non-deterministic paths from `str` so cached answers will compare correctly.
174188
*/
@@ -329,11 +343,17 @@ abstract class HiveComparisonTest
329343

330344
if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) && preparedHive != catalyst) {
331345

332-
val hivePrintOut = s"== HIVE - ${hive.size} row(s) ==" +: preparedHive
346+
val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive
333347
val catalystPrintOut = s"== CATALYST - ${catalyst.size} row(s) ==" +: catalyst
334348

335349
val resultComparison = sideBySide(hivePrintOut, catalystPrintOut).mkString("\n")
336350

351+
println("hive output")
352+
hive.foreach(println)
353+
354+
println("catalyst printout")
355+
catalyst.foreach(println)
356+
337357
if (recomputeCache) {
338358
logger.warn(s"Clearing cache files for failed test $testCaseName")
339359
hiveCacheFiles.foreach(_.delete())

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,16 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
177177
// After stop taking the `stringOrError` route, exceptions are thrown from these cases.
178178
// See SPARK-2129 for details.
179179
"join_view",
180-
"mergejoins_mixed"
180+
"mergejoins_mixed",
181+
182+
// Returning the result of a describe statement as a JSON object is not supported.
183+
"describe_table_json",
184+
"describe_database_json",
185+
"describe_formatted_view_partitioned_json",
186+
187+
// Hive returns the results of describe as plain text. Comments with multiple lines
188+
// introduce extra lines in the Hive results, which makes the result comparison fail.
189+
"describe_comment_indent"
181190
)
182191

183192
/**
@@ -292,11 +301,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
292301
"default_partition_name",
293302
"delimiter",
294303
"desc_non_existent_tbl",
295-
"describe_comment_indent",
296-
"describe_database_json",
297304
"describe_formatted_view_partitioned",
298-
"describe_formatted_view_partitioned_json",
299-
"describe_table_json",
300305
"diff_part_input_formats",
301306
"disable_file_format_check",
302307
"drop_function",

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ class HiveQuerySuite extends HiveComparisonTest {
270270
Array("dt", "string", null))
271271
) {
272272
hql("DESCRIBE test_describe_commands1")
273-
.select('name, 'type, 'comment)
273+
.select('col_name, 'data_type, 'comment)
274274
.collect()
275275
}
276276

@@ -295,7 +295,7 @@ class HiveQuerySuite extends HiveComparisonTest {
295295
Array("dt", "string", null))
296296
) {
297297
hql("DESCRIBE default.test_describe_commands1")
298-
.select('name, 'type, 'comment)
298+
.select('col_name, 'data_type, 'comment)
299299
.collect()
300300
}
301301

@@ -347,7 +347,7 @@ class HiveQuerySuite extends HiveComparisonTest {
347347
Array("b", "StringType", null))
348348
) {
349349
hql("DESCRIBE test_describe_commands2")
350-
.select('name, 'type, 'comment)
350+
.select('col_name, 'data_type, 'comment)
351351
.collect()
352352
}
353353
}

0 commit comments

Comments
 (0)