From 01334635c5433f0515beb92660b79796c97677d5 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 15 Jan 2017 20:52:55 -0800 Subject: [PATCH 1/4] fix --- .../sql/execution/command/DDLSuite.scala | 530 +++++++++++------- .../sql/hive/execution/HiveDDLSuite.scala | 48 +- 2 files changed, 366 insertions(+), 212 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index ac3878e84905..a951b94eb49d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -31,13 +31,12 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogTablePartition, SessionCata import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION -import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} import org.apache.spark.sql.types._ import org.apache.spark.util.Utils -class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { - private val escapedIdentifier = "`(.+)`".r +class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSQLContext with BeforeAndAfterEach { override def afterEach(): Unit = { try { // drop all databases, tables and functions after each test @@ -48,34 +47,9 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } - /** - * Strip backticks, if any, from the string. - */ - private def cleanIdentifier(ident: String): String = { - ident match { - case escapedIdentifier(i) => i - case plainIdent => plainIdent - } - } - - private def assertUnsupported(query: String): Unit = { - val e = intercept[AnalysisException] { - sql(query) - } - assert(e.getMessage.toLowerCase.contains("operation not allowed")) - } - - private def maybeWrapException[T](expectException: Boolean)(body: => T): Unit = { - if (expectException) intercept[AnalysisException] { body } else body - } - - private def createDatabase(catalog: SessionCatalog, name: String): Unit = { - catalog.createDatabase( - CatalogDatabase(name, "", spark.sessionState.conf.warehousePath, Map()), - ignoreIfExists = false) - } - - private def generateTable(catalog: SessionCatalog, name: TableIdentifier): CatalogTable = { + protected override def generateTable( + catalog: SessionCatalog, + name: TableIdentifier): CatalogTable = { val storage = CatalogStorageFormat( locationUri = Some(catalog.defaultTablePath(name)), @@ -102,6 +76,198 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { tracksPartitionsInCatalog = true) } + test("desc table for parquet data source table using in-memory catalog") { + val tabName = "tab1" + withTable(tabName) { + sql(s"CREATE TABLE $tabName(a int comment 'test') USING parquet ") + + checkAnswer( + sql(s"DESC $tabName").select("col_name", "data_type", "comment"), + Row("a", "int", "test") + ) + } + } + + test("select/insert into the managed table") { + val tabName = "tbl" + withTable(tabName) { + sql(s"CREATE TABLE $tabName (i INT, j STRING)") + val catalogTable = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(tabName, Some("default"))) + assert(catalogTable.tableType == CatalogTableType.MANAGED) + + var message = intercept[AnalysisException] { + sql(s"INSERT OVERWRITE TABLE $tabName SELECT 1, 'a'") + 
}.getMessage + assert(message.contains("Hive support is required to insert into the following tables")) + message = intercept[AnalysisException] { + sql(s"SELECT * FROM $tabName") + }.getMessage + assert(message.contains("Hive support is required to select over the following tables")) + } + } + + test("select/insert into external table") { + withTempDir { tempDir => + val tabName = "tbl" + withTable(tabName) { + sql( + s""" + |CREATE EXTERNAL TABLE $tabName (i INT, j STRING) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |LOCATION '$tempDir' + """.stripMargin) + val catalogTable = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(tabName, Some("default"))) + assert(catalogTable.tableType == CatalogTableType.EXTERNAL) + + var message = intercept[AnalysisException] { + sql(s"INSERT OVERWRITE TABLE $tabName SELECT 1, 'a'") + }.getMessage + assert(message.contains("Hive support is required to insert into the following tables")) + message = intercept[AnalysisException] { + sql(s"SELECT * FROM $tabName") + }.getMessage + assert(message.contains("Hive support is required to select over the following tables")) + } + } + } + + test("Create Hive Table As Select") { + import testImplicits._ + withTable("t", "t1") { + var e = intercept[AnalysisException] { + sql("CREATE TABLE t SELECT 1 as a, 1 as b") + }.getMessage + assert(e.contains("Hive support is required to use CREATE Hive TABLE AS SELECT")) + + spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1") + e = intercept[AnalysisException] { + sql("CREATE TABLE t SELECT a, b from t1") + }.getMessage + assert(e.contains("Hive support is required to use CREATE Hive TABLE AS SELECT")) + } + } + + test("alter table: set location (datasource table)") { + testSetLocation(isDatasourceTable = true) + } + + test("alter table: set properties (datasource table)") { + testSetProperties(isDatasourceTable = true) + } + + test("alter table: unset properties (datasource table)") { + testUnsetProperties(isDatasourceTable = true) + } + + test("alter table: set serde (datasource table)") { + testSetSerde(isDatasourceTable = true) + } + + test("alter table: set serde partition (datasource table)") { + testSetSerdePartition(isDatasourceTable = true) + } + + test("alter table: change column (datasource table)") { + testChangeColumn(isDatasourceTable = true) + } + + test("alter table: add partition (datasource table)") { + testAddPartitions(isDatasourceTable = true) + } + + test("alter table: drop partition (datasource table)") { + testDropPartitions(isDatasourceTable = true) + } + + test("alter table: rename partition (datasource table)") { + testRenamePartitions(isDatasourceTable = true) + } + + test("drop table - data source table") { + testDropTable(isDatasourceTable = true) + } +} + +abstract class DDLSuite extends QueryTest with SQLTestUtils { + + protected def isUsingHiveMetastore: Boolean = { + spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive" + } + + protected def generateTable(catalog: SessionCatalog, name: TableIdentifier): CatalogTable + + private val escapedIdentifier = "`(.+)`".r + + def normalizeCatalogTable(table: CatalogTable): CatalogTable = { + val nondeterministicProps = Set( + "CreateTime", + "transient_lastDdlTime", + "grantTime", + "lastUpdateTime", + "last_modified_by", + "last_modified_time", + "Owner:", + "COLUMN_STATS_ACCURATE", + // The following are hive specific schema parameters which we do not need to match exactly. 
+ "numFiles", + "numRows", + "rawDataSize", + "totalSize", + "totalNumberFiles", + "maxFileSize", + "minFileSize", + // EXTERNAL is not non-deterministic, but it is filtered out for external tables. + "EXTERNAL" + ) + + table.copy( + createTime = 0L, + lastAccessTime = 0L, + owner = "", + properties = table.properties.filterKeys(!nondeterministicProps.contains(_)), + // View texts are checked separately + viewOriginalText = None, + viewText = None + ) + } + + def normalizeSerdeProp(props: Map[String, String]): Map[String, String] = { + props.filterNot(p => Seq("serialization.format", "path").contains(p._1)) + } + + private def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { + assert(normalizeCatalogTable(actual) == normalizeCatalogTable(expected)) + } + + /** + * Strip backticks, if any, from the string. + */ + private def cleanIdentifier(ident: String): String = { + ident match { + case escapedIdentifier(i) => i + case plainIdent => plainIdent + } + } + + private def assertUnsupported(query: String): Unit = { + val e = intercept[AnalysisException] { + spark.sql(query) + } + assert(e.getMessage.toLowerCase.contains("operation not allowed")) + } + + private def maybeWrapException[T](expectException: Boolean)(body: => T): Unit = { + if (expectException) intercept[AnalysisException] { body } else body + } + + private def createDatabase(catalog: SessionCatalog, name: String): Unit = { + catalog.createDatabase( + CatalogDatabase(name, "", spark.sessionState.conf.warehousePath, Map()), + ignoreIfExists = false) + } + private def createTable(catalog: SessionCatalog, name: TableIdentifier): Unit = { catalog.createTable(generateTable(catalog, name), ignoreIfExists = false) } @@ -115,6 +281,16 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { catalog.createPartitions(tableName, Seq(part), ignoreIfExists = false) } + private def getDatabaseLocation(dbName: String): String = { + val expectedLocation = if (isUsingHiveMetastore) { + val dbPath = new Path(spark.sessionState.conf.warehousePath) + s"${dbPath.toUri.getPath.stripSuffix("/")}/$dbName.db" + } else { + s"spark-warehouse/$dbName.db" + } + makeQualifiedPath(expectedLocation) + } + test("the qualified path of a database is stored in the catalog") { val catalog = spark.sessionState.catalog @@ -145,11 +321,10 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { try { sql(s"CREATE DATABASE $dbName") val db1 = catalog.getDatabaseMetadata(dbName) - val expectedLocation = makeQualifiedPath(s"spark-warehouse/$dbName.db") assert(db1 == CatalogDatabase( dbName, "", - expectedLocation, + getDatabaseLocation(dbName), Map.empty)) sql(s"DROP DATABASE $dbName CASCADE") assert(!catalog.databaseExists(dbName)) @@ -192,15 +367,21 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { val dbNameWithoutBackTicks = cleanIdentifier(dbName) sql(s"CREATE DATABASE $dbName") val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks) - val expectedLocation = makeQualifiedPath(s"spark-warehouse/$dbNameWithoutBackTicks.db") assert(db1 == CatalogDatabase( dbNameWithoutBackTicks, "", - expectedLocation, + getDatabaseLocation(dbNameWithoutBackTicks), Map.empty)) - intercept[DatabaseAlreadyExistsException] { - sql(s"CREATE DATABASE $dbName") + if (isUsingHiveMetastore) { + val e = intercept[AnalysisException] { + sql(s"CREATE DATABASE $dbName") + }.getMessage + assert(e.contains(s"Database $dbNameWithoutBackTicks already exists")) + } else { + 
intercept[DatabaseAlreadyExistsException] { + sql(s"CREATE DATABASE $dbName") + } } } finally { catalog.reset() @@ -420,19 +601,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } - test("desc table for parquet data source table using in-memory catalog") { - assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") - val tabName = "tab1" - withTable(tabName) { - sql(s"CREATE TABLE $tabName(a int comment 'test') USING parquet ") - - checkAnswer( - sql(s"DESC $tabName").select("col_name", "data_type", "comment"), - Row("a", "int", "test") - ) - } - } - test("Alter/Describe Database") { val catalog = spark.sessionState.catalog val databaseNames = Seq("db1", "`database`") @@ -440,7 +608,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { databaseNames.foreach { dbName => try { val dbNameWithoutBackTicks = cleanIdentifier(dbName) - val location = makeQualifiedPath(s"spark-warehouse/$dbNameWithoutBackTicks.db") + val location = getDatabaseLocation(dbNameWithoutBackTicks) sql(s"CREATE DATABASE $dbName") @@ -484,7 +652,11 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { var message = intercept[AnalysisException] { sql(s"DROP DATABASE $dbName") }.getMessage - assert(message.contains(s"Database '$dbNameWithoutBackTicks' not found")) + if (isUsingHiveMetastore) { + assert(message.contains(s"NoSuchObjectException: $dbNameWithoutBackTicks")) + } else { + assert(message.contains(s"Database '$dbNameWithoutBackTicks' not found")) + } message = intercept[AnalysisException] { sql(s"ALTER DATABASE $dbName SET DBPROPERTIES ('d'='d')") @@ -513,7 +685,11 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { val message = intercept[AnalysisException] { sql(s"DROP DATABASE $dbName RESTRICT") }.getMessage - assert(message.contains(s"Database '$dbName' is not empty. One or more tables exist")) + if (isUsingHiveMetastore) { + assert(message.contains(s"Database $dbName is not empty. One or more tables exist")) + } else { + assert(message.contains(s"Database '$dbName' is not empty. 
One or more tables exist")) + } catalog.dropTable(tableIdent1, ignoreIfNotExists = false, purge = false) @@ -544,7 +720,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { createTable(catalog, tableIdent1) val expectedTableIdent = tableIdent1.copy(database = Some("default")) val expectedTable = generateTable(catalog, expectedTableIdent) - assert(catalog.getTableMetadata(tableIdent1) === expectedTable) + checkCatalogTables(expectedTable, catalog.getTableMetadata(tableIdent1)) } test("create table in a specific db") { @@ -553,7 +729,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { val tableIdent1 = TableIdentifier("tab1", Some("dbx")) createTable(catalog, tableIdent1) val expectedTable = generateTable(catalog, tableIdent1) - assert(catalog.getTableMetadata(tableIdent1) === expectedTable) + checkCatalogTables(expectedTable, catalog.getTableMetadata(tableIdent1)) } test("create table using") { @@ -738,50 +914,26 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { testSetLocation(isDatasourceTable = false) } - test("alter table: set location (datasource table)") { - testSetLocation(isDatasourceTable = true) - } - test("alter table: set properties") { testSetProperties(isDatasourceTable = false) } - test("alter table: set properties (datasource table)") { - testSetProperties(isDatasourceTable = true) - } - test("alter table: unset properties") { testUnsetProperties(isDatasourceTable = false) } - test("alter table: unset properties (datasource table)") { - testUnsetProperties(isDatasourceTable = true) - } - test("alter table: set serde") { testSetSerde(isDatasourceTable = false) } - test("alter table: set serde (datasource table)") { - testSetSerde(isDatasourceTable = true) - } - test("alter table: set serde partition") { testSetSerdePartition(isDatasourceTable = false) } - test("alter table: set serde partition (datasource table)") { - testSetSerdePartition(isDatasourceTable = true) - } - test("alter table: change column") { testChangeColumn(isDatasourceTable = false) } - test("alter table: change column (datasource table)") { - testChangeColumn(isDatasourceTable = true) - } - test("alter table: bucketing is not supported") { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) @@ -810,10 +962,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { testAddPartitions(isDatasourceTable = false) } - test("alter table: add partition (datasource table)") { - testAddPartitions(isDatasourceTable = true) - } - test("alter table: recover partitions (sequential)") { withSQLConf("spark.rdd.parallelListingThreshold" -> "10") { testRecoverPartitions() @@ -826,7 +974,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } - private def testRecoverPartitions() { + protected def testRecoverPartitions() { val catalog = spark.sessionState.catalog // table to alter does not exist intercept[AnalysisException] { @@ -865,8 +1013,10 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { sql("ALTER TABLE tab1 RECOVER PARTITIONS") assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2)) - assert(catalog.getPartition(tableIdent, part1).parameters("numFiles") == "1") - assert(catalog.getPartition(tableIdent, part2).parameters("numFiles") == "2") + if (!isUsingHiveMetastore) { + assert(catalog.getPartition(tableIdent, part1).parameters("numFiles") == "1") + 
assert(catalog.getPartition(tableIdent, part2).parameters("numFiles") == "2") + } } finally { fs.delete(root, true) } @@ -880,10 +1030,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { testDropPartitions(isDatasourceTable = false) } - test("alter table: drop partition (datasource table)") { - testDropPartitions(isDatasourceTable = true) - } - test("alter table: drop partition is not supported for views") { assertUnsupported("ALTER VIEW dbx.tab1 DROP IF EXISTS PARTITION (b='2')") } @@ -892,10 +1038,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { testRenamePartitions(isDatasourceTable = false) } - test("alter table: rename partition (datasource table)") { - testRenamePartitions(isDatasourceTable = true) - } - test("show table extended") { withTempView("show1a", "show2b") { sql( @@ -976,11 +1118,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { testDropTable(isDatasourceTable = false) } - test("drop table - data source table") { - testDropTable(isDatasourceTable = true) - } - - private def testDropTable(isDatasourceTable: Boolean): Unit = { + protected def testDropTable(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) createDatabase(catalog, "dbx") @@ -1016,9 +1154,10 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { tableIdent: TableIdentifier): Unit = { catalog.alterTable(catalog.getTableMetadata(tableIdent).copy( provider = Some("csv"))) + assert(catalog.getTableMetadata(tableIdent).provider == Some("csv")) } - private def testSetProperties(isDatasourceTable: Boolean): Unit = { + protected def testSetProperties(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) createDatabase(catalog, "dbx") @@ -1027,7 +1166,11 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { convertToDatasourceTable(catalog, tableIdent) } def getProps: Map[String, String] = { - catalog.getTableMetadata(tableIdent).properties + if (isUsingHiveMetastore) { + normalizeCatalogTable(catalog.getTableMetadata(tableIdent)).properties + } else { + catalog.getTableMetadata(tableIdent).properties + } } assert(getProps.isEmpty) // set table properties @@ -1043,7 +1186,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } - private def testUnsetProperties(isDatasourceTable: Boolean): Unit = { + protected def testUnsetProperties(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) createDatabase(catalog, "dbx") @@ -1052,7 +1195,11 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { convertToDatasourceTable(catalog, tableIdent) } def getProps: Map[String, String] = { - catalog.getTableMetadata(tableIdent).properties + if (isUsingHiveMetastore) { + normalizeCatalogTable(catalog.getTableMetadata(tableIdent)).properties + } else { + catalog.getTableMetadata(tableIdent).properties + } } // unset table properties sql("ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('j' = 'am', 'p' = 'an', 'c' = 'lan', 'x' = 'y')") @@ -1076,7 +1223,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { assert(getProps == Map("x" -> "y")) } - private def testSetLocation(isDatasourceTable: Boolean): Unit = { + protected def 
testSetLocation(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) val partSpec = Map("a" -> "1", "b" -> "2") @@ -1087,24 +1234,21 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { convertToDatasourceTable(catalog, tableIdent) } assert(catalog.getTableMetadata(tableIdent).storage.locationUri.isDefined) - assert(catalog.getTableMetadata(tableIdent).storage.properties.isEmpty) + assert(normalizeSerdeProp(catalog.getTableMetadata(tableIdent).storage.properties).isEmpty) assert(catalog.getPartition(tableIdent, partSpec).storage.locationUri.isDefined) - assert(catalog.getPartition(tableIdent, partSpec).storage.properties.isEmpty) + assert( + normalizeSerdeProp(catalog.getPartition(tableIdent, partSpec).storage.properties).isEmpty) + // Verify that the location is set to the expected string def verifyLocation(expected: String, spec: Option[TablePartitionSpec] = None): Unit = { val storageFormat = spec .map { s => catalog.getPartition(tableIdent, s).storage } .getOrElse { catalog.getTableMetadata(tableIdent).storage } - if (isDatasourceTable) { - if (spec.isDefined) { - assert(storageFormat.properties.isEmpty) - assert(storageFormat.locationUri === Some(expected)) - } else { - assert(storageFormat.locationUri === Some(expected)) - } - } else { - assert(storageFormat.locationUri === Some(expected)) - } + // TODO(gatorsmile): fix the bug in alter table set location. + // if (isUsingHiveMetastore) { + // assert(storageFormat.properties.get("path") === expected) + // } + assert(storageFormat.locationUri === Some(expected)) } // set table location sql("ALTER TABLE dbx.tab1 SET LOCATION '/path/to/your/lovely/heart'") @@ -1129,7 +1273,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } - private def testSetSerde(isDatasourceTable: Boolean): Unit = { + protected def testSetSerde(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) createDatabase(catalog, "dbx") @@ -1137,8 +1281,21 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { if (isDatasourceTable) { convertToDatasourceTable(catalog, tableIdent) } - assert(catalog.getTableMetadata(tableIdent).storage.serde.isEmpty) - assert(catalog.getTableMetadata(tableIdent).storage.properties.isEmpty) + def checkSerdeProps(expectedSerdeProps: Map[String, String]): Unit = { + val serdeProp = catalog.getTableMetadata(tableIdent).storage.properties + if (isUsingHiveMetastore) { + assert(normalizeSerdeProp(serdeProp) == expectedSerdeProps) + } else { + assert(serdeProp == expectedSerdeProps) + } + } + if (isUsingHiveMetastore) { + assert(catalog.getTableMetadata(tableIdent).storage.serde == + Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + } else { + assert(catalog.getTableMetadata(tableIdent).storage.serde.isEmpty) + } + checkSerdeProps(Map.empty[String, String]) // set table serde and/or properties (should fail on datasource tables) if (isDatasourceTable) { val e1 = intercept[AnalysisException] { @@ -1151,31 +1308,30 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { assert(e1.getMessage.contains("datasource")) assert(e2.getMessage.contains("datasource")) } else { - sql("ALTER TABLE dbx.tab1 SET SERDE 'org.apache.jadoop'") - assert(catalog.getTableMetadata(tableIdent).storage.serde == Some("org.apache.jadoop")) - 
assert(catalog.getTableMetadata(tableIdent).storage.properties.isEmpty) - sql("ALTER TABLE dbx.tab1 SET SERDE 'org.apache.madoop' " + + val newSerde = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" + sql(s"ALTER TABLE dbx.tab1 SET SERDE '$newSerde'") + assert(catalog.getTableMetadata(tableIdent).storage.serde == Some(newSerde)) + checkSerdeProps(Map.empty[String, String]) + val serde2 = "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe" + sql(s"ALTER TABLE dbx.tab1 SET SERDE '$serde2' " + "WITH SERDEPROPERTIES ('k' = 'v', 'kay' = 'vee')") - assert(catalog.getTableMetadata(tableIdent).storage.serde == Some("org.apache.madoop")) - assert(catalog.getTableMetadata(tableIdent).storage.properties == - Map("k" -> "v", "kay" -> "vee")) + assert(catalog.getTableMetadata(tableIdent).storage.serde == Some(serde2)) + checkSerdeProps(Map("k" -> "v", "kay" -> "vee")) } // set serde properties only sql("ALTER TABLE dbx.tab1 SET SERDEPROPERTIES ('k' = 'vvv', 'kay' = 'vee')") - assert(catalog.getTableMetadata(tableIdent).storage.properties == - Map("k" -> "vvv", "kay" -> "vee")) + checkSerdeProps(Map("k" -> "vvv", "kay" -> "vee")) // set things without explicitly specifying database catalog.setCurrentDatabase("dbx") sql("ALTER TABLE tab1 SET SERDEPROPERTIES ('kay' = 'veee')") - assert(catalog.getTableMetadata(tableIdent).storage.properties == - Map("k" -> "vvv", "kay" -> "veee")) + checkSerdeProps(Map("k" -> "vvv", "kay" -> "veee")) // table to alter does not exist intercept[AnalysisException] { sql("ALTER TABLE does_not_exist SET SERDEPROPERTIES ('x' = 'y')") } } - private def testSetSerdePartition(isDatasourceTable: Boolean): Unit = { + protected def testSetSerdePartition(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) val spec = Map("a" -> "1", "b" -> "2") @@ -1188,8 +1344,21 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { if (isDatasourceTable) { convertToDatasourceTable(catalog, tableIdent) } - assert(catalog.getPartition(tableIdent, spec).storage.serde.isEmpty) - assert(catalog.getPartition(tableIdent, spec).storage.properties.isEmpty) + def checkPartitionSerdeProps(expectedSerdeProps: Map[String, String]): Unit = { + val serdeProp = catalog.getPartition(tableIdent, spec).storage.properties + if (isUsingHiveMetastore) { + assert(normalizeSerdeProp(serdeProp) == expectedSerdeProps) + } else { + assert(serdeProp == expectedSerdeProps) + } + } + if (isUsingHiveMetastore) { + assert(catalog.getPartition(tableIdent, spec).storage.serde == + Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + } else { + assert(catalog.getPartition(tableIdent, spec).storage.serde.isEmpty) + } + checkPartitionSerdeProps(Map.empty[String, String]) // set table serde and/or properties (should fail on datasource tables) if (isDatasourceTable) { val e1 = intercept[AnalysisException] { @@ -1204,26 +1373,23 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } else { sql("ALTER TABLE dbx.tab1 PARTITION (a=1, b=2) SET SERDE 'org.apache.jadoop'") assert(catalog.getPartition(tableIdent, spec).storage.serde == Some("org.apache.jadoop")) - assert(catalog.getPartition(tableIdent, spec).storage.properties.isEmpty) + checkPartitionSerdeProps(Map.empty[String, String]) sql("ALTER TABLE dbx.tab1 PARTITION (a=1, b=2) SET SERDE 'org.apache.madoop' " + "WITH SERDEPROPERTIES ('k' = 'v', 'kay' = 'vee')") assert(catalog.getPartition(tableIdent, 
spec).storage.serde == Some("org.apache.madoop")) - assert(catalog.getPartition(tableIdent, spec).storage.properties == - Map("k" -> "v", "kay" -> "vee")) + checkPartitionSerdeProps(Map("k" -> "v", "kay" -> "vee")) } // set serde properties only maybeWrapException(isDatasourceTable) { sql("ALTER TABLE dbx.tab1 PARTITION (a=1, b=2) " + "SET SERDEPROPERTIES ('k' = 'vvv', 'kay' = 'vee')") - assert(catalog.getPartition(tableIdent, spec).storage.properties == - Map("k" -> "vvv", "kay" -> "vee")) + checkPartitionSerdeProps(Map("k" -> "vvv", "kay" -> "vee")) } // set things without explicitly specifying database catalog.setCurrentDatabase("dbx") maybeWrapException(isDatasourceTable) { sql("ALTER TABLE tab1 PARTITION (a=1, b=2) SET SERDEPROPERTIES ('kay' = 'veee')") - assert(catalog.getPartition(tableIdent, spec).storage.properties == - Map("k" -> "vvv", "kay" -> "veee")) + checkPartitionSerdeProps(Map("k" -> "vvv", "kay" -> "veee")) } // table to alter does not exist intercept[AnalysisException] { @@ -1231,7 +1397,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } - private def testAddPartitions(isDatasourceTable: Boolean): Unit = { + protected def testAddPartitions(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) val part1 = Map("a" -> "1", "b" -> "5") @@ -1252,7 +1418,15 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { "PARTITION (a='2', b='6') LOCATION 'paris' PARTITION (a='3', b='7')") assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part3)) assert(catalog.getPartition(tableIdent, part1).storage.locationUri.isDefined) - assert(catalog.getPartition(tableIdent, part2).storage.locationUri == Option("paris")) + val partitionLocation = if (isUsingHiveMetastore) { + val tableLocation = catalog.getTableMetadata(tableIdent).storage.locationUri + assert(tableLocation.isDefined) + makeQualifiedPath(tableLocation.get + "/paris") + } else { + "paris" + } + + assert(catalog.getPartition(tableIdent, part2).storage.locationUri == Option(partitionLocation)) assert(catalog.getPartition(tableIdent, part3).storage.locationUri.isDefined) // add partitions without explicitly specifying database @@ -1282,7 +1456,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { Set(part1, part2, part3, part4, part5)) } - private def testDropPartitions(isDatasourceTable: Boolean): Unit = { + protected def testDropPartitions(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) val part1 = Map("a" -> "1", "b" -> "5") @@ -1335,7 +1509,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { assert(catalog.listPartitions(tableIdent).isEmpty) } - private def testRenamePartitions(isDatasourceTable: Boolean): Unit = { + protected def testRenamePartitions(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) val part1 = Map("a" -> "1", "b" -> "q") @@ -1379,7 +1553,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { Set(Map("a" -> "1", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) } - private def testChangeColumn(isDatasourceTable: Boolean): Unit = { + protected def testChangeColumn(isDatasourceTable: Boolean): Unit = { val catalog = spark.sessionState.catalog val resolver = 
spark.sessionState.conf.resolver val tableIdent = TableIdentifier("tab1", Some("dbx")) @@ -1479,53 +1653,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { ) } - test("select/insert into the managed table") { - assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") - val tabName = "tbl" - withTable(tabName) { - sql(s"CREATE TABLE $tabName (i INT, j STRING)") - val catalogTable = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(tabName, Some("default"))) - assert(catalogTable.tableType == CatalogTableType.MANAGED) - - var message = intercept[AnalysisException] { - sql(s"INSERT OVERWRITE TABLE $tabName SELECT 1, 'a'") - }.getMessage - assert(message.contains("Hive support is required to insert into the following tables")) - message = intercept[AnalysisException] { - sql(s"SELECT * FROM $tabName") - }.getMessage - assert(message.contains("Hive support is required to select over the following tables")) - } - } - - test("select/insert into external table") { - assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") - withTempDir { tempDir => - val tabName = "tbl" - withTable(tabName) { - sql( - s""" - |CREATE EXTERNAL TABLE $tabName (i INT, j STRING) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |LOCATION '$tempDir' - """.stripMargin) - val catalogTable = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(tabName, Some("default"))) - assert(catalogTable.tableType == CatalogTableType.EXTERNAL) - - var message = intercept[AnalysisException] { - sql(s"INSERT OVERWRITE TABLE $tabName SELECT 1, 'a'") - }.getMessage - assert(message.contains("Hive support is required to insert into the following tables")) - message = intercept[AnalysisException] { - sql(s"SELECT * FROM $tabName") - }.getMessage - assert(message.contains("Hive support is required to select over the following tables")) - } - } - } - test("create table using CLUSTERED BY without schema specification") { import testImplicits._ withTempPath { tempDir => @@ -1549,22 +1676,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } - test("Create Hive Table As Select") { - import testImplicits._ - withTable("t", "t1") { - var e = intercept[AnalysisException] { - sql("CREATE TABLE t SELECT 1 as a, 1 as b") - }.getMessage - assert(e.contains("Hive support is required to use CREATE Hive TABLE AS SELECT")) - - spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1") - e = intercept[AnalysisException] { - sql("CREATE TABLE t SELECT a, b from t1") - }.getMessage - assert(e.contains("Hive support is required to use CREATE Hive TABLE AS SELECT")) - } - } - test("Create Data Source Table As Select") { import testImplicits._ withTable("t", "t1", "t2") { @@ -1588,7 +1699,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } test("drop default database") { - Seq("true", "false").foreach { caseSensitive => + val caseSensitiveOptions = if (isUsingHiveMetastore) Seq("false") else Seq("true", "false") + caseSensitiveOptions.foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { var message = intercept[AnalysisException] { sql("DROP DATABASE default") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index e3f166724968..bc34256eb28f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -24,16 +24,58 @@ import org.scalatest.BeforeAndAfterEach import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} -import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.execution.command.DDLUtils +import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.orc.OrcFileOperator import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{MetadataBuilder, StructType} + +// TODO(gatorsmile): combine HiveCatalogedDDLSuite and HiveDDLSuite +class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeAndAfterEach { + override def afterEach(): Unit = { + try { + // drop all databases, tables and functions after each test + spark.sessionState.catalog.reset() + } finally { + super.afterEach() + } + } + + protected override def generateTable( + catalog: SessionCatalog, + name: TableIdentifier): CatalogTable = { + val storage = + CatalogStorageFormat( + locationUri = Some(catalog.defaultTablePath(name)), + inputFormat = Some("org.apache.hadoop.mapred.SequenceFileInputFormat"), + outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"), + serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"), + compressed = false, + properties = Map("serialization.format" -> "1")) + val metadata = new MetadataBuilder() + .putString("key", "value") + .build() + CatalogTable( + identifier = name, + tableType = CatalogTableType.EXTERNAL, + storage = storage, + schema = new StructType() + .add("col1", "int", nullable = true, metadata = metadata) + .add("col2", "string") + .add("a", "int") + .add("b", "int"), + provider = Some("hive"), + partitionColumnNames = Seq("a", "b"), + createTime = 0L, + tracksPartitionsInCatalog = true) + } + +} class HiveDDLSuite extends QueryTest with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach { From 589e9955e42f5a23e24c20ec22098bc4182aceb9 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 20 Jan 2017 12:39:59 -0800 Subject: [PATCH 2/4] address comments. 
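Move the CatalogTable normalization out of the shared suite: the abstract DDLSuite now exposes it as an overridable hook that defaults to the identity, and the Hive-backed suite overrides it to strip the metastore's nondeterministic bookkeeping before metadata is compared. A minimal sketch of that hook pattern follows; the *Sketch class names are placeholders rather than the real test classes, the property set is abbreviated, and it assumes spark-catalyst on the classpath:

    import org.apache.spark.sql.catalyst.catalog.CatalogTable

    abstract class DDLSuiteSketch {
      // In-memory catalog: metadata round-trips unchanged, so compare it as-is.
      protected def normalizeCatalogTable(table: CatalogTable): CatalogTable = table
    }

    class HiveCatalogedSketch extends DDLSuiteSketch {
      // Hive fills in bookkeeping (e.g. transient_lastDdlTime) that differs per run,
      // so zero the timestamps and drop those properties before comparing.
      protected override def normalizeCatalogTable(table: CatalogTable): CatalogTable = {
        val nondeterministicProps = Set("CreateTime", "transient_lastDdlTime", "numFiles")
        table.copy(
          createTime = 0L,
          lastAccessTime = 0L,
          owner = "",
          properties = table.properties.filterKeys(!nondeterministicProps.contains(_)))
      }
    }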
--- .../sql/execution/command/DDLSuite.scala | 60 ++++--------------- .../sql/hive/execution/HiveDDLSuite.scala | 33 ++++++++++ 2 files changed, 43 insertions(+), 50 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index a9adb15ae3d7..afb47159676d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -43,7 +43,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSQLContext with Befo // drop all databases, tables and functions after each test spark.sessionState.catalog.reset() } finally { - Utils.deleteRecursively(new File("spark-warehouse")) + Utils.deleteRecursively(new File(spark.sessionState.conf.warehousePath)) super.afterEach() } } @@ -201,40 +201,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { private val escapedIdentifier = "`(.+)`".r - def normalizeCatalogTable(table: CatalogTable): CatalogTable = { - val nondeterministicProps = Set( - "CreateTime", - "transient_lastDdlTime", - "grantTime", - "lastUpdateTime", - "last_modified_by", - "last_modified_time", - "Owner:", - "COLUMN_STATS_ACCURATE", - // The following are hive specific schema parameters which we do not need to match exactly. - "numFiles", - "numRows", - "rawDataSize", - "totalSize", - "totalNumberFiles", - "maxFileSize", - "minFileSize", - // EXTERNAL is not non-deterministic, but it is filtered out for external tables. - "EXTERNAL" - ) - - table.copy( - createTime = 0L, - lastAccessTime = 0L, - owner = "", - properties = table.properties.filterKeys(!nondeterministicProps.contains(_)), - // View texts are checked separately - viewOriginalText = None, - viewText = None - ) - } + protected def normalizeCatalogTable(table: CatalogTable): CatalogTable = table - def normalizeSerdeProp(props: Map[String, String]): Map[String, String] = { + private def normalizeSerdeProp(props: Map[String, String]): Map[String, String] = { props.filterNot(p => Seq("serialization.format", "path").contains(p._1)) } @@ -283,12 +252,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } private def getDatabaseLocation(dbName: String): String = { - val expectedLocation = if (isUsingHiveMetastore) { - val dbPath = new Path(spark.sessionState.conf.warehousePath) - s"${dbPath.toUri.getPath.stripSuffix("/")}/$dbName.db" - } else { - s"spark-warehouse/$dbName.db" - } + val dbPath = new Path(spark.sessionState.conf.warehousePath) + val expectedLocation = s"${dbPath.toUri.getPath.stripSuffix("/")}/$dbName.db" makeQualifiedPath(expectedLocation) } @@ -374,16 +339,11 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { getDatabaseLocation(dbNameWithoutBackTicks), Map.empty)) - if (isUsingHiveMetastore) { - val e = intercept[AnalysisException] { - sql(s"CREATE DATABASE $dbName") - }.getMessage - assert(e.contains(s"Database $dbNameWithoutBackTicks already exists")) - } else { - intercept[DatabaseAlreadyExistsException] { - sql(s"CREATE DATABASE $dbName") - } - } + // TODO: HiveExternalCatalog should throw DatabaseAlreadyExistsException + val e = intercept[AnalysisException] { + sql(s"CREATE DATABASE $dbName") + }.getMessage + assert(e.contains(s"already exists")) } finally { catalog.reset() } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 3842a775f7ea..78a094fc56e5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -75,6 +75,39 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA tracksPartitionsInCatalog = true) } + protected override def normalizeCatalogTable(table: CatalogTable): CatalogTable = { + val nondeterministicProps = Set( + "CreateTime", + "transient_lastDdlTime", + "grantTime", + "lastUpdateTime", + "last_modified_by", + "last_modified_time", + "Owner:", + "COLUMN_STATS_ACCURATE", + // The following are hive specific schema parameters which we do not need to match exactly. + "numFiles", + "numRows", + "rawDataSize", + "totalSize", + "totalNumberFiles", + "maxFileSize", + "minFileSize", + // EXTERNAL is not non-deterministic, but it is filtered out for external tables. + "EXTERNAL" + ) + + table.copy( + createTime = 0L, + lastAccessTime = 0L, + owner = "", + properties = table.properties.filterKeys(!nondeterministicProps.contains(_)), + // View texts are checked separately + viewOriginalText = None, + viewText = None + ) + } + } class HiveDDLSuite From 5fa51b5f1ccb09f196b2910b76306da05c289617 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Tue, 7 Mar 2017 16:59:01 -0800 Subject: [PATCH 3/4] address comments. --- .../sql/execution/command/DDLSuite.scala | 28 +++++++++---------- .../sql/hive/execution/HiveDDLSuite.scala | 4 +-- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 25a75e1a79fa..ecea1dba4a98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -259,19 +259,21 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { fs.makeQualified(hadoopPath).toUri } + private def getDBPath(dbName: String): URI = { + val warehousePath = s"file:${spark.sessionState.conf.warehousePath.stripPrefix("file:")}" + new Path(warehousePath, s"$dbName.db").toUri + } + test("Create Database using Default Warehouse Path") { val catalog = spark.sessionState.catalog val dbName = "db1" try { sql(s"CREATE DATABASE $dbName") val db1 = catalog.getDatabaseMetadata(dbName) - val dbPath = new Path(spark.sessionState.conf.warehousePath) - val expectedDBLocation = s"file:${dbPath.toUri.getPath.stripSuffix("/")}/$dbName.db" - val expectedDBUri = CatalogUtils.stringToURI(expectedDBLocation) assert(db1 == CatalogDatabase( dbName, "", - expectedDBUri, // expectedLocation, + getDBPath(dbName), Map.empty)) sql(s"DROP DATABASE $dbName CASCADE") assert(!catalog.databaseExists(dbName)) @@ -314,15 +316,10 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val dbNameWithoutBackTicks = cleanIdentifier(dbName) sql(s"CREATE DATABASE $dbName") val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks) - val dbPath = new Path(spark.sessionState.conf.warehousePath) - val expectedDBLocation = - s"file:${dbPath.toUri.getPath.stripSuffix("/")}/$dbNameWithoutBackTicks.db" - val expectedDBUri = CatalogUtils.stringToURI(expectedDBLocation) - assert(db1 == CatalogDatabase( dbNameWithoutBackTicks, "", - expectedDBUri, + getDBPath(dbNameWithoutBackTicks), Map.empty)) // TODO: HiveExternalCatalog should 
throw DatabaseAlreadyExistsException @@ -555,10 +552,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { databaseNames.foreach { dbName => try { val dbNameWithoutBackTicks = cleanIdentifier(dbName) - val dbPath = new Path(spark.sessionState.conf.warehousePath) - val expectedDBLocation = - s"file:${dbPath.toUri.getPath.stripSuffix("/")}/$dbNameWithoutBackTicks.db" - val location = CatalogUtils.stringToURI(expectedDBLocation) + val location = getDBPath(dbNameWithoutBackTicks) sql(s"CREATE DATABASE $dbName") @@ -602,6 +596,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { var message = intercept[AnalysisException] { sql(s"DROP DATABASE $dbName") }.getMessage + // TODO: Unify the exception. if (isUsingHiveMetastore) { assert(message.contains(s"NoSuchObjectException: $dbNameWithoutBackTicks")) } else { @@ -635,6 +630,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val message = intercept[AnalysisException] { sql(s"DROP DATABASE $dbName RESTRICT") }.getMessage + // TODO: Unify the exception. if (isUsingHiveMetastore) { assert(message.contains(s"Database $dbName is not empty. One or more tables exist")) } else { @@ -968,6 +964,10 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { if (!isUsingHiveMetastore) { assert(catalog.getPartition(tableIdent, part1).parameters("numFiles") == "1") assert(catalog.getPartition(tableIdent, part2).parameters("numFiles") == "2") + } else { + // After ALTER TABLE, the statistics of the first partition is removed by Hive megastore + assert(catalog.getPartition(tableIdent, part1).parameters.get("numFiles").isEmpty) + assert(catalog.getPartition(tableIdent, part2).parameters("numFiles") == "2") } } finally { fs.delete(root, true) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 115ea2b0b5ab..e35699ba2dbe 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -95,9 +95,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA "totalSize", "totalNumberFiles", "maxFileSize", - "minFileSize", - // EXTERNAL is not non-deterministic, but it is filtered out for external tables. - "EXTERNAL" + "minFileSize" ) table.copy( From 64a8f5a12231b158c6ad908640f3729d2bd646af Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Tue, 7 Mar 2017 17:02:25 -0800 Subject: [PATCH 4/4] address comments. 
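Build the in-memory suite's storage descriptor from CatalogStorageFormat.empty instead of spelling out every field. A self-contained sketch of the idiom using a stand-in case class; the real CatalogStorageFormat has more fields and its locationUri type varies across Spark versions, so this is an illustration only:

    // Stand-in for CatalogStorageFormat; only meant to show the empty.copy(...) idiom.
    case class StorageFormatSketch(
        locationUri: Option[String] = None,
        inputFormat: Option[String] = None,
        outputFormat: Option[String] = None,
        serde: Option[String] = None,
        compressed: Boolean = false,
        properties: Map[String, String] = Map.empty)

    object StorageFormatSketch {
      val empty: StorageFormatSketch = StorageFormatSketch()
    }

    object StorageFormatDemo extends App {
      val loc = Some("file:/tmp/warehouse/tab1") // placeholder location
      // Before: every field written out even though all but locationUri are defaults.
      val verbose = StorageFormatSketch(
        locationUri = loc, inputFormat = None, outputFormat = None,
        serde = None, compressed = false, properties = Map())
      // After: start from the empty descriptor and override only the location.
      val concise = StorageFormatSketch.empty.copy(locationUri = loc)
      assert(verbose == concise)
    }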
---
 .../org/apache/spark/sql/execution/command/DDLSuite.scala | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index ecea1dba4a98..7fd626c61a4a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -50,13 +50,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSQLContext with Befo
       catalog: SessionCatalog,
       name: TableIdentifier): CatalogTable = {
     val storage =
-      CatalogStorageFormat(
-        locationUri = Some(catalog.defaultTablePath(name)),
-        inputFormat = None,
-        outputFormat = None,
-        serde = None,
-        compressed = false,
-        properties = Map())
+      CatalogStorageFormat.empty.copy(locationUri = Some(catalog.defaultTablePath(name)))
     val metadata = new MetadataBuilder()
       .putString("key", "value")
       .build()
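For reference, the qualified database location that the rewritten assertions expect (getDBPath in the final version of DDLSuite) can be reproduced standalone. A sketch, assuming hadoop-common on the classpath; the warehouse path literal below is a placeholder for what the suites read from spark.sessionState.conf.warehousePath:

    import org.apache.hadoop.fs.Path

    object DbPathSketch extends App {
      val warehousePath = "file:/tmp/spark-warehouse" // placeholder; the suites use the session conf
      val dbName = "db1"
      // Mirrors getDBPath: resolve <warehouse>/<dbName>.db and return it as a URI.
      val expected = new Path(warehousePath, s"$dbName.db").toUri
      println(expected) // file:/tmp/spark-warehouse/db1.db
    }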