diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 69dac7b062354..426db6a4e1c12 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -849,7 +849,12 @@ private[hive] object HiveClientImpl {
         throw new SparkException("Cannot recognize hive type string: " + hc.getType, e)
     }

-    val metadata = new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
+    val metadata = if (hc.getType != columnType.catalogString) {
+      new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
+    } else {
+      Metadata.empty
+    }
+
     val field = StructField(
       name = hc.getName,
       dataType = columnType,
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala
index 8140f883ee542..18137e7ea1d63 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.{ExamplePointUDT, SQLTestUtils}
-import org.apache.spark.sql.types.{DecimalType, IntegerType, StringType, StructField, StructType}
+import org.apache.spark.sql.types._

 class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils {
   import spark.implicits._
@@ -67,6 +67,73 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils {
       assert(aliases.size == 1)
     }
   }
+
+  test("Validate catalog metadata for supported data types") {
+    withTable("t") {
+      sql(
+        """
+          |CREATE TABLE t (
+          |c1 boolean,
+          |c2 tinyint,
+          |c3 smallint,
+          |c4 short,
+          |c5 bigint,
+          |c6 long,
+          |c7 float,
+          |c8 double,
+          |c9 date,
+          |c10 timestamp,
+          |c11 string,
+          |c12 char(10),
+          |c13 varchar(10),
+          |c14 binary,
+          |c15 decimal,
+          |c16 decimal(10),
+          |c17 decimal(10,2),
+          |c18 array<string>,
+          |c19 array<int>,
+          |c20 array<char(10)>,
+          |c21 map<int,int>,
+          |c22 map<int,char(10)>,
+          |c23 struct<a:int,b:int>,
+          |c24 struct<c:varchar(10)>
+          |)
+        """.stripMargin)
+
+      val schema = hiveClient.getTable("default", "t").schema
+      val expectedSchema = new StructType()
+        .add("c1", "boolean")
+        .add("c2", "tinyint")
+        .add("c3", "smallint")
+        .add("c4", "short")
+        .add("c5", "bigint")
+        .add("c6", "long")
+        .add("c7", "float")
+        .add("c8", "double")
+        .add("c9", "date")
+        .add("c10", "timestamp")
+        .add("c11", "string")
+        .add("c12", "string", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "char(10)").build())
+        .add("c13", "string", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "varchar(10)").build())
+        .add("c14", "binary")
+        .add("c15", "decimal")
+        .add("c16", "decimal(10)")
+        .add("c17", "decimal(10,2)")
+        .add("c18", "array<string>")
+        .add("c19", "array<int>")
+        .add("c20", "array<string>", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "array<char(10)>").build())
+        .add("c21", "map<int,int>")
+        .add("c22", "map<int,string>", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "map<int,char(10)>").build())
+        .add("c23", "struct<a:int,b:int>")
+        .add("c24", "struct<c:string>", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "struct<c:varchar(10)>").build())
+      assert(schema == expectedSchema)
+    }
+  }
 }

 class DataSourceWithHiveMetastoreCatalogSuite
@@ -180,5 +247,6 @@ class DataSourceWithHiveMetastoreCatalogSuite
         }
       }
     }
+
   }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala
index 3d0e43cbbe037..f2d27671094d7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala
@@ -71,7 +71,7 @@ class HiveSchemaInferenceSuite
         name = field,
         dataType = LongType,
         nullable = true,
-        metadata = new MetadataBuilder().putString(HIVE_TYPE_STRING, "bigint").build())
+        metadata = Metadata.empty)
     }
     // and all partition columns as ints
     val partitionStructFields = partitionCols.map { field =>
@@ -80,7 +80,7 @@ class HiveSchemaInferenceSuite
         name = field.toLowerCase,
         dataType = IntegerType,
         nullable = true,
-        metadata = new MetadataBuilder().putString(HIVE_TYPE_STRING, "int").build())
+        metadata = Metadata.empty)
     }
     val schema = StructType(structFields ++ partitionStructFields)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 03e50e4119f6f..9ff9ecf7f3677 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -40,14 +40,7 @@ import org.apache.spark.sql.types._

 class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {

-  private def dropMetadata(schema: StructType): StructType = {
-    val newFields = schema.fields.map { f =>
-      StructField(f.name, f.dataType, f.nullable, Metadata.empty)
-    }
-    StructType(newFields)
-  }
-
-  test("Hive serde tables should fallback to HDFS for size estimation") {
+  test("Hive serde tables should fallback to HDFS for size estimation") {
     withSQLConf(SQLConf.ENABLE_FALL_BACK_TO_HDFS_FOR_STATS.key -> "true") {
       withTable("csv_table") {
         withTempDir { tempDir =>
@@ -138,9 +131,9 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto

       // Verify that the schema stored in catalog is a dummy one used for
       // data source tables. The actual schema is stored in table properties.
-      val rawSchema = dropMetadata(hiveClient.getTable("default", table).schema)
-      val expectedRawSchema = new StructType()
-        .add("col", "array<string>")
+      val rawSchema = hiveClient.getTable("default", table).schema
+      val metadata = new MetadataBuilder().putString("comment", "from deserializer").build()
+      val expectedRawSchema = new StructType().add("col", "array<string>", true, metadata)
       assert(rawSchema == expectedRawSchema)

       val actualSchema = spark.sharedState.externalCatalog.getTable("default", table).schema
@@ -161,14 +154,13 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
   }

   test("Analyze hive serde tables when schema is not same as schema in table properties") {
-
     val table = "hive_serde"
     withTable(table) {
       sql(s"CREATE TABLE $table (C1 INT, C2 STRING, C3 DOUBLE)")

       // Verify that the table schema stored in hive catalog is
       // different than the schema stored in table properties.
-      val rawSchema = dropMetadata(hiveClient.getTable("default", table).schema)
+      val rawSchema = hiveClient.getTable("default", table).schema
       val expectedRawSchema = new StructType()
         .add("c1", "int")
         .add("c2", "string")
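
Note on the behavioral change (not part of the patch): the rule introduced in fromHiveColumn can be tried outside the suites with the minimal sketch below. It assumes only a spark-catalyst dependency; the helper name metadataFor and the literal "HIVE_TYPE_STRING" key are illustrative stand-ins for the logic and constant used inside HiveClientImpl.

import org.apache.spark.sql.types._

object HiveTypeMetadataSketch {
  // Mirrors the patched fromHiveColumn rule: keep the original Hive type
  // string only when it cannot be rebuilt from the Catalyst type's
  // catalogString; otherwise attach no metadata at all.
  def metadataFor(hiveTypeString: String, columnType: DataType): Metadata = {
    if (hiveTypeString != columnType.catalogString) {
      new MetadataBuilder().putString("HIVE_TYPE_STRING", hiveTypeString).build()
    } else {
      Metadata.empty
    }
  }

  def main(args: Array[String]): Unit = {
    // Lossless round trip: LongType.catalogString is "bigint", so nothing
    // needs to be recorded.
    println(metadataFor("bigint", LongType))      // prints {}
    // Lossy round trip: Hive's char(10) surfaces as StringType ("string"),
    // so the original type string must be preserved in column metadata.
    println(metadataFor("char(10)", StringType))  // prints {"HIVE_TYPE_STRING":"char(10)"}
  }
}

This is why the test-side changes above drop the HIVE_TYPE_STRING expectations for plain bigint/int columns (the metadata is now empty) while char/varchar columns, and nested types containing them, still carry it.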