sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -849,7 +849,12 @@ private[hive] object HiveClientImpl {
         throw new SparkException("Cannot recognize hive type string: " + hc.getType, e)
     }
 
-    val metadata = new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
+    val metadata = if (hc.getType != columnType.catalogString) {
+      new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build()
+    } else {
+      Metadata.empty
+    }
+
     val field = StructField(
       name = hc.getName,
       dataType = columnType,
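For context, the rule this hunk implements can be sketched standalone as below. This is illustrative only, not the patch itself: hiveTypeMetadata is a made-up helper, the metadata key is written out as a plain string, and it assumes the Spark 2.x parser behavior in which char(n)/varchar(n) are parsed to StringType, so their catalogString is "string" and the original Hive type must be preserved in field metadata.

import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.types.{Metadata, MetadataBuilder}

// Attach the Hive type string only when it does not round-trip through
// Spark's catalog type (e.g. char(10) maps to string, which is lossy).
def hiveTypeMetadata(hiveTypeString: String): Metadata = {
  val catalystType = CatalystSqlParser.parseDataType(hiveTypeString)
  if (hiveTypeString != catalystType.catalogString) {
    new MetadataBuilder().putString("HIVE_TYPE_STRING", hiveTypeString).build()
  } else {
    Metadata.empty
  }
}

hiveTypeMetadata("char(10)") // {"HIVE_TYPE_STRING":"char(10)"}
hiveTypeMetadata("int")      // Metadata.empty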
sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.{ExamplePointUDT, SQLTestUtils}
-import org.apache.spark.sql.types.{DecimalType, IntegerType, StringType, StructField, StructType}
+import org.apache.spark.sql.types._
 
 class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils {
   import spark.implicits._
@@ -67,6 +67,73 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils {
       assert(aliases.size == 1)
     }
   }
+
+  test("Validate catalog metadata for supported data types") {
+    withTable("t") {
+      sql(
+        """
+          |CREATE TABLE t (
+          |c1 boolean,
+          |c2 tinyint,
+          |c3 smallint,
+          |c4 short,
+          |c5 bigint,
+          |c6 long,
+          |c7 float,
+          |c8 double,
+          |c9 date,
+          |c10 timestamp,
+          |c11 string,
+          |c12 char(10),
+          |c13 varchar(10),
+          |c14 binary,
+          |c15 decimal,
+          |c16 decimal(10),
+          |c17 decimal(10,2),
+          |c18 array<string>,
+          |c19 array<int>,
+          |c20 array<char(10)>,
+          |c21 map<int,int>,
+          |c22 map<int,char(10)>,
+          |c23 struct<a:int,b:int>,
+          |c24 struct<c:varchar(10),d:int>
+          |)
+        """.stripMargin)
+
+      val schema = hiveClient.getTable("default", "t").schema
+      val expectedSchema = new StructType()
+        .add("c1", "boolean")
+        .add("c2", "tinyint")
+        .add("c3", "smallint")
+        .add("c4", "short")
+        .add("c5", "bigint")
+        .add("c6", "long")
+        .add("c7", "float")
+        .add("c8", "double")
+        .add("c9", "date")
+        .add("c10", "timestamp")
+        .add("c11", "string")
+        .add("c12", "string", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "char(10)").build())
+        .add("c13", "string", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "varchar(10)").build())
+        .add("c14", "binary")
+        .add("c15", "decimal")
+        .add("c16", "decimal(10)")
+        .add("c17", "decimal(10,2)")
+        .add("c18", "array<string>")
+        .add("c19", "array<int>")
+        .add("c20", "array<string>", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "array<char(10)>").build())
+        .add("c21", "map<int,int>")
+        .add("c22", "map<int,string>", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "map<int,char(10)>").build())
+        .add("c23", "struct<a:int,b:int>")
+        .add("c24", "struct<c:string,d:int>", true,
+          new MetadataBuilder().putString(HIVE_TYPE_STRING, "struct<c:varchar(10),d:int>").build())
+      assert(schema == expectedSchema)
+    }
+  }
 }
 
 class DataSourceWithHiveMetastoreCatalogSuite
@@ -180,5 +247,6 @@ class DataSourceWithHiveMetastoreCatalogSuite
         }
       }
     }
+
   }
 }
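A note on how the expected schema in the new test is built: the StructType.add(name, dataType: String, nullable, metadata) overload parses the DDL type string, and the field metadata it stores participates in equality. A small self-contained sketch (field names are illustrative):

import org.apache.spark.sql.types._

// char(10) is stored in the catalog as string; the original Hive type
// survives only in the field metadata, under the HIVE_TYPE_STRING key.
val charMeta = new MetadataBuilder().putString("HIVE_TYPE_STRING", "char(10)").build()
val schema = new StructType()
  .add("plain", "string")                 // round-trips exactly: no metadata
  .add("fixed", "string", true, charMeta) // lossy: metadata records char(10)

println(schema("fixed").metadata) // {"HIVE_TYPE_STRING":"char(10)"}
println(schema("plain").metadata) // {}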
sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala
@@ -71,7 +71,7 @@ class HiveSchemaInferenceSuite
       name = field,
       dataType = LongType,
       nullable = true,
-      metadata = new MetadataBuilder().putString(HIVE_TYPE_STRING, "bigint").build())
+      metadata = Metadata.empty)
   }
   // and all partition columns as ints
   val partitionStructFields = partitionCols.map { field =>
@@ -80,7 +80,7 @@
       name = field.toLowerCase,
       dataType = IntegerType,
       nullable = true,
-      metadata = new MetadataBuilder().putString(HIVE_TYPE_STRING, "int").build())
+      metadata = Metadata.empty)
   }
   val schema = StructType(structFields ++ partitionStructFields)
 
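These two expectations flip to Metadata.empty because primitive Hive types such as bigint and int now round-trip exactly, so fromHiveColumn no longer attaches any metadata to them. A quick illustration of the round-trip property being relied on (a sketch, not suite code):

import org.apache.spark.sql.types._

// LongType prints as "bigint" and IntegerType as "int", matching the
// Hive type strings exactly, so no HIVE_TYPE_STRING entry is recorded.
assert(LongType.catalogString == "bigint")
assert(IntegerType.catalogString == "int")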
sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -40,14 +40,7 @@ import org.apache.spark.sql.types._


 class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
-  private def dropMetadata(schema: StructType): StructType = {
-    val newFields = schema.fields.map { f =>
-      StructField(f.name, f.dataType, f.nullable, Metadata.empty)
-    }
-    StructType(newFields)
-  }
-
   test("Hive serde tables should fallback to HDFS for size estimation") {
     withSQLConf(SQLConf.ENABLE_FALL_BACK_TO_HDFS_FOR_STATS.key -> "true") {
       withTable("csv_table") {
         withTempDir { tempDir =>
@@ -138,9 +131,9 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
 
       // Verify that the schema stored in catalog is a dummy one used for
       // data source tables. The actual schema is stored in table properties.
-      val rawSchema = dropMetadata(hiveClient.getTable("default", table).schema)
-      val expectedRawSchema = new StructType()
-        .add("col", "array<string>")
+      val rawSchema = hiveClient.getTable("default", table).schema
+      val metadata = new MetadataBuilder().putString("comment", "from deserializer").build()
+      val expectedRawSchema = new StructType().add("col", "array<string>", true, metadata)
       assert(rawSchema == expectedRawSchema)
 
       val actualSchema = spark.sharedState.externalCatalog.getTable("default", table).schema
@@ -161,14 +154,13 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
   }
 
   test("Analyze hive serde tables when schema is not same as schema in table properties") {
-
     val table = "hive_serde"
     withTable(table) {
       sql(s"CREATE TABLE $table (C1 INT, C2 STRING, C3 DOUBLE)")
 
       // Verify that the table schema stored in hive catalog is
       // different than the schema stored in table properties.
-      val rawSchema = dropMetadata(hiveClient.getTable("default", table).schema)
+      val rawSchema = hiveClient.getTable("default", table).schema
       val expectedRawSchema = new StructType()
         .add("c1", "int")
         .add("c2", "string")
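The dropMetadata helper could be deleted because the suite now asserts the raw Hive schema verbatim, metadata included. The subtlety these updates rely on is that StructField is a case class whose equality covers its metadata; a small demonstration (names are illustrative):

import org.apache.spark.sql.types._

// Two otherwise identical schemas are unequal once one of them carries
// a metadata entry, e.g. Hive's "from deserializer" column comment.
val bare = new StructType().add("col", "array<string>")
val commented = new StructType().add("col", "array<string>", true,
  new MetadataBuilder().putString("comment", "from deserializer").build())

assert(bare != commented) // metadata participates in equality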