diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index ebef0b95bb08..ea86cd95e1c7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1092,14 +1092,23 @@ private[hive] object HiveClientImpl extends Logging {
       hiveTable.setViewExpandedText(t)
     }
 
+    // Hive may lower-case the schema, while bucketSpec keeps the original case.
+    // Rewrite each bucket/sort column name to the schema's casing when they differ.
+    def restoreHiveBucketSpecColNames(schema: StructType, names: Seq[String]): Seq[String] = {
+      names.map { name =>
+        schema.find(col => SQLConf.get.resolver(col.name, name)).map(_.name).getOrElse(name)
+      }
+    }
+
     table.bucketSpec match {
       case Some(bucketSpec) if !HiveExternalCatalog.isDatasourceTable(table) =>
         hiveTable.setNumBuckets(bucketSpec.numBuckets)
-        hiveTable.setBucketCols(bucketSpec.bucketColumnNames.toList.asJava)
+        hiveTable.setBucketCols(
+          restoreHiveBucketSpecColNames(table.schema, bucketSpec.bucketColumnNames).toList.asJava)
 
         if (bucketSpec.sortColumnNames.nonEmpty) {
           hiveTable.setSortCols(
-            bucketSpec.sortColumnNames
+            restoreHiveBucketSpecColNames(table.schema, bucketSpec.sortColumnNames)
               .map(col => new Order(col, HIVE_COLUMN_ORDER_ASC))
               .toList
               .asJava
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
index b715f484fa02..c0c1dddc8cb2 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive
 
 import java.io.File
+import java.util.Locale
 
 import com.google.common.io.Files
 import org.apache.hadoop.fs.Path
@@ -870,4 +871,65 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter
       assert(e.contains("Partition spec is invalid"))
     }
   }
+
+  test("SPARK-35531: Insert data with different cases of bucket column") {
+    withTable("TEST1") {
+      val createHive =
+        """
+          |create table TEST1(
+          |v1 BIGINT,
+          |s1 INT)
+          |partitioned by (pk BIGINT)
+          |clustered by (v1)
+          |sorted by (s1)
+          |into 200 buckets
+          |STORED AS PARQUET
+          |""".stripMargin
+
+      val insertString =
+        """
+          |insert into test1
+          |select
+          |* from values(1,1,1)
+          |""".stripMargin
+
+      val dropString = "drop table if exists test1"
+
+      spark.sql(dropString)
+      spark.sql(createHive.toLowerCase(Locale.ROOT))
+
+      spark.sql(insertString.toLowerCase(Locale.ROOT))
+      spark.sql(insertString.toUpperCase(Locale.ROOT))
+
+      spark.sql(dropString)
+      spark.sql(createHive.toUpperCase(Locale.ROOT))
+
+      spark.sql(insertString.toLowerCase(Locale.ROOT))
+      spark.sql(insertString.toUpperCase(Locale.ROOT))
+
+      val createSpark =
+        """
+          |create table TEST1(
+          |v1 BIGINT,
+          |s1 INT)
+          |using parquet
+          |partitioned by (pk BIGINT)
+          |clustered by (v1)
+          |sorted by (s1)
+          |into 200 buckets
+          |""".stripMargin
+
+      spark.sql(dropString)
+      spark.sql(createSpark.toLowerCase(Locale.ROOT))
+
+      spark.sql(insertString.toLowerCase(Locale.ROOT))
+      spark.sql(insertString.toUpperCase(Locale.ROOT))
+
+      spark.sql(dropString)
+      spark.sql(createSpark.toUpperCase(Locale.ROOT))
+
+      spark.sql(insertString.toLowerCase(Locale.ROOT))
+      spark.sql(insertString.toUpperCase(Locale.ROOT))
+    }
+  }
 }
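
For reference, below is a minimal standalone sketch (not part of the patch) of the restore logic in restoreHiveBucketSpecColNames, assuming a case-insensitive resolver such as Spark's default when spark.sql.caseSensitive=false. The names RestoreCaseSketch and restoreColNames are hypothetical stand-ins, and the schema is modelled as a plain Seq of column names rather than a StructType:

object RestoreCaseSketch {
  // Stand-in for Spark's Resolver: case-insensitive name equality,
  // mirroring the default spark.sql.caseSensitive=false behaviour.
  val resolver: (String, String) => Boolean = (a, b) => a.equalsIgnoreCase(b)

  // For each bucket/sort column, prefer the schema's casing; fall back to the
  // original name when no schema column matches.
  def restoreColNames(schemaNames: Seq[String], names: Seq[String]): Seq[String] =
    names.map(name => schemaNames.find(resolver(_, name)).getOrElse(name))

  def main(args: Array[String]): Unit = {
    val schemaNames = Seq("v1", "s1", "pk") // schema as Hive stores it (lower case)
    val bucketCols = Seq("V1")              // bucket spec recorded with upper case
    // Prints List(v1): the bucket column is rewritten to the schema's casing,
    // so the Hive bucket metadata stays consistent with the table's columns.
    println(restoreColNames(schemaNames, bucketCols))
  }
}

Without this alignment, a bucket column recorded in a different case than the (lower-cased) Hive schema is not recognized as one of the table's columns, which is roughly the insert failure SPARK-35531 reports.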