diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 31f28f205fef..ec65bf7107d4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -17,14 +17,12 @@
 
 package org.apache.spark.sql.hive
 
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars
-
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.Analyzer
 import org.apache.spark.sql.execution.SparkPlanner
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.hive.client.HiveClient
-import org.apache.spark.sql.internal.SessionState
+import org.apache.spark.sql.internal.{SessionState, SQLConf}
 
 
 /**
@@ -58,6 +56,11 @@ private[hive] class HiveSessionState(sparkSession: SparkSession)
       newHadoopConf())
   }
 
+  override lazy val conf: SQLConf = new SQLConf {
+    // Hive-backed catalog is case insensitive
+    override def caseSensitiveAnalysis: Boolean = false
+  }
+
   /**
    * An analyzer that uses the Hive metastore.
    */
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 6d418c1dcf46..3bb6216e8f7b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -519,7 +519,8 @@ private[hive] class TestHiveSessionState(
   override lazy val conf: SQLConf = {
     new SQLConf {
       clear()
-      override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false)
+      // Hive-backed catalog is case insensitive
+      override def caseSensitiveAnalysis: Boolean = false
       override def clear(): Unit = {
         super.clear()
         TestHiveContext.overrideConfs.foreach { case (k, v) => setConfString(k, v) }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index d55ddb251d00..367b24743748 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -22,6 +22,7 @@ import java.io.File
 import org.apache.hadoop.fs.Path
 import org.scalatest.BeforeAndAfterEach
 
+import org.apache.spark.internal.config._
 import org.apache.spark.sql.{AnalysisException, QueryTest, SaveMode}
 import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTableType}
 import org.apache.spark.sql.catalyst.TableIdentifier
@@ -56,6 +57,34 @@ class HiveDDLSuite
     fs.exists(filesystemPath)
   }
 
+  test("case insensitive tables") {
+    assert(hiveContext.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive")
+    Seq("true", "false").foreach { caseSensitive =>
+      withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) {
+        val tabName = "tAb1"
+        val tabNameToLowerCase = tabName.toLowerCase
+        withTable(tabName) {
+          // use a column name in upper case.
+          val colName = "C1"
+          assert(!tableDirectoryExists(TableIdentifier(tabNameToLowerCase)))
+          sql(s"CREATE TABLE $tabName($colName int)")
+
+          // create another table in the lower case.
+          val message = intercept[AnalysisException] {
+            sql(s"CREATE TABLE $tabNameToLowerCase($colName int)")
+          }.getMessage
+          assert(message.contains("AlreadyExistsException(message:Table tab1 already exists)"))
+
+          // use the table's column names in upper case.
+          sql(s"SELECT $colName FROM $tabName")
+          assert(tableDirectoryExists(TableIdentifier(tabNameToLowerCase)))
+          sql(s"DROP TABLE $tabName")
+          assert(!tableDirectoryExists(TableIdentifier(tabNameToLowerCase)))
+        }
+      }
+    }
+  }
+
   test("drop tables") {
     withTable("tab1") {
       val tabName = "tab1"
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 4845da7c853c..d5a860521e7e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp}
 
 import org.apache.hadoop.fs.Path
 
+import org.apache.spark.internal.config.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, FunctionRegistry}
@@ -29,6 +30,7 @@ import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRela
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
@@ -892,21 +894,26 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       Row(0, "1") :: Row(22, "2") :: Row(0, "3") :: Row(44, "4") :: Row(0, "5") :: Nil)
   }
 
-  test("SPARK-7269 Check analysis failed in case in-sensitive") {
-    Seq(1, 2, 3).map { i =>
-      (i.toString, i.toString)
-    }.toDF("key", "value").registerTempTable("df_analysis")
-    sql("SELECT kEy from df_analysis group by key").collect()
-    sql("SELECT kEy+3 from df_analysis group by key+3").collect()
-    sql("SELECT kEy+3, a.kEy, A.kEy from df_analysis A group by key").collect()
-    sql("SELECT cast(kEy+1 as Int) from df_analysis A group by cast(key+1 as int)").collect()
-    sql("SELECT cast(kEy+1 as Int) from df_analysis A group by key+1").collect()
-    sql("SELECT 2 from df_analysis A group by key+1").collect()
-    intercept[AnalysisException] {
-      sql("SELECT kEy+1 from df_analysis group by key+3")
-    }
-    intercept[AnalysisException] {
-      sql("SELECT cast(key+2 as Int) from df_analysis A group by cast(key+1 as int)")
+  test("Check analysis failed due to case in-sensitive in Hive") {
+    assert(sparkSession.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive")
+    Seq("true", "false").foreach { caseSensitive =>
+      withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) {
+        Seq(1, 2, 3).map { i =>
+          (i.toString, i.toString)
+        }.toDF("key", "value").registerTempTable("df_analysis")
+        sql("SELECT kEy from df_analysis group by key").collect()
+        sql("SELECT kEy+3 from df_analysis group by key+3").collect()
+        sql("SELECT kEy+3, a.kEy, A.kEy from df_analysis A group by key").collect()
+        sql("SELECT cast(kEy+1 as Int) from df_analysis A group by cast(key+1 as int)").collect()
+        sql("SELECT cast(kEy+1 as Int) from df_analysis A group by key+1").collect()
+        sql("SELECT 2 from df_analysis A group by key+1").collect()
+        intercept[AnalysisException] {
+          sql("SELECT kEy+1 from df_analysis group by key+3")
+        }
+        intercept[AnalysisException] {
+          sql("SELECT cast(key+2 as Int) from df_analysis A group by cast(key+1 as int)")
+        }
+      }
     }
   }