
Commit 974074e

wakun authored and GitHub Enterprise committed
[CARMEL-7522][CARMEL-5229] Resolve table schema with upload CSV header case-insensitively (apache#167)
1 parent 388d1c2 commit 974074e

File tree

2 files changed: +52 −52 lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala

Lines changed: 1 addition & 1 deletion
@@ -658,7 +658,7 @@ case class UploadDataCommand(
 
       targetTable.schema.
         fields.
-        map(t => if (view.schema.fields.exists(_.name == t.name)) {
+        map(t => if (view.schema.fields.exists(_.name.equalsIgnoreCase(t.name))) {
          t.name
        } else {
          s"null as ${t.name}"

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/delta/DeltaQuerySuite.scala

Lines changed: 51 additions & 51 deletions
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.delta
 
 import java.io.File
 
+import org.apache.hadoop.fs.FileSystem
 import org.apache.hadoop.fs.Path
 
 import org.apache.spark.SparkConf
@@ -32,7 +33,7 @@ import org.apache.spark.sql.delta.{DeltaLog, DeltaTableUtils}
 import org.apache.spark.sql.delta.catalog.DeltaCatalog
 import org.apache.spark.sql.execution.{CommandResultExec, FileSourceScanExec, SparkPlan}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
-import org.apache.spark.sql.execution.command.{ExecutedCommandExec, RunnableCommand}
+import org.apache.spark.sql.execution.command.{ExecutedCommandExec, RunnableCommand, UploadDataCommand}
 import org.apache.spark.sql.execution.datasources.v2.{AppendDataExecV1, AtomicCreateTableAsSelectExec, AtomicReplaceTableAsSelectExec, CreateTableExec, OverwriteByExpressionExecV1}
 import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
@@ -3733,56 +3734,55 @@ class DeltaQuerySuite extends QueryTest
 //      }
 //    }
 //  }
-//
-//  test("CARMEL-5229 : upload - Resolve table schema with upload csv header in case insensitive") {
-//    withTempDir { dir =>
-//      Seq(true, false).foreach { ae =>
-//        withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> ae.toString,
-//          SQLConf.AUTO_REPARTITION_FOR_WRITING_ENABLED.key -> "true",
-//          SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "0",
-//          SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "false",
-//          SQLConf.SHUFFLE_PARTITIONS.key -> "5") {
-//
-//          withTable( "dest") {
-//            sql("CREATE TABLE dest(Id BIGINT, NAME STRING, sex Boolean) USING DELTA")
-//
-//            val table = TableIdentifier("dest")
-//            val targetTable = getCatalogTable(table)
-//
-//            val path = new Path(dir.getCanonicalPath, ae.toString)
-//            spark.range(0, 5).map(x => (x, s"user_$x", x % 2==0)).toDF("ID", "name", "Sex").
-//              toDF().coalesce(1).write.option("header", true).csv(path.toString)
-//
-//            val uploadDataCommand =
-//              UploadDataCommand(table, "", true, None, Some(Map("header" -> "true")))
-//
-//            val fs = FileSystem.get(sparkContext.hadoopConfiguration)
-//            val defaultFs = FileSystem.getDefaultUri((sparkContext.hadoopConfiguration)).getScheme
-//            val isDefaultLocal = defaultFs == null || defaultFs == "file"
-//
-//            val files = fs.listStatus(new Path(path.toString)).
-//              filter(p => p.getPath.toString.endsWith(".csv")).map(p => p.getPath.toString)
-//            val csvFile = new Path(files(0))
-//            val targetPath = new Path(targetTable.storage.locationUri.get.getPath)
-//            val result = uploadDataCommand.performUpload(spark, fs, targetTable,
-//              csvFile, isDefaultLocal)
-//
-//            assert(result == targetPath)
-//            checkAnswer(
-//              sql("SELECT * FROM dest"),
-//              Seq(
-//                Row(0, "user_0", true),
-//                Row(1, "user_1", false),
-//                Row(2, "user_2", true),
-//                Row(3, "user_3", false),
-//                Row(4, "user_4", true)
-//              )
-//            )
-//          }
-//        }
-//      }
-//    }
-//  }
+
+  test("CARMEL-5229 : upload - Resolve table schema with upload csv header in case insensitive") {
+    withTempDir { dir =>
+      Seq(true, false).foreach { ae =>
+        withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> ae.toString,
+          SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "0",
+          SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "false",
+          SQLConf.SHUFFLE_PARTITIONS.key -> "5") {
+
+          withTable( "dest") {
+            sql("CREATE TABLE dest(Id BIGINT, NAME STRING, sex Boolean) USING DELTA")
+
+            val table = TableIdentifier("dest")
+            val targetTable = getCatalogTable(table)
+
+            val path = new Path(dir.getCanonicalPath, ae.toString)
+            spark.range(0, 5).map(x => (x, s"user_$x", x % 2==0)).toDF("ID", "name", "Sex").
+              toDF().coalesce(1).write.option("header", true).csv(path.toString)
+
+            val uploadDataCommand =
+              UploadDataCommand(table, "", true, None, Map("header" -> "true"))
+
+            val fs = FileSystem.get(sparkContext.hadoopConfiguration)
+            val defaultFs = FileSystem.getDefaultUri((sparkContext.hadoopConfiguration)).getScheme
+            val isDefaultLocal = defaultFs == null || defaultFs == "file"
+
+            val files = fs.listStatus(new Path(path.toString)).
+              filter(p => p.getPath.toString.endsWith(".csv")).map(p => p.getPath.toString)
+            val csvFile = new Path(files(0))
+            val targetPath = new Path(targetTable.storage.locationUri.get.getPath)
+            val result = uploadDataCommand.performUpload(spark, fs, targetTable,
+              csvFile, isDefaultLocal)
+
+            assert(result == targetPath)
+            checkAnswer(
+              sql("SELECT * FROM dest"),
+              Seq(
+                Row(0, "user_0", true),
+                Row(1, "user_1", false),
+                Row(2, "user_2", true),
+                Row(3, "user_3", false),
+                Row(4, "user_4", true)
+              )
+            )
+          }
+        }
+      }
+    }
+  }
 
   test("CARMEL-5202: Fix NoSuchElementException when creating DELTA table") {
     withView("v1") {
