apache · Peng-Lei · Dec 2, 2021 · LuciferYang · Dec 3, 2021 · LuciferYang
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -258,7 +258,9 @@ case class CatalogTable(
    * schema of this table's partition columns
    */
   def partitionSchema: StructType = {
-    val partitionFields = schema.takeRight(partitionColumnNames.length)
+    val partitionFields = partitionColumnNames.map { partCol =>
+      schema.find(_.name == partCol).get
+    }
     assert(partitionFields.map(_.name) == partitionColumnNames)
 
     StructType(partitionFields)
@@ -268,7 +270,9 @@ case class CatalogTable(
    * schema of this table's data columns
    */
   def dataSchema: StructType = {
-    val dataFields = schema.dropRight(partitionColumnNames.length)
+    val dataFields = schema.filterNot { i =>
+      partitionColumnNames.contains(i.name)
+    }
     StructType(dataFields)
   }
 

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
@@ -105,7 +105,7 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo
 
       case _ =>
         table.copy(
-          schema = dataSource.schema,
+          schema = table.schema.merge(dataSource.schema),
           partitionColumnNames = partitionColumnNames,
           // If metastore partition management for file source tables is enabled, we start off with
           // partition provider hive, but no partitions in the metastore. The user has to call

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -215,15 +215,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi
       } else {
         DDLUtils.checkTableColumns(tableDesc)
         val normalizedTable = normalizeCatalogTable(tableDesc.schema, tableDesc)
-
-        val partitionSchema = normalizedTable.partitionColumnNames.map { partCol =>
-          normalizedTable.schema.find(_.name == partCol).get
-        }
-
-        val reorderedSchema =
-          StructType(normalizedTable.schema.filterNot(partitionSchema.contains) ++ partitionSchema)
-
-        c.copy(tableDesc = normalizedTable.copy(schema = reorderedSchema))
+        c.copy(tableDesc = normalizedTable)
       }
 
     case create: V2CreateTablePlan if create.childrenResolved =>

diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out
@@ -506,9 +506,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                  	char(5)             	                    
+c2                  	char(2)             	                    
 v1                  	varchar(6)          	                    
 v2                  	varchar(2)          	                    
-c2                  	char(2)             	                    
 # Partition Information	                    	                    
 # col_name          	data_type           	comment             
 v2                  	varchar(2)          	                    
@@ -540,9 +540,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                  	char(5)             	                    
+c2                  	char(2)             	                    
 v1                  	varchar(6)          	                    
 v2                  	varchar(2)          	                    
-c2                  	char(2)             	                    
 # Partition Information	                    	                    
 # col_name          	data_type           	comment             
 v2                  	varchar(2)          	                    
@@ -575,9 +575,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                  	char(5)             	                    
+c2                  	char(2)             	                    
 v1                  	varchar(6)          	                    
 v2                  	varchar(2)          	                    
-c2                  	char(2)             	                    
 # Partition Information	                    	                    
 # col_name          	data_type           	comment             
 v2                  	varchar(2)          	                    
@@ -609,9 +609,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                  	char(5)             	                    
+c2                  	char(2)             	                    
 v1                  	varchar(6)          	                    
 v2                  	varchar(2)          	                    
-c2                  	char(2)             	                    
 # Partition Information	                    	                    
 # col_name          	data_type           	comment             
 v2                  	varchar(2)          	                    
@@ -643,9 +643,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                  	char(5)             	                    
+c2                  	char(2)             	                    
 v1                  	varchar(6)          	                    
 v2                  	varchar(2)          	                    
-c2                  	char(2)             	                    
 # Partition Information	                    	                    
 # col_name          	data_type           	comment             
 v2                  	varchar(2)          	                    

diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
@@ -136,9 +136,9 @@ SHOW CREATE TABLE tbl
 struct<createtab_stmt:string>
 -- !query output
 CREATE TABLE `default`.`tbl` (
+  `a` INT,
   `b` STRING,
-  `c` INT,
-  `a` INT)
+  `c` INT)
 USING parquet
 PARTITIONED BY (a)
 

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
@@ -226,6 +226,27 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
     }
   }
 
+  test("SPARK-37517: Keep consistent columns order with user specified") {
+    val t = "SPARK_37517"
+    withTable(t) {
+      sql(
+        s"""
+           |CREATE TABLE $t (
+           |  a bigint NOT NULL,
+           |  b bigint NOT NULL,
+           |  c ARRAY<INT>,
+           |  d STRUCT<x: INT, y: ARRAY<BOOLEAN>>
+           |)
+           |USING PARQUET
+           |PARTITIONED BY (a)
+        """.stripMargin)
+      val expected = s"CREATE TABLE `default`.`$t` (" +
+        s" `a` BIGINT NOT NULL, `b` BIGINT, `c` ARRAY<INT>," +
+        s" `d` STRUCT<`x`: INT, `y`: ARRAY<BOOLEAN>>) USING PARQUET PARTITIONED BY (a)"
+      assert(getShowDDL(s"SHOW CREATE TABLE $t") == expected)
+    }
+  }
+
   protected def getShowDDL(showCreateTableSql: String): String = {
     sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ")
   }