diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 3dc5aca5c88c..bde04cfd9fba 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -258,7 +258,9 @@ case class CatalogTable(
    * schema of this table's partition columns
    */
   def partitionSchema: StructType = {
-    val partitionFields = schema.takeRight(partitionColumnNames.length)
+    val partitionFields = partitionColumnNames.map { partCol =>
+      schema.find(_.name == partCol).get
+    }
     assert(partitionFields.map(_.name) == partitionColumnNames)
 
     StructType(partitionFields)
@@ -268,7 +270,9 @@ case class CatalogTable(
    * schema of this table's data columns
    */
   def dataSchema: StructType = {
-    val dataFields = schema.dropRight(partitionColumnNames.length)
+    val dataFields = schema.filterNot { i =>
+      partitionColumnNames.contains(i.name)
+    }
     StructType(dataFields)
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
index e64426f8de8f..c8a70dee0653 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
@@ -105,7 +105,7 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo
 
       case _ =>
         table.copy(
-          schema = dataSource.schema,
+          schema = table.schema.merge(dataSource.schema),
           partitionColumnNames = partitionColumnNames,
           // If metastore partition management for file source tables is enabled, we start off with
           // partition provider hive, but no partitions in the metastore. The user has to call
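With partition columns no longer forced to the end of `CatalogTable.schema`, the positional `takeRight`/`dropRight` split is replaced by name-based lookups, and `CreateDataSourceTableCommand` merges the user-declared schema with the inferred one (`table.schema.merge(dataSource.schema)`) so the declared column order survives. A minimal standalone sketch of why the positional approach breaks, not the `CatalogTable` code itself; the schema and names are made up for illustration:

```scala
import org.apache.spark.sql.types._

object PartitionSchemaSketch extends App {
  // Hypothetical user-declared schema: partition column `a` comes first.
  val schema = StructType(Seq(
    StructField("a", IntegerType),   // partition column
    StructField("b", StringType),
    StructField("c", IntegerType)))
  val partitionColumnNames = Seq("a")

  // Old positional approach: takeRight grabs the *last* field, i.e. `c`,
  // which is wrong once partition columns keep their declared position.
  val positional = schema.takeRight(partitionColumnNames.length)
  assert(positional.map(_.name) != partitionColumnNames)

  // New name-based approach: find each partition column wherever it sits.
  val byName = partitionColumnNames.map(p => schema.find(_.name == p).get)
  assert(byName.map(_.name) == partitionColumnNames)
  println(StructType(byName))  // StructType(StructField(a,IntegerType,true))
}
```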
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 327d92672db8..35494b4eff2d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -215,15 +215,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi
       } else {
         DDLUtils.checkTableColumns(tableDesc)
         val normalizedTable = normalizeCatalogTable(tableDesc.schema, tableDesc)
-
-        val partitionSchema = normalizedTable.partitionColumnNames.map { partCol =>
-          normalizedTable.schema.find(_.name == partCol).get
-        }
-
-        val reorderedSchema =
-          StructType(normalizedTable.schema.filterNot(partitionSchema.contains) ++ partitionSchema)
-
-        c.copy(tableDesc = normalizedTable.copy(schema = reorderedSchema))
+        c.copy(tableDesc = normalizedTable)
       }
 
     case create: V2CreateTablePlan if create.childrenResolved =>
diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out
index 00180eec5653..333216134114 100644
--- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out
@@ -506,9 +506,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                   char(5)
+c2                   char(2)
 v1                   varchar(6)
 v2                   varchar(2)
-c2                   char(2)
 # Partition Information
 # col_name           data_type           comment
 v2                   varchar(2)
@@ -540,9 +540,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                   char(5)
+c2                   char(2)
 v1                   varchar(6)
 v2                   varchar(2)
-c2                   char(2)
 # Partition Information
 # col_name           data_type           comment
 v2                   varchar(2)
@@ -575,9 +575,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                   char(5)
+c2                   char(2)
 v1                   varchar(6)
 v2                   varchar(2)
-c2                   char(2)
 # Partition Information
 # col_name           data_type           comment
 v2                   varchar(2)
@@ -609,9 +609,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                   char(5)
+c2                   char(2)
 v1                   varchar(6)
 v2                   varchar(2)
-c2                   char(2)
 # Partition Information
 # col_name           data_type           comment
 v2                   varchar(2)
@@ -643,9 +643,9 @@ desc formatted char_part
 struct<col_name:string,data_type:string,comment:string>
 -- !query output
 c1                   char(5)
+c2                   char(2)
 v1                   varchar(6)
 v2                   varchar(2)
-c2                   char(2)
 # Partition Information
 # col_name           data_type           comment
 v2                   varchar(2)
diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
index e7399e45c357..65b6e2b4d1b4 100644
--- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
@@ -136,9 +136,9 @@ SHOW CREATE TABLE tbl
 struct<createtab_stmt:string>
 -- !query output
 CREATE TABLE `default`.`tbl` (
+  `a` INT,
   `b` STRING,
-  `c` INT,
-  `a` INT)
+  `c` INT)
 USING parquet
 PARTITIONED BY (a)
 
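With `PreprocessTableCreation` no longer moving partition columns to the end, the golden files above change accordingly: `DESC` and `SHOW CREATE TABLE` now list columns in the user-declared order. A spark-shell style sketch of the observable effect (the local-mode setup is made up, and the expected ordering assumes this patch is applied; the table mirrors the `show-create-table.sql` fixture):

```scala
import org.apache.spark.sql.SparkSession

object ColumnOrderDemo extends App {
  val spark = SparkSession.builder()
    .appName("column-order-demo")
    .master("local[1]")
    .getOrCreate()

  // Partition column `a` is declared first, as in the fixture.
  spark.sql("CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet PARTITIONED BY (a)")

  // With this patch, columns should print as `a`, `b`, `c`
  // instead of being reordered to `b`, `c`, `a`.
  spark.sql("SHOW CREATE TABLE tbl").show(truncate = false)

  spark.sql("DROP TABLE tbl")
  spark.stop()
}
```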
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
index 13983120955f..120046bc8585 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
@@ -226,6 +226,27 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
     }
   }
 
+  test("SPARK-37517: Keep consistent columns order with user specified") {
+    val t = "SPARK_37517"
+    withTable(t) {
+      sql(
+        s"""
+           |CREATE TABLE $t (
+           |  a bigint NOT NULL,
+           |  b bigint NOT NULL,
+           |  c ARRAY<STRING>,
+           |  d STRUCT<x: INT, y: ARRAY<INT>>
+           |)
+           |USING PARQUET
+           |PARTITIONED BY (a)
+         """.stripMargin)
+      val expected = s"CREATE TABLE `default`.`$t` (" +
+        s" `a` BIGINT NOT NULL, `b` BIGINT, `c` ARRAY<STRING>," +
+        s" `d` STRUCT<`x`: INT, `y`: ARRAY<INT>>) USING PARQUET PARTITIONED BY (a)"
+      assert(getShowDDL(s"SHOW CREATE TABLE $t") == expected)
+    }
+  }
+
   protected def getShowDDL(showCreateTableSql: String): String = {
     sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ")
   }
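For reference, the suite's `getShowDDL` helper (context lines above) is what lets the test compare against a single-line string: it splits the multi-line DDL returned by `SHOW CREATE TABLE`, trims each line, and re-joins with spaces. A tiny standalone illustration of that normalization, with a sample DDL string shaped like the fixture output:

```scala
object GetShowDdlSketch extends App {
  // Sample multi-line DDL, shaped like SHOW CREATE TABLE output.
  val raw =
    """CREATE TABLE `default`.`tbl` (
      |  `a` INT,
      |  `b` STRING,
      |  `c` INT)
      |USING parquet
      |PARTITIONED BY (a)""".stripMargin

  // Same normalization as getShowDDL: trim each line, join with single spaces.
  val flattened = raw.split("\n").map(_.trim).mkString(" ")
  println(flattened)
  // CREATE TABLE `default`.`tbl` ( `a` INT, `b` STRING, `c` INT) USING parquet PARTITIONED BY (a)
}
```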