From cd68cfc95d27b931d4ea6e99b88b4e57ee3e4ef2 Mon Sep 17 00:00:00 2001 From: Yann Date: Mon, 24 Jan 2022 17:51:09 +0800 Subject: [PATCH] [HUDI-3237] gracefully fail to change column data type --- .../org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala | 6 ++++++ .../hudi/command/AlterHoodieTableChangeColumnCommand.scala | 7 +++++++ .../hudi-spark/src/test/resources/sql-statements.sql | 4 ---- .../scala/org/apache/spark/sql/hudi/TestAlterTable.scala | 7 ++++--- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala index 4901c0d39117d..d9180591c9e52 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala @@ -312,4 +312,10 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport { case field if resolver(field.name, name) => field } } + + // Compare a [[StructField]] to another, return true if they have the same column + // name(by resolver) and dataType. + def columnEqual(field: StructField, other: StructField, resolver: Resolver): Boolean = { + resolver(field.name, other.name) && field.dataType == other.dataType + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableChangeColumnCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableChangeColumnCommand.scala index befda70680f85..3aa5ca945486e 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableChangeColumnCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableChangeColumnCommand.scala @@ -49,6 +49,13 @@ case class AlterHoodieTableChangeColumnCommand( throw new AnalysisException(s"Can't find column `$columnName` given table data columns " + s"${hoodieCatalogTable.dataSchema.fieldNames.mkString("[`", "`, `", "`]")}") ) + // Throw an AnalysisException if the column name/dataType is changed. + if (!columnEqual(originColumn, newColumn, resolver)) { + throw new AnalysisException( + "ALTER TABLE CHANGE COLUMN is not supported for changing column " + + s"'${originColumn.name}' with type '${originColumn.dataType}' to " + + s"'${newColumn.name}' with type '${newColumn.dataType}'") + } // Get the new schema val newTableSchema = StructType( diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/sql-statements.sql b/hudi-spark-datasource/hudi-spark/src/test/resources/sql-statements.sql index 135c83b4b975e..e19dd1eb6b8ba 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/resources/sql-statements.sql +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/sql-statements.sql @@ -239,10 +239,6 @@ alter table h2_p add columns(ext0 int); +----------+ | ok | +----------+ -alter table h2_p change column ext0 ext0 bigint; -+----------+ -| ok | -+----------+ # DROP TABLE drop table h0; diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala index 469b135959846..0f2cb547c2fe9 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala @@ -91,9 +91,10 @@ class TestAlterTable extends TestHoodieSqlBase { ) // change column's data type - spark.sql(s"alter table $newTableName change column id id bigint") - assertResult(StructType(Seq(StructField("id", LongType, nullable = true))))( - spark.sql(s"select id from $newTableName").schema) + checkExceptionContain(s"alter table $newTableName change column id id bigint") ( + "ALTER TABLE CHANGE COLUMN is not supported for changing column 'id'" + + " with type 'IntegerType' to 'id' with type 'LongType'" + ) // Insert data to the new table. spark.sql(s"insert into $newTableName values(2, 'a2', 12, 1000, 'e0')")