apache · rdblue · Jun 22, 2019 · Jun 27, 2019 · Jul 11, 2019 · brkyvz
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalog/v2/TableChange.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalog/v2/TableChange.java
@@ -227,14 +227,18 @@ public String property() {
     }
   }
 
+  interface ColumnChange extends TableChange {
+    String[] fieldNames();
+  }
+
   /**
    * A TableChange to add a field.
    * <p>
    * If the field already exists, the change must result in an {@link IllegalArgumentException}.
    * If the new field is nested and its parent does not exist or is not a struct, the change must
    * result in an {@link IllegalArgumentException}.
    */
-  final class AddColumn implements TableChange {
+  final class AddColumn implements ColumnChange {
     private final String[] fieldNames;
     private final DataType dataType;
     private final boolean isNullable;
@@ -247,6 +251,7 @@ private AddColumn(String[] fieldNames, DataType dataType, boolean isNullable, St
       this.comment = comment;
     }
 
+    @Override
     public String[] fieldNames() {
       return fieldNames;
     }
@@ -272,7 +277,7 @@ public String comment() {
    * <p>
    * If the field does not exist, the change must result in an {@link IllegalArgumentException}.
    */
-  final class RenameColumn implements TableChange {
+  final class RenameColumn implements ColumnChange {
     private final String[] fieldNames;
     private final String newName;
 
@@ -281,6 +286,7 @@ private RenameColumn(String[] fieldNames, String newName) {
       this.newName = newName;
     }
 
+    @Override
     public String[] fieldNames() {
       return fieldNames;
     }
@@ -297,7 +303,7 @@ public String newName() {
    * <p>
    * If the field does not exist, the change must result in an {@link IllegalArgumentException}.
    */
-  final class UpdateColumnType implements TableChange {
+  final class UpdateColumnType implements ColumnChange {
     private final String[] fieldNames;
     private final DataType newDataType;
     private final boolean isNullable;
@@ -308,6 +314,7 @@ private UpdateColumnType(String[] fieldNames, DataType newDataType, boolean isNu
       this.isNullable = isNullable;
     }
 
+    @Override
     public String[] fieldNames() {
       return fieldNames;
     }
@@ -328,7 +335,7 @@ public boolean isNullable() {
    * <p>
    * If the field does not exist, the change must result in an {@link IllegalArgumentException}.
    */
-  final class UpdateColumnComment implements TableChange {
+  final class UpdateColumnComment implements ColumnChange {
     private final String[] fieldNames;
     private final String newComment;
 
@@ -337,6 +344,7 @@ private UpdateColumnComment(String[] fieldNames, String newComment) {
       this.newComment = newComment;
     }
 
+    @Override
     public String[] fieldNames() {
       return fieldNames;
     }
@@ -351,13 +359,14 @@ public String newComment() {
    * <p>
    * If the field does not exist, the change must result in an {@link IllegalArgumentException}.
    */
-  final class DeleteColumn implements TableChange {
+  final class DeleteColumn implements ColumnChange {
     private final String[] fieldNames;
 
     private DeleteColumn(String[] fieldNames) {
       this.fieldNames = fieldNames;
     }
 
+    @Override
     public String[] fieldNames() {
       return fieldNames;
     }

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalog/v2/utils/CatalogV2Util.scala b/sql/catalyst/src/main/java/org/apache/spark/sql/catalog/v2/utils/CatalogV2Util.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalog.v2.{CatalogPlugin, Identifier, TableChange}
 import org.apache.spark.sql.catalog.v2.TableChange.{AddColumn, DeleteColumn, RemoveProperty, RenameColumn, SetProperty, UpdateColumnComment, UpdateColumnType}
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 import org.apache.spark.sql.sources.v2.Table
-import org.apache.spark.sql.types.{StructField, StructType}
+import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType}
 
 object CatalogV2Util {
   import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._
@@ -132,16 +132,45 @@ object CatalogV2Util {
     val pos = struct.getFieldIndex(fieldNames.head)
         .getOrElse(throw new IllegalArgumentException(s"Cannot find field: ${fieldNames.head}"))
     val field = struct.fields(pos)
-    val replacement: Option[StructField] = if (fieldNames.tail.isEmpty) {
-      update(field)
-    } else {
-      field.dataType match {
-        case nestedStruct: StructType =>
-          val updatedType: StructType = replace(nestedStruct, fieldNames.tail, update)
-          Some(StructField(field.name, updatedType, field.nullable, field.metadata))
-        case _ =>
-          throw new IllegalArgumentException(s"Not a struct: ${fieldNames.head}")
-      }
+    val replacement: Option[StructField] = (fieldNames.tail, field.dataType) match {
+      case (Seq(), _) =>
+        update(field)
+
+      case (names, struct: StructType) =>
+        val updatedType: StructType = replace(struct, names, update)
+        Some(StructField(field.name, updatedType, field.nullable, field.metadata))
+
+      case (Seq("key"), map @ MapType(keyType, _, _)) =>
+        val updated = update(StructField("key", keyType, nullable = false))
+            .getOrElse(throw new IllegalArgumentException(s"Cannot delete map key"))
+        Some(field.copy(dataType = map.copy(keyType = updated.dataType)))
+
+      case (Seq("key", names @ _*), map @ MapType(keyStruct: StructType, _, _)) =>
+        Some(field.copy(dataType = map.copy(keyType = replace(keyStruct, names, update))))
+
+      case (Seq("value"), map @ MapType(_, mapValueType, isNullable)) =>
+        val updated = update(StructField("value", mapValueType, nullable = isNullable))
+            .getOrElse(throw new IllegalArgumentException(s"Cannot delete map value"))
+        Some(field.copy(dataType = map.copy(
+          valueType = updated.dataType,
+          valueContainsNull = updated.nullable)))
+
+      case (Seq("value", names @ _*), map @ MapType(_, valueStruct: StructType, _)) =>
+        Some(field.copy(dataType = map.copy(valueType = replace(valueStruct, names, update))))
+
+      case (Seq("element"), array @ ArrayType(elementType, isNullable)) =>
+        val updated = update(StructField("element", elementType, nullable = isNullable))
+            .getOrElse(throw new IllegalArgumentException(s"Cannot delete array element"))
+        Some(field.copy(dataType = array.copy(
+          elementType = updated.dataType,
+          containsNull = updated.nullable)))
+
+      case (Seq("element", names @ _*), array @ ArrayType(elementStruct: StructType, _)) =>
+        Some(field.copy(dataType = array.copy(elementType = replace(elementStruct, names, update))))
+
+      case (names, dataType) =>
+        throw new IllegalArgumentException(
+          s"Cannot find field: ${names.head} in ${dataType.simpleString}")
     }
 
     val newFields = struct.fields.zipWithIndex.flatMap {

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -24,7 +24,7 @@ import scala.collection.mutable.ArrayBuffer
 import scala.util.Random
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalog.v2.{CatalogNotFoundException, CatalogPlugin, LookupCatalog}
+import org.apache.spark.sql.catalog.v2.{CatalogNotFoundException, CatalogPlugin, LookupCatalog, TableChange}
 import org.apache.spark.sql.catalyst._
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.encoders.OuterScopes
@@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.expressions.objects._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.plans.logical.sql.{AlterTableAddColumnsStatement, AlterTableAlterColumnStatement, AlterTableDropColumnsStatement, AlterTableRenameColumnStatement, AlterTableSetLocationStatement, AlterTableSetPropertiesStatement, AlterTableUnsetPropertiesStatement}
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.catalyst.trees.TreeNodeRef
 import org.apache.spark.sql.catalyst.util.toPrettySQL
@@ -165,6 +166,7 @@ class Analyzer(
       new SubstituteUnresolvedOrdinals(conf)),
     Batch("Resolution", fixedPoint,
       ResolveTableValuedFunctions ::
+      ResolveAlterTable ::
       ResolveTables ::
       ResolveRelations ::
       ResolveReferences ::
@@ -787,6 +789,86 @@ class Analyzer(
     }
   }
 
+  /**
+   * Resolve ALTER TABLE statements that use a DSv2 catalog.
+   *
+   * This rule converts unresolved ALTER TABLE statements to v2 when a v2 catalog is responsible
+   * for the table identifier. A v2 catalog is responsible for an identifier when the identifier
+   * has a catalog specified, like prod_catalog.db.table, or when a default v2 catalog is set and
+   * the table identifier does not include a catalog.
+   */
+  object ResolveAlterTable extends Rule[LogicalPlan] {
+    import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._
+    override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
+      case alter @ AlterTableAddColumnsStatement(
+          CatalogObjectIdentifier(Some(v2Catalog), ident), cols) =>
+        val changes = cols.map { col =>
+          TableChange.addColumn(col.name.toArray, col.dataType, true, col.comment.orNull)
+        }
+
+        AlterTable(
+          v2Catalog.asTableCatalog, ident,
+          UnresolvedRelation(alter.tableName),
+          changes)
+
+      case alter @ AlterTableAlterColumnStatement(
+          CatalogObjectIdentifier(Some(v2Catalog), ident), colName, dataType, comment) =>
+        val typeChange = dataType.map { newDataType =>
+          TableChange.updateColumnType(colName.toArray, newDataType, true)
+        }
+
+        val commentChange = comment.map { newComment =>
+          TableChange.updateColumnComment(colName.toArray, newComment)
+        }
+
+        AlterTable(
+          v2Catalog.asTableCatalog, ident,
+          UnresolvedRelation(alter.tableName),
+          typeChange.toSeq ++ commentChange.toSeq)
+
+      case alter @ AlterTableRenameColumnStatement(
+          CatalogObjectIdentifier(Some(v2Catalog), ident), col, newName) =>
+        AlterTable(
+          v2Catalog.asTableCatalog, ident,
+          UnresolvedRelation(alter.tableName),
+          Seq(TableChange.renameColumn(col.toArray, newName)))
+
+      case alter @ AlterTableDropColumnsStatement(
+          CatalogObjectIdentifier(Some(v2Catalog), ident), cols) =>
+        val changes = cols.map(col => TableChange.deleteColumn(col.toArray))
+        AlterTable(
+          v2Catalog.asTableCatalog, ident,
+          UnresolvedRelation(alter.tableName),
+          changes)
+
+      case alter @ AlterTableSetPropertiesStatement(
+          CatalogObjectIdentifier(Some(v2Catalog), ident), props) =>
+        val changes = props.map {
+          case (key, value) =>
+            TableChange.setProperty(key, value)
+        }
+
+        AlterTable(
+          v2Catalog.asTableCatalog, ident,
+          UnresolvedRelation(alter.tableName),
+          changes.toSeq)
+
+      case alter @ AlterTableUnsetPropertiesStatement(
+          CatalogObjectIdentifier(Some(v2Catalog), ident), keys, _) =>
+        AlterTable(
+          v2Catalog.asTableCatalog, ident,
+          UnresolvedRelation(alter.tableName),
+          keys.map(key => TableChange.removeProperty(key)))
+
+      case alter @ AlterTableSetLocationStatement(
+          CatalogObjectIdentifier(Some(v2Catalog), ident), newLoc) =>
+        AlterTable(
+          v2Catalog.asTableCatalog, ident,
+          UnresolvedRelation(alter.tableName),
+          Seq(TableChange.setProperty("location", newLoc)))
+    }
+  }
+
   /**
    * Replaces [[UnresolvedAttribute]]s with concrete [[AttributeReference]]s from
    * a logical plan node's children.

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
 
 import org.apache.spark.api.python.PythonEvalType
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalog.v2.TableChange.{AddColumn, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnType}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
@@ -353,6 +354,59 @@ trait CheckAnalysis extends PredicateHelper {
               case _ =>
             }
 
+          case alter: AlterTable if alter.childrenResolved =>
+            val table = alter.table
+            def findField(operation: String, fieldName: Array[String]): StructField = {
+              // include collections because structs nested in maps and arrays may be altered
+              val field = table.schema.findNestedField(fieldName, includeCollections = true)
+              if (field.isEmpty) {
+                throw new AnalysisException(
+                  s"Cannot $operation missing field in ${table.name} schema: ${fieldName.quoted}")
+              }
+              field.get
+            }
+
+            alter.changes.foreach {
+              case add: AddColumn =>
+                val parent = add.fieldNames.init
+                if (parent.nonEmpty) {
+                  findField("add to", parent)
+                }
+              case update: UpdateColumnType =>
+                val field = findField("update", update.fieldNames)
+                val fieldName = update.fieldNames.quoted
+                update.newDataType match {
+                  case _: StructType =>
+                    throw new AnalysisException(
+                      s"Cannot update ${table.name} field $fieldName type: " +
+                          s"update a struct by adding, deleting, or updating its fields")
+                  case _: MapType =>
+                    throw new AnalysisException(
+                      s"Cannot update ${table.name} field $fieldName type: " +
+                          s"update a map by updating $fieldName.key or $fieldName.value")
+                  case _: ArrayType =>
+                    throw new AnalysisException(
+                      s"Cannot update ${table.name} field $fieldName type: " +
+                          s"update the element by updating $fieldName.element")
+                  case _: AtomicType =>
+                    // update is okay
+                }
+                if (!Cast.canUpCast(field.dataType, update.newDataType)) {
+                  throw new AnalysisException(
+                    s"Cannot update ${table.name} field $fieldName: " +
+                        s"${field.dataType.simpleString} cannot be cast to " +
+                        s"${update.newDataType.simpleString}")
+                }
+              case rename: RenameColumn =>
+                findField("rename", rename.fieldNames)
+              case update: UpdateColumnComment =>
+                findField("update", update.fieldNames)
+              case delete: DeleteColumn =>
+                findField("delete", delete.fieldNames)
+              case _ =>
+              // no validation needed for set and remove property
+            }
+
           case _ => // Fallbacks to the following checks
         }
 

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -40,12 +40,15 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str
  *
  * @param multipartIdentifier table name
  */
-case class UnresolvedRelation(multipartIdentifier: Seq[String]) extends LeafNode {
+case class UnresolvedRelation(
+    multipartIdentifier: Seq[String]) extends LeafNode with NamedRelation {
   import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._
 
   /** Returns a `.` separated name for this relation. */
   def tableName: String = multipartIdentifier.quoted
 
+  override def name: String = tableName
+
   override def output: Seq[Attribute] = Nil
 
   override lazy val resolved = false