-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-28139][SQL] Add v2 ALTER TABLE implementation. #24937
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalog.v2.{CatalogPlugin, Identifier, TableChange} | |
| import org.apache.spark.sql.catalog.v2.TableChange.{AddColumn, DeleteColumn, RemoveProperty, RenameColumn, SetProperty, UpdateColumnComment, UpdateColumnType} | ||
| import org.apache.spark.sql.catalyst.analysis.NoSuchTableException | ||
| import org.apache.spark.sql.sources.v2.Table | ||
| import org.apache.spark.sql.types.{StructField, StructType} | ||
| import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType} | ||
|
|
||
| object CatalogV2Util { | ||
| import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ | ||
|
|
@@ -132,16 +132,45 @@ object CatalogV2Util { | |
| val pos = struct.getFieldIndex(fieldNames.head) | ||
| .getOrElse(throw new IllegalArgumentException(s"Cannot find field: ${fieldNames.head}")) | ||
| val field = struct.fields(pos) | ||
| val replacement: Option[StructField] = if (fieldNames.tail.isEmpty) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should name resolution respect the configured case sensitivity, instead of strict equality, when calling getFieldIndex? |
||
| update(field) | ||
| } else { | ||
| field.dataType match { | ||
| case nestedStruct: StructType => | ||
| val updatedType: StructType = replace(nestedStruct, fieldNames.tail, update) | ||
| Some(StructField(field.name, updatedType, field.nullable, field.metadata)) | ||
| case _ => | ||
| throw new IllegalArgumentException(s"Not a struct: ${fieldNames.head}") | ||
| } | ||
| val replacement: Option[StructField] = (fieldNames.tail, field.dataType) match { | ||
| case (Seq(), _) => | ||
| update(field) | ||
|
|
||
| case (names, struct: StructType) => | ||
| val updatedType: StructType = replace(struct, names, update) | ||
| Some(StructField(field.name, updatedType, field.nullable, field.metadata)) | ||
|
|
||
| case (Seq("key"), map @ MapType(keyType, _, _)) => | ||
| val updated = update(StructField("key", keyType, nullable = false)) | ||
| .getOrElse(throw new IllegalArgumentException(s"Cannot delete map key")) | ||
| Some(field.copy(dataType = map.copy(keyType = updated.dataType))) | ||
|
|
||
| case (Seq("key", names @ _*), map @ MapType(keyStruct: StructType, _, _)) => | ||
| Some(field.copy(dataType = map.copy(keyType = replace(keyStruct, names, update)))) | ||
|
|
||
| case (Seq("value"), map @ MapType(_, mapValueType, isNullable)) => | ||
| val updated = update(StructField("value", mapValueType, nullable = isNullable)) | ||
| .getOrElse(throw new IllegalArgumentException(s"Cannot delete map value")) | ||
| Some(field.copy(dataType = map.copy( | ||
| valueType = updated.dataType, | ||
| valueContainsNull = updated.nullable))) | ||
|
|
||
| case (Seq("value", names @ _*), map @ MapType(_, valueStruct: StructType, _)) => | ||
| Some(field.copy(dataType = map.copy(valueType = replace(valueStruct, names, update)))) | ||
|
|
||
| case (Seq("element"), array @ ArrayType(elementType, isNullable)) => | ||
| val updated = update(StructField("element", elementType, nullable = isNullable)) | ||
| .getOrElse(throw new IllegalArgumentException(s"Cannot delete array element")) | ||
| Some(field.copy(dataType = array.copy( | ||
| elementType = updated.dataType, | ||
| containsNull = updated.nullable))) | ||
|
|
||
| case (Seq("element", names @ _*), array @ ArrayType(elementStruct: StructType, _)) => | ||
| Some(field.copy(dataType = array.copy(elementType = replace(elementStruct, names, update)))) | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks like a pretty cool feature to me. Can we document it in |
||
| case (names, dataType) => | ||
| throw new IllegalArgumentException( | ||
| s"Cannot find field: ${names.head} in ${dataType.simpleString}") | ||
| } | ||
|
|
||
| val newFields = struct.fields.zipWithIndex.flatMap { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,7 +24,7 @@ import scala.collection.mutable.ArrayBuffer | |
| import scala.util.Random | ||
|
|
||
| import org.apache.spark.sql.AnalysisException | ||
| import org.apache.spark.sql.catalog.v2.{CatalogNotFoundException, CatalogPlugin, LookupCatalog} | ||
| import org.apache.spark.sql.catalog.v2.{CatalogNotFoundException, CatalogPlugin, LookupCatalog, TableChange} | ||
| import org.apache.spark.sql.catalyst._ | ||
| import org.apache.spark.sql.catalyst.catalog._ | ||
| import org.apache.spark.sql.catalyst.encoders.OuterScopes | ||
|
|
@@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ | |
| import org.apache.spark.sql.catalyst.expressions.objects._ | ||
| import org.apache.spark.sql.catalyst.plans._ | ||
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.catalyst.plans.logical.sql.{AlterTableAddColumnsStatement, AlterTableAlterColumnStatement, AlterTableDropColumnsStatement, AlterTableRenameColumnStatement, AlterTableSetLocationStatement, AlterTableSetPropertiesStatement, AlterTableUnsetPropertiesStatement} | ||
|
||
| import org.apache.spark.sql.catalyst.rules._ | ||
| import org.apache.spark.sql.catalyst.trees.TreeNodeRef | ||
| import org.apache.spark.sql.catalyst.util.toPrettySQL | ||
|
|
@@ -165,6 +166,7 @@ class Analyzer( | |
| new SubstituteUnresolvedOrdinals(conf)), | ||
| Batch("Resolution", fixedPoint, | ||
| ResolveTableValuedFunctions :: | ||
| ResolveAlterTable :: | ||
| ResolveTables :: | ||
| ResolveRelations :: | ||
| ResolveReferences :: | ||
|
|
@@ -787,6 +789,86 @@ class Analyzer( | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Resolve ALTER TABLE statements that use a DSv2 catalog. | ||
| * | ||
| * This rule converts unresolved ALTER TABLE statements to v2 when a v2 catalog is responsible | ||
| * for the table identifier. A v2 catalog is responsible for an identifier when the identifier | ||
| * has a catalog specified, like prod_catalog.db.table, or when a default v2 catalog is set and | ||
| * the table identifier does not include a catalog. | ||
| */ | ||
| object ResolveAlterTable extends Rule[LogicalPlan] { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you remind me where we handle the v1 code path for ALTER TABLE? |
||
| import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ | ||
| override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { | ||
| case alter @ AlterTableAddColumnsStatement( | ||
| CatalogObjectIdentifier(Some(v2Catalog), ident), cols) => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have a JIRA to follow up on changing these to use the V2SessionCatalog if a catalog is not defined? |
||
| val changes = cols.map { col => | ||
| TableChange.addColumn(col.name.toArray, col.dataType, true, col.comment.orNull) | ||
| } | ||
|
|
||
| AlterTable( | ||
| v2Catalog.asTableCatalog, ident, | ||
| UnresolvedRelation(alter.tableName), | ||
| changes) | ||
|
|
||
| case alter @ AlterTableAlterColumnStatement( | ||
| CatalogObjectIdentifier(Some(v2Catalog), ident), colName, dataType, comment) => | ||
| val typeChange = dataType.map { newDataType => | ||
| TableChange.updateColumnType(colName.toArray, newDataType, true) | ||
| } | ||
|
|
||
| val commentChange = comment.map { newComment => | ||
| TableChange.updateColumnComment(colName.toArray, newComment) | ||
| } | ||
|
|
||
| AlterTable( | ||
| v2Catalog.asTableCatalog, ident, | ||
| UnresolvedRelation(alter.tableName), | ||
| typeChange.toSeq ++ commentChange.toSeq) | ||
|
|
||
| case alter @ AlterTableRenameColumnStatement( | ||
| CatalogObjectIdentifier(Some(v2Catalog), ident), col, newName) => | ||
| AlterTable( | ||
| v2Catalog.asTableCatalog, ident, | ||
| UnresolvedRelation(alter.tableName), | ||
| Seq(TableChange.renameColumn(col.toArray, newName))) | ||
|
|
||
| case alter @ AlterTableDropColumnsStatement( | ||
| CatalogObjectIdentifier(Some(v2Catalog), ident), cols) => | ||
| val changes = cols.map(col => TableChange.deleteColumn(col.toArray)) | ||
| AlterTable( | ||
| v2Catalog.asTableCatalog, ident, | ||
| UnresolvedRelation(alter.tableName), | ||
| changes) | ||
|
|
||
| case alter @ AlterTableSetPropertiesStatement( | ||
| CatalogObjectIdentifier(Some(v2Catalog), ident), props) => | ||
| val changes = props.map { | ||
| case (key, value) => | ||
| TableChange.setProperty(key, value) | ||
| } | ||
|
|
||
| AlterTable( | ||
| v2Catalog.asTableCatalog, ident, | ||
| UnresolvedRelation(alter.tableName), | ||
| changes.toSeq) | ||
|
|
||
| case alter @ AlterTableUnsetPropertiesStatement( | ||
| CatalogObjectIdentifier(Some(v2Catalog), ident), keys, _) => | ||
| AlterTable( | ||
| v2Catalog.asTableCatalog, ident, | ||
| UnresolvedRelation(alter.tableName), | ||
| keys.map(key => TableChange.removeProperty(key))) | ||
|
|
||
| case alter @ AlterTableSetLocationStatement( | ||
| CatalogObjectIdentifier(Some(v2Catalog), ident), newLoc) => | ||
| AlterTable( | ||
| v2Catalog.asTableCatalog, ident, | ||
| UnresolvedRelation(alter.tableName), | ||
| Seq(TableChange.setProperty("location", newLoc))) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, I feel changing the location of a table deserves its own special change type, rather than being passed as a generic table property.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not all tables have locations, and we're using the convention of passing optional metadata as table properties. Given that we don't want to build special support for everything, I think this is the right way to pass the change. |
||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Replaces [[UnresolvedAttribute]]s with concrete [[AttributeReference]]s from | ||
| * a logical plan node's children. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis | |
|
|
||
| import org.apache.spark.api.python.PythonEvalType | ||
| import org.apache.spark.sql.AnalysisException | ||
| import org.apache.spark.sql.catalog.v2.TableChange.{AddColumn, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnType} | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ | ||
| import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression | ||
|
|
@@ -353,6 +354,59 @@ trait CheckAnalysis extends PredicateHelper { | |
| case _ => | ||
| } | ||
|
|
||
| case alter: AlterTable if alter.childrenResolved => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason that some of these checks are here, and some in the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. The meaning of |
||
| val table = alter.table | ||
| def findField(operation: String, fieldName: Array[String]): StructField = { | ||
| // include collections because structs nested in maps and arrays may be altered | ||
| val field = table.schema.findNestedField(fieldName, includeCollections = true) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How is case sensitivity handled here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe it uses the resolver, so it should be case sensitive if the analyzer is. |
||
| if (field.isEmpty) { | ||
| throw new AnalysisException( | ||
| s"Cannot $operation missing field in ${table.name} schema: ${fieldName.quoted}") | ||
| } | ||
| field.get | ||
| } | ||
|
|
||
| alter.changes.foreach { | ||
| case add: AddColumn => | ||
| val parent = add.fieldNames.init | ||
| if (parent.nonEmpty) { | ||
| findField("add to", parent) | ||
| } | ||
| case update: UpdateColumnType => | ||
| val field = findField("update", update.fieldNames) | ||
| val fieldName = update.fieldNames.quoted | ||
| update.newDataType match { | ||
| case _: StructType => | ||
| throw new AnalysisException( | ||
| s"Cannot update ${table.name} field $fieldName type: " + | ||
| s"update a struct by adding, deleting, or updating its fields") | ||
| case _: MapType => | ||
| throw new AnalysisException( | ||
| s"Cannot update ${table.name} field $fieldName type: " + | ||
| s"update a map by updating $fieldName.key or $fieldName.value") | ||
| case _: ArrayType => | ||
| throw new AnalysisException( | ||
| s"Cannot update ${table.name} field $fieldName type: " + | ||
| s"update the element by updating $fieldName.element") | ||
| case _: AtomicType => | ||
| // update is okay | ||
| } | ||
| if (!Cast.canUpCast(field.dataType, update.newDataType)) { | ||
rdblue marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| throw new AnalysisException( | ||
| s"Cannot update ${table.name} field $fieldName: " + | ||
| s"${field.dataType.simpleString} cannot be cast to " + | ||
| s"${update.newDataType.simpleString}") | ||
| } | ||
| case rename: RenameColumn => | ||
| findField("rename", rename.fieldNames) | ||
| case update: UpdateColumnComment => | ||
| findField("update", update.fieldNames) | ||
| case delete: DeleteColumn => | ||
| findField("delete", delete.fieldNames) | ||
| case _ => | ||
| // no validation needed for set and remove property | ||
| } | ||
|
|
||
| case _ => // Fallbacks to the following checks | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Could you add docs for this please?