-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-29563][SQL] CREATE TABLE LIKE should look up catalog/table like v2 commands #26219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
940282b
13ee783
5e99f37
4a1fece
4c772e5
11ea11b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -141,6 +141,37 @@ class ResolveCatalogs(val catalogManager: CatalogManager) | |
| writeOptions = c.options.filterKeys(_ != "path"), | ||
| ignoreIfExists = c.ifNotExists) | ||
|
|
||
| case c @ CreateTableLikeStatement(target, source, loc, ifNotExists) => | ||
| def validateLocation(loc: Option[String]) = { | ||
| if (loc.isDefined) { | ||
| throw new AnalysisException("Location clause not supported for " + | ||
| "CREATE TABLE LIKE statement when tables are of V2 type") | ||
| } | ||
| } | ||
| (target, source) match { | ||
| case (NonSessionCatalog(tCatalog, t), NonSessionCatalog(sCatalog, s)) => | ||
| validateLocation(loc) | ||
| CreateTableLike(tCatalog.asTableCatalog, | ||
| t, | ||
| Some(sCatalog.asTableCatalog), | ||
| s, | ||
| ifNotExists) | ||
| case (NonSessionCatalog(tCatalog, t), SessionCatalog(sCatalog, s)) => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
If we need to catch the session catalog, we should move the case to `ResolveSessionCatalog`.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
@cloud-fan `NonSessionCatalog` is not available in `ResolveSessionCatalog`, right? This case catches both `NonSessionCatalog` and `SessionCatalog`?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
OK, let's leave it. |
||
| validateLocation(loc) | ||
| CreateTableLike(tCatalog.asTableCatalog, | ||
| t, | ||
| None, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Shall we pass [inline code lost in extraction] here? |
||
| source, | ||
| ifNotExists) | ||
| case (SessionCatalog(tCatalog, t), NonSessionCatalog(sCatalog, s)) => | ||
| throw new AnalysisException("CREATE TABLE LIKE is not allowed when " + | ||
| "source table is V2 type and target table is V1 type") | ||
|
|
||
| // When both target and source are V1 type, the statement is handled by the v1 | ||
| // CreateTableLikeCommand. We return it untransformed here; ResolveSessionCatalog handles it. | ||
| case _ => c | ||
| } | ||
|
|
||
| case RefreshTableStatement(NonSessionCatalog(catalog, tableName)) => | ||
| RefreshTable(catalog.asTableCatalog, tableName.asIdentifier) | ||
|
|
||
|
|
@@ -216,4 +247,12 @@ class ResolveCatalogs(val catalogManager: CatalogManager) | |
| case _ => None | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Extractor for a multi-part name that resolves to the session catalog. | ||
| * | ||
| * Matches when `CatalogAndIdentifierParts` splits `nameParts` into a catalog and the remaining | ||
| * name parts and `isSessionCatalog` holds for that catalog; yields the (catalog, parts) pair. | ||
| * Any other input does not match. | ||
| */ | ||
| object SessionCatalog { | ||
| def unapply(nameParts: Seq[String]): Option[(CatalogPlugin, Seq[String])] = nameParts match { | ||
| case CatalogAndIdentifierParts(catalog, parts) if isSessionCatalog(catalog) => | ||
| Some(catalog -> parts) | ||
| case _ => None | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -187,6 +187,16 @@ case class CreateTableAsSelect( | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Create a new table with the same table definition of the source table. | ||
| */ | ||
| case class CreateTableLike( | ||
| targetCatalog: TableCatalog, | ||
| targetTableName: Seq[String], | ||
| sourceCatalog: Option[TableCatalog], | ||
| sourceTableName: Seq[String], | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
For the source table, what we really care about is the table itself, not which catalog it comes from. I think it's better to define the plan as [code snippet lost in extraction]. In the planner, we match [code snippet lost in extraction].
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
@cloud-fan I am a bit confused. The source can be either V1 or V2, right? So how can we expect a `DataSourceV2Relation` as the source?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The v1 table has a v2 adapter called `V1Table`. |
||
| ifNotExists: Boolean) extends Command | ||
|
|
||
| /** | ||
| * Replace a table with a v2 catalog. | ||
| * | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.execution.datasources.v2 | ||
|
|
||
| import org.apache.spark.SparkException | ||
| import org.apache.spark.sql.catalyst.InternalRow | ||
| import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException | ||
| import org.apache.spark.sql.catalyst.expressions.Attribute | ||
| import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, TableChange, V1Table} | ||
|
|
||
| /** | ||
| * Physical plan node for the CREATE TABLE LIKE statement. | ||
| * | ||
| * The source table is loaded from `sourceCatalog` when one is given; otherwise its metadata is | ||
| * looked up in the session catalog and wrapped in a [[V1Table]]. If the target table does not | ||
| * exist, it is created in `targetCatalog` with the source table's schema, partitioning and | ||
| * properties. | ||
| * | ||
| * If the target table already exists, nothing is created and a [[TableAlreadyExistsException]] | ||
| * is thrown unless `ifNotExists` is set. A [[TableAlreadyExistsException]] raised by | ||
| * `createTable` itself while `ifNotExists` is set is logged as a warning and ignored. | ||
| */ | ||
| case class CreateTableLikeExec( | ||
| targetCatalog: TableCatalog, | ||
| targetTable: Seq[String], | ||
| sourceCatalog: Option[TableCatalog], | ||
| sourceTable: Seq[String], | ||
| ifNotExists: Boolean) extends V2CommandExec { | ||
| import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ | ||
|
|
||
| override def output: Seq[Attribute] = Seq.empty | ||
|
|
||
| override protected def run(): Seq[InternalRow] = { | ||
| val sessionCatalog = sqlContext.sparkSession.sessionState.catalog | ||
| // When no source catalog is given, the source table is resolved through the session catalog. | ||
| val sourceTab = sourceCatalog.map { catalog => | ||
| catalog.loadTable(sourceTable.asIdentifier) | ||
| }.getOrElse( | ||
| V1Table(sessionCatalog.getTempViewOrPermanentTableMetadata(sourceTable.asTableIdentifier)) | ||
| ) | ||
|
|
||
| if (!targetCatalog.tableExists(targetTable.asIdentifier)) { | ||
| try { | ||
| targetCatalog.createTable(targetTable.asIdentifier, | ||
| sourceTab.schema, | ||
| sourceTab.partitioning, | ||
| sourceTab.properties()) | ||
| } catch { | ||
| case _: TableAlreadyExistsException if ifNotExists => | ||
| logWarning(s"Table ${targetTable.quoted} was created concurrently. Ignoring.") | ||
| } | ||
| } else if (!ifNotExists) { | ||
| throw new TableAlreadyExistsException(targetTable.asIdentifier) | ||
| } | ||
|
|
||
| Seq.empty | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ import org.apache.spark.sql._ | |
| import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} | ||
| import org.apache.spark.sql.connector.catalog._ | ||
| import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME | ||
| import org.apache.spark.sql.connector.expressions.LogicalExpressions | ||
| import org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION | ||
|
|
@@ -1562,6 +1563,97 @@ class DataSourceV2SQLSuite | |
| assert(e.message.contains("ALTER VIEW QUERY is only supported with v1 tables")) | ||
| } | ||
|
|
||
| test("CREATE TABLE LIKE with target v2 and source v2") { | ||
| val targetTable = "testcat.target_tab" | ||
| val sourceTable = "testcat.source_tab" | ||
|
|
||
| withTable(targetTable, sourceTable) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Is [remainder of comment lost in extraction] |
||
| val e1 = intercept[AnalysisException] { | ||
| sql(s"CREATE TABLE $targetTable LIKE $sourceTable") | ||
| } | ||
| assert(e1.message.contains("Table source_tab not found")) | ||
|
|
||
| val e2 = intercept[AnalysisException] { | ||
| sql(s"CREATE TABLE $targetTable LIKE $sourceTable LOCATION '/tmp'") | ||
| } | ||
| assert(e2.message.contains("Location clause not supported for CREATE TABLE LIKE" + | ||
| " statement when tables are of V2 type")) | ||
|
|
||
| sql( | ||
| s""" | ||
| |CREATE TABLE $sourceTable | ||
| |(id bigint, data string, p int) USING foo PARTITIONED BY (id, p) | ||
| |TBLPROPERTIES ('prop'='propvalue') | ||
| |""".stripMargin) | ||
| sql(s"CREATE TABLE $targetTable LIKE $sourceTable") | ||
| val testCatalog = catalog("testcat").asTableCatalog | ||
| val table = testCatalog.loadTable(Identifier.of(Array(), "target_tab")) | ||
| assert(table.name == targetTable) | ||
| assert(table.partitioning().size == 2) | ||
| assert(table.partitioning()(0) == LogicalExpressions.identity("id")) | ||
| assert(table.partitioning()(1) == LogicalExpressions.identity("p")) | ||
| assert(table.properties.asScala == Map("prop" -> "propvalue", "provider" -> "foo")) | ||
|
|
||
| // 2nd invocation should result in error. | ||
| val e3 = intercept[AnalysisException] { | ||
| sql(s"CREATE TABLE $targetTable LIKE $sourceTable") | ||
| } | ||
| assert(e3.message.contains("Table target_tab already exists")) | ||
|
|
||
| // No error when IF NOT EXISTS is specified. | ||
| sql(s"CREATE TABLE IF NOT EXISTS $targetTable LIKE $sourceTable") | ||
| } | ||
| } | ||
|
|
||
| test("CREATE TABLE LIKE with target v2 and source v1") { | ||
| val targetTable = "testcat.target_tab" | ||
| val sourceTable = "default.source_tab" | ||
|
|
||
| withTable(targetTable, sourceTable) { | ||
| val e1 = intercept[AnalysisException] { | ||
| sql(s"CREATE TABLE $targetTable LIKE $sourceTable") | ||
| } | ||
| assert(e1.message.contains("Table or view 'source_tab' not found in database 'default'")) | ||
|
|
||
| val e2 = intercept[AnalysisException] { | ||
| sql(s"CREATE TABLE $targetTable LIKE $sourceTable LOCATION '/tmp'") | ||
| } | ||
| assert(e2.message.contains("Location clause not supported for CREATE TABLE LIKE" + | ||
| " statement when tables are of V2 type")) | ||
|
|
||
| sql( | ||
| s""" | ||
| |CREATE TABLE $sourceTable | ||
| |(id bigint, data string, p int) USING parquet PARTITIONED BY (id, p) | ||
| |TBLPROPERTIES ('prop'='propvalue') | ||
| |""".stripMargin) | ||
| sql(s"CREATE TABLE $targetTable LIKE $sourceTable") | ||
| val testCatalog = catalog("testcat").asTableCatalog | ||
| val table = testCatalog.loadTable(Identifier.of(Array(), "target_tab")) | ||
| assert(table.name == targetTable) | ||
| assert(table.partitioning().size == 2) | ||
| assert(table.partitioning()(0) == LogicalExpressions.identity("id")) | ||
| assert(table.partitioning()(1) == LogicalExpressions.identity("p")) | ||
| assert(table.properties.asScala == Map("prop" -> "propvalue")) | ||
|
|
||
| // A second invocation without IF NOT EXISTS must fail because the target now exists. | ||
| val e3 = intercept[AnalysisException] { | ||
| sql(s"CREATE TABLE $targetTable LIKE $sourceTable") | ||
| } | ||
| assert(e3.message.contains("Table target_tab already exists")) | ||
|
|
||
| // No error is raised when IF NOT EXISTS is specified. | ||
| sql(s"CREATE TABLE IF NOT EXISTS $targetTable LIKE $sourceTable") | ||
|
|
||
| // A V1 target with a V2 source is not allowed. | ||
| val e4 = intercept[AnalysisException] { | ||
| sql(s"CREATE TABLE $sourceTable LIKE $targetTable") | ||
| } | ||
| assert(e4.message.contains("CREATE TABLE LIKE is not allowed when source table" + | ||
| " is V2 type and target table is V1 type")) | ||
| } | ||
| } | ||
|
|
||
| private def testV1Command(sqlCommand: String, sqlParams: String): Unit = { | ||
| val e = intercept[AnalysisException] { | ||
| sql(s"$sqlCommand $sqlParams") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can support the LOCATION clause. See `CatalogV2Utils.convertTableProperties`: we can store the location in a special table property, `location`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cloud-fan Thanks a lot. I was not aware of this. I will check.