-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-26913][SQL] New data source V2 API: SupportsDirectWrite #23824
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.sources.v2; | ||
|
|
||
| import org.apache.spark.annotation.Evolving; | ||
| import org.apache.spark.sql.sources.v2.writer.WriteBuilder; | ||
|
|
||
| /** | ||
| * An empty mix-in interface for {@link Table}, to indicate this table supports direct write without | ||
| * validation against the table schema. | ||
| * <p> | ||
| * If a {@link Table} implements this interface, the | ||
| * {@link SupportsWrite#newWriteBuilder(DataSourceOptions)} must return a {@link WriteBuilder} | ||
| * with {@link WriteBuilder#buildForBatch()} implemented. | ||
| * </p> | ||
| */ | ||
| @Evolving | ||
| public interface SupportsDirectWrite extends SupportsWrite {} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm fine with it but eventually we should put it in the capability API. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,7 +33,7 @@ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, Logi | |
| import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2Utils, FileDataSourceV2, WriteToDataSourceV2} | ||
| import org.apache.spark.sql.sources.BaseRelation | ||
| import org.apache.spark.sql.sources.v2._ | ||
| import org.apache.spark.sql.sources.v2.writer.SupportsSaveMode | ||
| import org.apache.spark.sql.sources.v2.writer.{SupportsSaveMode, WriteBuilder} | ||
| import org.apache.spark.sql.types.StructType | ||
|
|
||
| /** | ||
|
|
@@ -264,6 +264,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { | |
| val options = sessionOptions ++ extraOptions + checkFilesExistsOption | ||
| val dsOptions = new DataSourceOptions(options.asJava) | ||
| provider.getTable(dsOptions) match { | ||
| case table: SupportsDirectWrite => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should work without save mode. That said, we should add a new flag in
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Creating |
||
| writeToDataSourceV2(table.newWriteBuilder(dsOptions), table.name) | ||
|
|
||
| case table: SupportsBatchWrite => | ||
| lazy val relation = DataSourceV2Relation.create(table, options) | ||
| mode match { | ||
|
|
@@ -279,24 +282,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { | |
| } | ||
|
|
||
| case _ => | ||
| table.newWriteBuilder(dsOptions) match { | ||
| case writeBuilder: SupportsSaveMode => | ||
| val write = writeBuilder.mode(mode) | ||
| .withQueryId(UUID.randomUUID().toString) | ||
| .withInputDataSchema(df.logicalPlan.schema) | ||
| .buildForBatch() | ||
| // It can only return null with `SupportsSaveMode`. We can clean it up after | ||
| // removing `SupportsSaveMode`. | ||
| if (write != null) { | ||
| runCommand(df.sparkSession, "save") { | ||
| WriteToDataSourceV2(write, df.logicalPlan) | ||
| } | ||
| } | ||
|
|
||
| case _ => | ||
| throw new AnalysisException( | ||
| s"data source ${table.name} does not support SaveMode $mode") | ||
| } | ||
| writeToDataSourceV2(table.newWriteBuilder(dsOptions), table.name) | ||
| } | ||
|
|
||
| // Streaming also uses the data source V2 API. So it may be that the data source implements | ||
|
|
@@ -309,6 +295,27 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { | |
| } | ||
| } | ||
|
|
||
| private def writeToDataSourceV2(writeBuilder: WriteBuilder, name: String): Unit = { | ||
| writeBuilder match { | ||
| case writeBuilder: SupportsSaveMode => | ||
| val write = writeBuilder.mode(mode) | ||
| .withQueryId(UUID.randomUUID().toString) | ||
| .withInputDataSchema(df.logicalPlan.schema) | ||
| .buildForBatch() | ||
| // It can only return null with `SupportsSaveMode`. We can clean it up after | ||
| // removing `SupportsSaveMode`. | ||
| if (write != null) { | ||
| runCommand(df.sparkSession, "save") { | ||
| WriteToDataSourceV2(write, df.logicalPlan) | ||
| } | ||
| } | ||
|
|
||
| case _ => | ||
| throw new AnalysisException( | ||
| s"data source ${name} does not support SaveMode $mode") | ||
| } | ||
| } | ||
|
|
||
| private def saveToV1Source(): Unit = { | ||
| // Code path for data source v1. | ||
| runCommand(df.sparkSession, "save") { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This is wrong; please remove the entire `<p>..</p>`.