-
Notifications
You must be signed in to change notification settings - Fork 29k
[WIP][SPARK-42578][CONNECT] Add JDBC to DataFrameWriter #40291
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c0e1e21
2d9969a
8e9827e
b6727e3
fa72b37
ad58ba9
d45aac7
f4f2f43
85d26dc
2c92c0f
e2b4c76
256cd62
0defa24
b933e8d
4f9db44
56a991c
b29b112
84990da
97cb957
210db67
6bfcefc
2f3e265
4274abb
c4968b2
b977bd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,7 @@ | |
|
|
||
| package org.apache.spark.sql | ||
|
|
||
| import java.util.Locale | ||
| import java.util.{Locale, Properties} | ||
|
|
||
| import scala.collection.JavaConverters._ | ||
|
|
||
|
|
@@ -345,6 +345,44 @@ final class DataFrameWriter[T] private[sql] (ds: Dataset[T]) { | |
| }) | ||
| } | ||
|
|
||
| /** | ||
| * Saves the content of the `DataFrame` to an external database table via JDBC. In the case the | ||
| * table already exists in the external database, behavior of this function depends on the save | ||
| * mode, specified by the `mode` function (default to throwing an exception). | ||
| * | ||
| * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash | ||
| * your external database systems. | ||
| * | ||
| * JDBC-specific option and parameter documentation for storing tables via JDBC in <a | ||
| * href="https://spark.apache.org/docs/latest/sql-data-sources-jdbc.html#data-source-option"> | ||
| * Data Source Option</a> in the version you use. | ||
| * | ||
| * @param table | ||
| * Name of the table in the external database. | ||
| * @param connectionProperties | ||
| * JDBC database connection arguments, a list of arbitrary string tag/value. Normally at least | ||
| * a "user" and "password" property should be included. "batchsize" can be used to control the | ||
| * number of rows per insert. "isolationLevel" can be one of "NONE", "READ_COMMITTED", | ||
| * "READ_UNCOMMITTED", "REPEATABLE_READ", or "SERIALIZABLE", corresponding to standard | ||
| * transaction isolation levels defined by JDBC's Connection object, with default of | ||
| * "READ_UNCOMMITTED". | ||
| * @since 3.4.0 | ||
| */ | ||
| def jdbc(url: String, table: String, connectionProperties: Properties): Unit = { | ||
| // connectionProperties should override settings in extraOptions. | ||
|
||
| this.extraOptions ++= connectionProperties.asScala | ||
| // explicit url and dbtable should override all | ||
| this.extraOptions ++= Seq("url" -> url, "dbtable" -> table) | ||
| format("jdbc") | ||
| executeWriteOperation(builder => { | ||
| builder.setTable( | ||
| proto.WriteOperation.SaveTable | ||
| .newBuilder() | ||
| .setTableName(table) | ||
| .setSaveMethod(proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE)) | ||
| }) | ||
| } | ||
|
|
||
| /** | ||
| * Saves the content of the `DataFrame` in JSON format (<a href="http://jsonlines.org/"> JSON | ||
| * Lines text format or newline-delimited JSON</a>) at the specified path. This is equivalent | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can remove
`ProblemFilters.exclude[Problem]("org.apache.spark.sql.DataFrameWriter.jdbc")` from `CheckConnectJvmClientCompatibility` in this PR
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the reminder.