[SPARK-42555][CONNECT][FOLLOWUP] Add the new proto msg to support the remaining jdbc API #40277
@@ -140,6 +140,9 @@ message Read {
     // (Optional) A list of path for file-system backed data sources.
     repeated string paths = 4;
+
+    // (Optional) Condition in the where clause for each partition.
+    repeated string predicates = 5;
   }
 }

Contributor: Please add the comment that this currently only works for jdbc.

Contributor (Author): OK
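For illustration, a hand-built `Read.DataSource` message that exercises the new `predicates` field might look like the following. This is a hypothetical client-side construction, not code from this PR; it only assumes the standard protobuf-generated builder methods (`putOptions`, `addPredicates`) for the fields shown in the diff above.

```scala
import org.apache.spark.connect.proto

// Hypothetical construction of a DataSource read relation using the new
// `predicates` field: one WHERE-clause fragment per JDBC partition.
val dataSource = proto.Read.DataSource
  .newBuilder()
  .putOptions("url", "jdbc:h2:mem:testdb0;user=testUser;password=testPass")
  .putOptions("dbtable", "TEST.PEOPLE")
  .addPredicates("THEID < 2")
  .addPredicates("THEID >= 2")
  .build()

// Wrap it in a Read relation; the server-side planner (see the
// SparkConnectPlanner diff below) turns this into a JDBC scan.
val read = proto.Read.newBuilder().setDataSource(dataSource).build()
```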
@@ -0,0 +1 @@
+Relation [NAME#0,THEID#0] JDBCRelation(TEST.PEOPLE) [numPartitions=2]
@@ -0,0 +1,14 @@
+{
+  "common": {
+    "planId": "0"
+  },
+  "read": {
+    "dataSource": {
+      "options": {
+        "url": "jdbc:h2:mem:testdb0;user\u003dtestUser;password\u003dtestPass",
+        "dbtable": "TEST.PEOPLE"
+      },
+      "predicates": ["THEID \u003c 2", "THEID \u003e\u003d 2"]
+    }
+  }
+}
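The two new golden files above are the analyzed-plan string and the JSON-encoded proto plan for the same test query; the `\u003d`, `\u003c`, and `\u003e` escapes are simply `=`, `<`, and `>`. Below is a sketch of the kind of client call that produces such a plan, assuming `spark` is a Spark Connect `SparkSession` and the in-memory H2 database implied by the URL is already populated (illustrative, not the exact test code):

```scala
import java.util.Properties

// Credentials are embedded in the URL, matching the "url" option in the JSON
// plan above; connection properties are left empty.
val df = spark.read.jdbc(
  "jdbc:h2:mem:testdb0;user=testUser;password=testPass", // JDBC url
  "TEST.PEOPLE",                                         // dbtable
  Array("THEID < 2", "THEID >= 2"),                      // one predicate per partition
  new Properties())

df.explain()
// Relation [NAME#0,THEID#0] JDBCRelation(TEST.PEOPLE) [numPartitions=2]
```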
@@ -24,7 +24,7 @@ import com.google.common.collect.{Lists, Maps}
 import com.google.protobuf.{Any => ProtoAny, ByteString}
 import io.grpc.stub.StreamObserver
 
-import org.apache.spark.{SparkEnv, TaskContext}
+import org.apache.spark.{Partition, SparkEnv, TaskContext}
 import org.apache.spark.api.python.{PythonEvalType, SimplePythonFunction}
 import org.apache.spark.connect.proto
 import org.apache.spark.connect.proto.{ExecutePlanResponse, SqlCommand}
@@ -48,6 +48,8 @@ import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.QueryExecution
 import org.apache.spark.sql.execution.arrow.ArrowConverters
 import org.apache.spark.sql.execution.command.CreateViewCommand
+import org.apache.spark.sql.execution.datasources.LogicalRelation
+import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition, JDBCRelation}
 import org.apache.spark.sql.execution.python.UserDefinedPythonFunction
 import org.apache.spark.sql.internal.CatalogImpl
 import org.apache.spark.sql.types._
@@ -684,26 +686,43 @@ class SparkConnectPlanner(val session: SparkSession) {
       case proto.Read.ReadTypeCase.DATA_SOURCE =>
         val localMap = CaseInsensitiveMap[String](rel.getDataSource.getOptionsMap.asScala.toMap)
         val reader = session.read
-        if (rel.getDataSource.hasFormat) {
-          reader.format(rel.getDataSource.getFormat)
-        }
         localMap.foreach { case (key, value) => reader.option(key, value) }
-        if (rel.getDataSource.hasSchema && rel.getDataSource.getSchema.nonEmpty) {
-          DataType.parseTypeWithFallback(
-            rel.getDataSource.getSchema,
-            StructType.fromDDL,
-            fallbackParser = DataType.fromJson) match {
-            case s: StructType => reader.schema(s)
-            case other => throw InvalidPlanInput(s"Invalid schema $other")
-          }
-        }
-        if (rel.getDataSource.getPathsCount == 0) {
-          reader.load().queryExecution.analyzed
-        } else if (rel.getDataSource.getPathsCount == 1) {
-          reader.load(rel.getDataSource.getPaths(0)).queryExecution.analyzed
-        } else {
-          reader.load(rel.getDataSource.getPathsList.asScala.toSeq: _*).queryExecution.analyzed
+        if (rel.getDataSource.getPredicatesCount == 0) {
+          if (rel.getDataSource.hasFormat) {
+            reader.format(rel.getDataSource.getFormat)
+          }
+          if (rel.getDataSource.hasSchema && rel.getDataSource.getSchema.nonEmpty) {
+            DataType.parseTypeWithFallback(
+              rel.getDataSource.getSchema,
+              StructType.fromDDL,
+              fallbackParser = DataType.fromJson) match {
+              case s: StructType => reader.schema(s)
+              case other => throw InvalidPlanInput(s"Invalid schema $other")
+            }
+          }
+          if (rel.getDataSource.getPathsCount == 0) {
+            reader.load().queryExecution.analyzed
+          } else if (rel.getDataSource.getPathsCount == 1) {
+            reader.load(rel.getDataSource.getPaths(0)).queryExecution.analyzed
+          } else {
+            reader.load(rel.getDataSource.getPathsList.asScala.toSeq: _*).queryExecution.analyzed
+          }
+        } else {
+          if (!localMap.contains(JDBCOptions.JDBC_URL) ||
+              !localMap.contains(JDBCOptions.JDBC_TABLE_NAME)) {
+            throw InvalidPlanInput(s"Invalid jdbc params, please specify jdbc url and table.")
+          }
+          val url = rel.getDataSource.getOptionsMap.get(JDBCOptions.JDBC_URL)
+          val table = rel.getDataSource.getOptionsMap.get(JDBCOptions.JDBC_TABLE_NAME)
+          val options = new JDBCOptions(url, table, localMap)
+          val predicates = rel.getDataSource.getPredicatesList.asScala.toArray
+          val parts: Array[Partition] = predicates.zipWithIndex.map { case (part, i) =>
+            JDBCPartition(part, i): Partition
+          }
+          val relation = JDBCRelation(parts, options)(session)
+          LogicalRelation(relation)
         }
 
       case _ => throw InvalidPlanInput("Does not support " + rel.getReadTypeCase.name())
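In the new else branch, the guard only requires the two standard JDBC reader options, `JDBCOptions.JDBC_URL` ("url") and `JDBCOptions.JDBC_TABLE_NAME` ("dbtable"), looked up case-insensitively, which is why exactly those keys appear in the golden-file plan above. A minimal sketch of that validation in isolation, assuming only the classes already imported in the diff:

```scala
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions

// Same check as in the planner branch: both "url" and "dbtable" must be
// present (case-insensitively) before a JDBC relation can be built.
// The planner itself throws InvalidPlanInput rather than IllegalArgumentException.
val localMap = CaseInsensitiveMap[String](Map(
  "URL" -> "jdbc:h2:mem:testdb0;user=testUser;password=testPass",
  "dbtable" -> "TEST.PEOPLE"))

val hasRequired =
  localMap.contains(JDBCOptions.JDBC_URL) && localMap.contains(JDBCOptions.JDBC_TABLE_NAME)
require(hasRequired, "Invalid jdbc params, please specify jdbc url and table.")
```

Each predicate is then wrapped as a `JDBCPartition`, so the two predicates in the golden file yield the `numPartitions=2` shown in the explain output above.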
Reviewer: Can you please set the format to JDBC? We are now relying on the presence of predicates to figure out that something is a JDBC table. That relies far too heavily on the client doing the right thing; for example, what would happen if you set format = parquet and still define predicates?

Author: Yeah, we can't rely on the client.
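A minimal sketch of the kind of check the reviewer is asking for, expressed as a standalone helper over the `Read.DataSource` message; this is an illustration of the idea, not the actual follow-up change made in the PR:

```scala
import org.apache.spark.connect.proto

// Hypothetical validation: only honor predicates when the client explicitly
// sets format = "jdbc", instead of inferring JDBC from the mere presence of
// predicates (the planner would raise InvalidPlanInput rather than this exception).
def validateJdbcPredicates(dataSource: proto.Read.DataSource): Unit = {
  val isJdbcFormat =
    dataSource.hasFormat && dataSource.getFormat.equalsIgnoreCase("jdbc")
  if (dataSource.getPredicatesCount > 0 && !isJdbcFormat) {
    throw new IllegalArgumentException(
      "Predicates can only be used with format 'jdbc'.")
  }
}
```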