-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-54112][CONNECT] Support getSchemas for SparkConnectDatabaseMetaData #52819
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -20,7 +20,9 @@ package org.apache.spark.sql.connect.client.jdbc | |||||
| import java.sql.{Array => _, _} | ||||||
|
|
||||||
| import org.apache.spark.SparkBuildInfo.{spark_version => SPARK_VERSION} | ||||||
| import org.apache.spark.sql.connect | ||||||
| import org.apache.spark.sql.connect.client.jdbc.SparkConnectDatabaseMetaData._ | ||||||
| import org.apache.spark.sql.functions._ | ||||||
| import org.apache.spark.util.VersionUtils | ||||||
|
|
||||||
| class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends DatabaseMetaData { | ||||||
|
|
@@ -277,6 +279,9 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas | |||||
|
|
||||||
| override def dataDefinitionIgnoredInTransactions: Boolean = false | ||||||
|
|
||||||
| private def isNullOrWildcard(pattern: String): Boolean = | ||||||
| pattern == null || pattern == "%" | ||||||
|
|
||||||
| override def getProcedures( | ||||||
| catalog: String, | ||||||
| schemaPattern: String, | ||||||
|
|
@@ -299,11 +304,59 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas | |||||
| new SparkConnectResultSet(df.collectResult()) | ||||||
| } | ||||||
|
|
||||||
| override def getSchemas: ResultSet = | ||||||
| throw new SQLFeatureNotSupportedException | ||||||
| override def getSchemas: ResultSet = { | ||||||
| conn.checkOpen() | ||||||
|
|
||||||
| override def getSchemas(catalog: String, schemaPattern: String): ResultSet = | ||||||
| throw new SQLFeatureNotSupportedException | ||||||
| getSchemas(null, null) | ||||||
| } | ||||||
|
|
||||||
| // Schema of the returned DataFrame is: | ||||||
| // |-- TABLE_SCHEM: string (nullable = false) | ||||||
| // |-- TABLE_CATALOG: string (nullable = false) | ||||||
| private def getSchemasDataFrame( | ||||||
| catalog: String, schemaPattern: String): connect.DataFrame = { | ||||||
|
|
||||||
| val schemaFilterClause = | ||||||
| if (isNullOrWildcard(schemaPattern)) "1=1" else s"TABLE_SCHEM LIKE '$schemaPattern'" | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What happens if schemaPattern contains a single quote? |
||||||
|
|
||||||
| def internalGetSchemas( | ||||||
| catalogOpt: Option[String], | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems that all 3 call sites pass deterministic values, so there's no need to wrap them in an Option, right? |
||||||
| schemaFilterClause: String): connect.DataFrame = { | ||||||
| val catalog = catalogOpt.getOrElse(conn.getCatalog) | ||||||
| // Spark SQL supports LIKE clause in SHOW SCHEMAS command, but we can't use that | ||||||
| // because the LIKE pattern does not follow SQL standard. | ||||||
| conn.spark.sql(s"SHOW SCHEMAS IN `$catalog`") | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What would happen if the |
||||||
| .select($"namespace".as("TABLE_SCHEM")) | ||||||
| .filter(schemaFilterClause) | ||||||
| .withColumn("TABLE_CATALOG", lit(catalog)) | ||||||
| } | ||||||
|
|
||||||
| if (catalog == null) { | ||||||
| // search in all catalogs | ||||||
| val emptyDf = conn.spark.emptyDataFrame | ||||||
| .withColumn("TABLE_SCHEM", lit("")) | ||||||
| .withColumn("TABLE_CATALOG", lit("")) | ||||||
| conn.spark.catalog.listCatalogs().collect().map(_.name).map { catalog => | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Otherwise, it would have the same name as the outer `catalog` parameter and shadow it, which hurts readability. |
||||||
| internalGetSchemas(Some(catalog), schemaFilterClause) | ||||||
| }.fold(emptyDf) { (l, r) => l.unionAll(r) } | ||||||
| } else if (catalog == "") { | ||||||
| // search only in current catalog | ||||||
| internalGetSchemas(None, schemaFilterClause) | ||||||
| .withColumn("TABLE_CATALOG", lit(conn.getCatalog)) | ||||||
| } else { | ||||||
| // search in the specific catalog | ||||||
| internalGetSchemas(Some(catalog), schemaFilterClause) | ||||||
| .withColumn("TABLE_CATALOG", lit(catalog)) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| override def getSchemas(catalog: String, schemaPattern: String): ResultSet = { | ||||||
| conn.checkOpen() | ||||||
|
|
||||||
| val df = getSchemasDataFrame(catalog, schemaPattern) | ||||||
| .orderBy("TABLE_CATALOG", "TABLE_SCHEM") | ||||||
| new SparkConnectResultSet(df.collectResult()) | ||||||
| } | ||||||
|
|
||||||
| override def getTableTypes: ResultSet = | ||||||
| throw new SQLFeatureNotSupportedException | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is used to test whether `fooPattern` matches ALL.
See https://docs.oracle.com/en/java/javase/17/docs/api/java.sql/java/sql/DatabaseMetaData.html