-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-12728][SQL] Integrates SQL generation with native view #10733
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c04e951
d802724
1d89183
9065506
3c50fd6
51b9db2
737a7d0
0ce28d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,9 +19,10 @@ package org.apache.spark.sql.hive.execution | |
|
|
||
| import org.apache.spark.sql.{AnalysisException, Row, SQLContext} | ||
| import org.apache.spark.sql.catalyst.TableIdentifier | ||
| import org.apache.spark.sql.catalyst.expressions.Attribute | ||
| import org.apache.spark.sql.catalyst.expressions.Alias | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} | ||
| import org.apache.spark.sql.execution.RunnableCommand | ||
| import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes} | ||
| import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes, SQLBuilder} | ||
| import org.apache.spark.sql.hive.client.{HiveColumn, HiveTable} | ||
|
|
||
| /** | ||
|
|
@@ -32,10 +33,12 @@ import org.apache.spark.sql.hive.client.{HiveColumn, HiveTable} | |
| // from Hive and may not work for some cases like create view on self join. | ||
| private[hive] case class CreateViewAsSelect( | ||
| tableDesc: HiveTable, | ||
| childSchema: Seq[Attribute], | ||
| child: LogicalPlan, | ||
| allowExisting: Boolean, | ||
| orReplace: Boolean) extends RunnableCommand { | ||
|
|
||
| private val childSchema = child.output | ||
|
|
||
| assert(tableDesc.schema == Nil || tableDesc.schema.length == childSchema.length) | ||
| assert(tableDesc.viewText.isDefined) | ||
|
|
||
|
|
@@ -44,55 +47,83 @@ private[hive] case class CreateViewAsSelect( | |
| override def run(sqlContext: SQLContext): Seq[Row] = { | ||
| val hiveContext = sqlContext.asInstanceOf[HiveContext] | ||
|
|
||
| if (hiveContext.catalog.tableExists(tableIdentifier)) { | ||
| if (allowExisting) { | ||
| // view already exists, will do nothing, to keep consistent with Hive | ||
| } else if (orReplace) { | ||
| hiveContext.catalog.client.alertView(prepareTable()) | ||
| } else { | ||
| hiveContext.catalog.tableExists(tableIdentifier) match { | ||
| case true if allowExisting => | ||
| // Handles `CREATE VIEW IF NOT EXISTS v0 AS SELECT ...`. Does nothing when the target view | ||
| // already exists. | ||
|
|
||
| case true if orReplace => | ||
| // Handles `CREATE OR REPLACE VIEW v0 AS SELECT ...` | ||
| hiveContext.catalog.client.alertView(prepareTable(sqlContext)) | ||
|
|
||
| case true => | ||
| // Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the target view already | ||
| // exists. | ||
| throw new AnalysisException(s"View $tableIdentifier already exists. " + | ||
| "If you want to update the view definition, please use ALTER VIEW AS or " + | ||
| "CREATE OR REPLACE VIEW AS") | ||
| } | ||
| } else { | ||
| hiveContext.catalog.client.createView(prepareTable()) | ||
|
|
||
| case false => | ||
| hiveContext.catalog.client.createView(prepareTable(sqlContext)) | ||
| } | ||
|
|
||
| Seq.empty[Row] | ||
| } | ||
|
|
||
| private def prepareTable(): HiveTable = { | ||
| // setup column types according to the schema of child. | ||
| val schema = if (tableDesc.schema == Nil) { | ||
| childSchema.map { attr => | ||
| HiveColumn(attr.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), null) | ||
| } | ||
| private def prepareTable(sqlContext: SQLContext): HiveTable = { | ||
| val expandedText = if (sqlContext.conf.canonicalView) { | ||
| rebuildViewQueryString(sqlContext).getOrElse(wrapViewTextWithSelect) | ||
| } else { | ||
| childSchema.zip(tableDesc.schema).map { case (attr, col) => | ||
| HiveColumn(col.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), col.comment) | ||
| wrapViewTextWithSelect | ||
| } | ||
|
|
||
| val viewSchema = { | ||
| if (tableDesc.schema.isEmpty) { | ||
| childSchema.map { attr => | ||
| HiveColumn(attr.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), null) | ||
| } | ||
| } else { | ||
| childSchema.zip(tableDesc.schema).map { case (attr, col) => | ||
| HiveColumn(col.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), col.comment) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| val columnNames = childSchema.map(f => verbose(f.name)) | ||
| tableDesc.copy(schema = viewSchema, viewText = Some(expandedText)) | ||
| } | ||
|
|
||
| private def wrapViewTextWithSelect: String = { | ||
| // When user specified column names for view, we should create a project to do the renaming. | ||
| // When no column name specified, we still need to create a project to declare the columns | ||
| // we need, to make us more robust to top level `*`s. | ||
| val projectList = if (tableDesc.schema == Nil) { | ||
| columnNames.mkString(", ") | ||
| } else { | ||
| columnNames.zip(tableDesc.schema.map(f => verbose(f.name))).map { | ||
| case (name, alias) => s"$name AS $alias" | ||
| }.mkString(", ") | ||
| val viewOutput = { | ||
| val columnNames = childSchema.map(f => quote(f.name)) | ||
| if (tableDesc.schema.isEmpty) { | ||
| columnNames.mkString(", ") | ||
| } else { | ||
| columnNames.zip(tableDesc.schema.map(f => quote(f.name))).map { | ||
| case (name, alias) => s"$name AS $alias" | ||
| }.mkString(", ") | ||
| } | ||
| } | ||
|
|
||
| val viewName = verbose(tableDesc.name) | ||
|
|
||
| val expandedText = s"SELECT $projectList FROM (${tableDesc.viewText.get}) $viewName" | ||
| val viewText = tableDesc.viewText.get | ||
| val viewName = quote(tableDesc.name) | ||
| s"SELECT $viewOutput FROM ($viewText) $viewName" | ||
| } | ||
|
|
||
| tableDesc.copy(schema = schema, viewText = Some(expandedText)) | ||
| private def rebuildViewQueryString(sqlContext: SQLContext): Option[String] = { | ||
| val logicalPlan = if (tableDesc.schema.isEmpty) { | ||
| child | ||
| } else { | ||
| val projectList = childSchema.zip(tableDesc.schema).map { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it safe to call zip? We need to check the number of fields, right?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's also have a test for this.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's an invariant condition of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nvm, we already have the test at https://github.com/apache/spark/pull/10733/files#diff-074b1d8480e0d0d7c212bc4461f3d4acR43. |
||
| case (attr, col) => Alias(attr, col.name)() | ||
| } | ||
| sqlContext.executePlan(Project(projectList, child)).analyzed | ||
| } | ||
| new SQLBuilder(logicalPlan, sqlContext).toSQL | ||
| } | ||
|
|
||
| // escape backtick with double-backtick in column name and wrap it with backtick. | ||
| private def verbose(name: String) = s"`${name.replaceAll("`", "``")}`" | ||
| private def quote(name: String) = s"`${name.replaceAll("`", "``")}`" | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry. I missed this. How about we also have a check for this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just realized that we do the check at https://github.com/apache/spark/pull/10733/files#diff-074b1d8480e0d0d7c212bc4461f3d4acR43 (
assert(tableDesc.schema == Nil || tableDesc.schema.length == childSchema.length))