-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-15988] [SQL] Implement DDL commands: Create/Drop temporary macro #13706
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
f2433c2
0b93636
301e950
808a5fa
f4ed3bc
af0136d
5550496
9fe1881
b8ffdc9
fb8b57a
e895a9c
8d520eb
651b485
277ba9f
314913d
3d05e4f
22d8b1a
d91f633
1eb23c7
ad85109
b52698f
3eacebc
fce1121
eaff4e9
97632a9
4ee32e9
b539e94
1563f12
4d8e843
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,3 +52,6 @@ class NoSuchPartitionsException(db: String, table: String, specs: Seq[TableParti | |
|
|
||
| class NoSuchTempFunctionException(func: String) | ||
| extends AnalysisException(s"Temporary function '$func' not found") | ||
|
|
||
| class NoSuchTempMacroException(func: String) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please remove it. For the reasons, please see PR #17716.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, Thanks. |
||
| extends AnalysisException(s"Temporary macro '$func' not found") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -758,6 +758,24 @@ class SessionCatalog( | |
| } | ||
| } | ||
|
|
||
| /** Create a temporary macro. */ | ||
| def createTempMacro( | ||
| name: String, | ||
| info: ExpressionInfo, | ||
| funcDefinition: FunctionBuilder): Unit = { | ||
| if (functionRegistry.functionExists(name)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not entirely sure if we should throw an exception here. It unfortunately depends on the semantics you follow, SQL will throw an exception, whereas the Dataframe API will just overwrite the function. Let's follow Hive for now.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hive overwrites the temporary function without issuing an exception. |
||
| throw new TempFunctionAlreadyExistsException(name) | ||
| } | ||
| functionRegistry.registerMacro(name, info, funcDefinition) | ||
| } | ||
|
|
||
| /** Drop a temporary macro. */ | ||
| def dropTempMacro(name: String, ignoreIfNotExists: Boolean): Unit = { | ||
| if (!functionRegistry.dropMacro(name) && !ignoreIfNotExists) { | ||
| throw new NoSuchTempMacroException(name) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After we drop the macro, the existing function works well. That means, we did not delete the original built-in functions. The built-in function will not be dropped by
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I have updated it to handle this case. |
||
| } | ||
| } | ||
|
|
||
| protected def failFunctionLookup(name: String): Nothing = { | ||
| throw new NoSuchFunctionException(db = currentDb, func = name) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,14 +25,16 @@ import org.antlr.v4.runtime.tree.TerminalNode | |
|
|
||
| import org.apache.spark.sql.SaveMode | ||
| import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} | ||
| import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute | ||
| import org.apache.spark.sql.catalyst.catalog._ | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.parser._ | ||
| import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, ScriptInputOutputSchema} | ||
| import org.apache.spark.sql.execution.command._ | ||
| import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, _} | ||
| import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution} | ||
| import org.apache.spark.sql.types.DataType | ||
| import org.apache.spark.sql.types.{DataType, StructField} | ||
|
|
||
| /** | ||
| * Concrete parser for Spark SQL statements. | ||
|
|
@@ -589,6 +591,53 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
| ctx.TEMPORARY != null) | ||
| } | ||
|
|
||
| /** | ||
| * Create a [[CreateMacroCommand]] command. | ||
| * | ||
| * For example: | ||
| * {{{ | ||
| * CREATE TEMPORARY MACRO macro_name([col_name col_type, ...]) expression; | ||
| * }}} | ||
| */ | ||
| override def visitCreateMacro(ctx: CreateMacroContext): LogicalPlan = withOrigin(ctx) { | ||
| val arguments = Option(ctx.colTypeList).map(visitColTypeList(_)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: you can avoid |
||
| .getOrElse(Seq.empty[StructField]).map { col => | ||
| AttributeReference(col.name, col.dataType, col.nullable, col.metadata)() } | ||
| val colToIndex: Map[String, Int] = arguments.map(_.name).zipWithIndex.toMap | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move this into the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason I did not move this into CreateMacroCommand: analyzer.checkAnalysis() checks whether the macroFunction of CreateMacroCommand is valid. If macroFunction still contains UnresolvedAttributes, analyzer.checkAnalysis() throws an unresolved-attribute exception. If the UnresolvedAttributes are resolved beforehand, analyzer.checkAnalysis() does not throw an exception.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @hvanhovell So I think I will create a new wrapper class to avoid the unresolved exception, so that the DataFrame API can reuse this feature later.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I see. You could also move this code into the companion object of the |
||
| if (colToIndex.size != arguments.size) { | ||
| throw operationNotAllowed( | ||
| s"Cannot support duplicate colNames for CREATE TEMPORARY MACRO ", ctx) | ||
| } | ||
| val macroFunction = expression(ctx.expression).transformUp { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto |
||
| case u: UnresolvedAttribute => | ||
| val index = colToIndex.get(u.name).getOrElse( | ||
| throw new ParseException( | ||
| s"Cannot find colName: [${u}] for CREATE TEMPORARY MACRO", ctx)) | ||
| BoundReference(index, arguments(index).dataType, arguments(index).nullable) | ||
| case _: SubqueryExpression => | ||
| throw operationNotAllowed(s"Cannot support Subquery for CREATE TEMPORARY MACRO", ctx) | ||
| } | ||
|
|
||
| CreateMacroCommand( | ||
| ctx.macroName.getText, | ||
| arguments, | ||
| macroFunction) | ||
| } | ||
|
|
||
| /** | ||
| * Create a [[DropMacroCommand]] command. | ||
| * | ||
| * For example: | ||
| * {{{ | ||
| * DROP TEMPORARY MACRO [IF EXISTS] macro_name; | ||
| * }}} | ||
| */ | ||
| override def visitDropMacro(ctx: DropMacroContext): LogicalPlan = withOrigin(ctx) { | ||
| DropMacroCommand( | ||
| ctx.macroName.getText, | ||
| ctx.EXISTS != null) | ||
| } | ||
|
|
||
| /** | ||
| * Create a [[DropTableCommand]] command. | ||
| */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.execution.command | ||
|
|
||
| import org.apache.spark.sql.{AnalysisException, Row, SparkSession} | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
|
|
||
| /** | ||
| * The DDL command that creates a macro. | ||
| * To create a temporary macro, the syntax of using this command in SQL is: | ||
| * {{{ | ||
| * CREATE TEMPORARY MACRO macro_name([col_name col_type, ...]) expression; | ||
| * }}} | ||
| */ | ||
| case class CreateMacroCommand( | ||
| macroName: String, | ||
| columns: Seq[AttributeReference], | ||
| macroFunction: Expression) | ||
| extends RunnableCommand { | ||
|
|
||
| override def run(sparkSession: SparkSession): Seq[Row] = { | ||
| val catalog = sparkSession.sessionState.catalog | ||
| val macroInfo = columns.mkString(",") + " -> " + macroFunction.toString | ||
| val info = new ExpressionInfo(macroInfo, macroName) | ||
| val builder = (children: Seq[Expression]) => { | ||
| if (children.size != columns.size) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is slightly better to |
||
| throw new AnalysisException(s"Actual number of columns: ${children.size} != " + | ||
| s"expected number of columns: ${columns.size} for Macro $macroName") | ||
| } | ||
| macroFunction.transformUp { | ||
| case b: BoundReference => children(b.ordinal) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We do not validate the input type here. This would be entirely fine if macro arguments were defined without a
What do you think?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @hvanhovell Good points. Because the Analyzer checks each expression's checkInputDataTypes after ResolveFunctions, I think we do not need to validate the input types here. I do not see a benefit in adding casts, and doing so may introduce unnecessary casts. I will add some comments about this. Thanks.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok that is perfect. |
||
| } | ||
| } | ||
| catalog.createTempMacro(macroName, info, builder) | ||
| Seq.empty[Row] | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * The DDL command that drops a macro. | ||
| * ifExists: returns an error if the macro doesn't exist, unless this is true. | ||
| * {{{ | ||
| * DROP TEMPORARY MACRO [IF EXISTS] macro_name; | ||
| * }}} | ||
| */ | ||
| case class DropMacroCommand(macroName: String, ifExists: Boolean) | ||
| extends RunnableCommand { | ||
|
|
||
| override def run(sparkSession: SparkSession): Seq[Row] = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will drop any function... Can we make it Macro specific? |
||
| val catalog = sparkSession.sessionState.catalog | ||
| catalog.dropTempMacro(macroName, ifExists) | ||
| Seq.empty[Row] | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does Hive also support non-temporary macros?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, Hive currently supports only temporary macros.