Skip to content
Closed
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f2433c2
init commit
lianhuiwang Jun 16, 2016
0b93636
fix unit test
lianhuiwang Jun 16, 2016
301e950
update
lianhuiwang Jun 18, 2016
808a5fa
update createTempMacro
lianhuiwang Jun 18, 2016
f4ed3bc
address comments
lianhuiwang Jun 20, 2016
af0136d
update
lianhuiwang Jun 20, 2016
5550496
based master
lianhuiwang Nov 10, 2016
9fe1881
update code
lianhuiwang Nov 10, 2016
b8ffdc9
fix function
lianhuiwang Nov 10, 2016
fb8b57a
update comment
lianhuiwang Nov 11, 2016
e895a9c
update comments
lianhuiwang Nov 11, 2016
8d520eb
Merge branch 'master' of https://github.com/apache/spark into macro
lianhuiwang May 27, 2017
651b485
Merge branch 'master' of https://github.com/apache/spark into macro
lianhuiwang May 27, 2017
277ba9f
Merge branch 'macro' of https://github.com/lianhuiwang/spark into macro
lianhuiwang May 27, 2017
314913d
Merge branch 'macro' of https://github.com/lianhuiwang/spark into macro
lianhuiwang May 27, 2017
3d05e4f
reformat code.
lianhuiwang May 27, 2017
22d8b1a
reformat code.
lianhuiwang May 27, 2017
d91f633
reformat code.
lianhuiwang May 27, 2017
1eb23c7
reformat code.
lianhuiwang May 27, 2017
ad85109
remove type check for macro as same with hive.
lianhuiwang May 27, 2017
b52698f
add import
lianhuiwang May 27, 2017
3eacebc
treat macro as temp function like hive
lianhuiwang May 27, 2017
fce1121
add Modifier for FunctionRegistry.
lianhuiwang May 27, 2017
eaff4e9
update comments.
lianhuiwang May 27, 2017
97632a9
add dropMacro().
lianhuiwang May 27, 2017
4ee32e9
reformat code style
lianhuiwang May 27, 2017
b539e94
address some comments.
lianhuiwang May 30, 2017
1563f12
address some comments.
lianhuiwang May 30, 2017
4d8e843
update
lianhuiwang May 30, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ statement
| CREATE TEMPORARY? FUNCTION qualifiedName AS className=STRING
(USING resource (',' resource)*)? #createFunction
| DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName #dropFunction
| CREATE TEMPORARY MACRO macroName=identifier

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does Hive also support non-temporary macros?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, Hive currently supports only temporary macros.

'(' colTypeList? ')' expression #createMacro
| DROP TEMPORARY MACRO (IF EXISTS)? macroName=identifier #dropMacro
| EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)?
statement #explain
| SHOW TABLES ((FROM | IN) db=identifier)?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public class ExpressionInfo {
private String name;
private String extended;
private String db;
private boolean macro;

public String getClassName() {
return className;
Expand All @@ -47,19 +48,32 @@ public String getDb() {
return db;
}

public ExpressionInfo(String className, String db, String name, String usage, String extended) {
/**
 * Returns the macro flag this ExpressionInfo was constructed with.
 */
public boolean isMacro() {
return macro;
}

/**
 * Creates an ExpressionInfo, storing each argument in the field of the same name.
 *
 * @param macro stored as-is and returned by isMacro()
 */
public ExpressionInfo(String className, String db, String name, String usage, String extended, boolean macro) {
this.className = className;
this.db = db;
this.name = name;
this.usage = usage;
this.extended = extended;
this.macro = macro;
}

/**
 * Creates an ExpressionInfo with the macro flag set to false.
 */
public ExpressionInfo(String className, String db, String name, String usage, String extended) {
this(className, db, name, usage, extended, false);
}

public ExpressionInfo(String className, String name) {
this(className, null, name, null, null);
this(className, null, name, null, null, false);
}

/**
 * Creates an ExpressionInfo with the given macro flag; db, usage and extended are passed as null.
 */
public ExpressionInfo(String className, String name, boolean macro) {
this(className, null, name, null, null, macro);
}

public ExpressionInfo(String className, String db, String name) {
this(className, db, name, null, null);
this(className, db, name, null, null, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ trait FunctionRegistry {
/** Drop a function and return whether the function existed. */
def dropFunction(name: String): Boolean

/** Drop a macro and return whether the macro existed. */
def dropMacro(name: String): Boolean

/** Checks if a function with a given name exists. */
def functionExists(name: String): Boolean = lookupFunction(name).isDefined

Expand Down Expand Up @@ -107,6 +110,14 @@ class SimpleFunctionRegistry extends FunctionRegistry {
functionBuilders.remove(name).isDefined
}

override def dropMacro(name: String): Boolean = synchronized {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A drop function can currently also drop a macro. Can you make sure that this cannot happen?

Maybe we should consolidate this into a single drop function with a macro flag. cc @gatorsmile WDYT?

@lianhuiwang lianhuiwang May 30, 2017

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hive can drop a temporary function using the command 'drop Macro', and it can also drop a temporary macro using the command 'drop temporary function'.

if (functionBuilders.get(name).map(_._1).filter(_.isMacro).isDefined) {
functionBuilders.remove(name).isDefined
} else {
false
}
}

override def clear(): Unit = synchronized {
functionBuilders.clear()
}
Expand Down Expand Up @@ -146,6 +157,10 @@ object EmptyFunctionRegistry extends FunctionRegistry {
throw new UnsupportedOperationException
}

/** Unsupported here: every call throws [[UnsupportedOperationException]]. */
override def dropMacro(name: String): Boolean =
  throw new UnsupportedOperationException

/** Unsupported here: every call throws [[UnsupportedOperationException]]. */
override def dropFunction(name: String): Boolean =
  throw new UnsupportedOperationException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,6 @@ class NoSuchPartitionsException(db: String, table: String, specs: Seq[TableParti

/**
 * An [[AnalysisException]] whose message reports that the temporary function `func` was not found.
 *
 * @param func the function name interpolated into the error message
 */
class NoSuchTempFunctionException(func: String)
extends AnalysisException(s"Temporary function '$func' not found")

class NoSuchTempMacroException(func: String)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove it. For reasons, please see the PR #17716.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, Thanks.

extends AnalysisException(s"Temporary macro '$func' not found")
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,24 @@ class SessionCatalog(
}
}

/** Create a temporary macro. */
def createTempMacro(
name: String,
info: ExpressionInfo,
functionBuilder: FunctionBuilder): Unit = {
if (functionRegistry.functionExists(name)) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not entirely sure if we should throw an exception here. It unfortunately depends on the semantics you follow, SQL will throw an exception, whereas the Dataframe API will just overwrite the function. Let's follow Hive for now.

@gatorsmile gatorsmile May 29, 2017

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hive> create temporary macro max(x int)
    > x*x;
OK
Time taken: 0.014 seconds

hive> select max(3) from t1;
OK
9
Time taken: 0.468 seconds, Fetched: 1 row(s)

hive> select max(3,4) from t1;
FAILED: SemanticException [Error 10015]: Line 1:7 Arguments length mismatch '4': The macro max accepts exactly 1 arguments.

Hive overwrites the temporary function without issuing an exception.

throw new AnalysisException(s"Function $name already exists")
}
functionRegistry.registerFunction(name, info, functionBuilder)
}

/** Drop a temporary macro. */
def dropTempMacro(name: String, ignoreIfNotExists: Boolean): Unit = {
if (!functionRegistry.dropMacro(name) && !ignoreIfNotExists) {
throw new NoSuchTempMacroException(name)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hive>  DROP TEMPORARY MACRO max;
OK
Time taken: 0.01 seconds
hive> select max(3) from t1;
OK
3

After we drop the macro, the existing function works well. That means, we did not delete the original built-in functions. The built-in function will not be dropped by DROP TEMPORARY MACRO. After we drop the macro with the same name, the original function max is using the original built-in function.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have updated it to handle this case.

}
}

/**
* Returns whether it is a temporary function. If it does not exist, returns false.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.{CreateTable, _}
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.{StructField, StructType}

/**
* Concrete parser for Spark SQL statements.
Expand Down Expand Up @@ -715,6 +715,37 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
ctx.TEMPORARY != null)
}

/**
* Create a [[CreateMacroCommand]] command.
*
* For example:
* {{{
* CREATE TEMPORARY MACRO macro_name([col_name col_type, ...]) expression;
* }}}
*/
override def visitCreateMacro(ctx: CreateMacroContext): LogicalPlan = withOrigin(ctx) {
val arguments = Option(ctx.colTypeList).map(visitColTypeList(_))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: you can avoid (_)...

.getOrElse(Seq.empty[StructField])
val e = expression(ctx.expression)
CreateMacroCommand(
ctx.macroName.getText,
MacroFunctionWrapper(arguments, e))
}

/**
 * Builds a [[DropMacroCommand]] from a parsed `DROP TEMPORARY MACRO` statement.
 *
 * Expected syntax:
 * {{{
 *   DROP TEMPORARY MACRO [IF EXISTS] macro_name;
 * }}}
 */
override def visitDropMacro(ctx: DropMacroContext): LogicalPlan = withOrigin(ctx) {
  val macroName = ctx.macroName.getText
  // The EXISTS token is non-null only when the statement contains IF EXISTS.
  val ifExists = ctx.EXISTS != null
  DropMacroCommand(macroName, ifExists)
}

/**
* Create a [[DropTableCommand]] command.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.StructField

/**
 * Bundles the declared arguments and the body expression of a macro function.
 *
 * @param columns the macro's declared arguments, one [[StructField]] per argument
 * @param macroFunction the expression that forms the macro body
 */
case class MacroFunctionWrapper(columns: Seq[StructField], macroFunction: Expression)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because if I do not use MacroFunctionWrapper, the Analyzer will check macroFunction and report it as invalid.


/**
* The DDL command that creates a macro.
* To create a temporary macro, the syntax of using this command in SQL is:
* {{{
* CREATE TEMPORARY MACRO macro_name([col_name col_type, ...]) expression;
* }}}
*/
case class CreateMacroCommand(
macroName: String,
funcWrapper: MacroFunctionWrapper)
extends RunnableCommand {

override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
val columns = funcWrapper.columns.map { col =>

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be easier to use StructType().toAttributes here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, i will do it, thanks.

AttributeReference(col.name, col.dataType, col.nullable, col.metadata)() }

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: put } on a new line

val colToIndex: Map[String, Int] = columns.map(_.name).zipWithIndex.toMap
if (colToIndex.size != columns.size) {
throw new AnalysisException(s"Cannot support duplicate colNames " +
s"for CREATE TEMPORARY MACRO $macroName, actual columns: ${columns.mkString(",")}")
}
val macroFunction = funcWrapper.macroFunction.transform {
case u: UnresolvedAttribute =>
val index = colToIndex.get(u.name).getOrElse(

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should respect the case-sensitivity settings here. So a lookup might not be the best idea.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, i will do it, thanks.

throw new AnalysisException(s"Cannot find colName: ${u} " +
s"for CREATE TEMPORARY MACRO $macroName, actual columns: ${columns.mkString(",")}"))
BoundReference(index, columns(index).dataType, columns(index).nullable)
case u: UnresolvedFunction =>
sparkSession.sessionState.catalog.lookupFunction(u.name, u.children)
case s: SubqueryExpression =>
throw new AnalysisException(s"Cannot support Subquery: ${s} " +
s"for CREATE TEMPORARY MACRO $macroName")
case u: UnresolvedGenerator =>

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this what Hive does? I really don't see why we should not support this.

Please note that we cannot use generators if we decide that an expression has to be a fully resolved expression at creation time.

throw new AnalysisException(s"Cannot support Generator: ${u} " +
s"for CREATE TEMPORARY MACRO $macroName")
}

val macroInfo = columns.mkString(",") + " -> " + funcWrapper.macroFunction.toString

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you give an example of what this would look like?

val info = new ExpressionInfo(macroInfo, macroName, true)
val builder = (children: Seq[Expression]) => {
if (children.size != columns.size) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is slightly better to store columns.size in a separate variable, so we do not include columns in the closure.

throw new AnalysisException(s"Actual number of columns: ${children.size} != " +
s"expected number of columns: ${columns.size} for Macro $macroName")
}
macroFunction.transform {
// Skip validating the input types here because they are checked at runtime.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do we check at runtime? The current code does not seem to respect the types passed, and relies on the macro's expression to do some type validation. This means you can pass anything to the macro, and the user can end up with an unexpected result:

create macro plus(a int, b int) as a + b;
select plus(1.0, 1.0) as result -- This returns a decimal, and not an int as expected

So I think we should at least validate the input expressions. The hacky way would be to add casts, and have the analyzer fail if the cast cannot be made (this is terrible UX). A better way would be to create some sentinel expression that makes sure the analyzer will insert the correct cast, and throws a relevant exception (mentioning the macro) when this blows up...

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On a related note, we are currently not sure if the macro produces a valid expression. Maybe we should run analysis on the macro expression to make sure it does not fail every query later on, e.g.:

val resolvedMacroFunction = try {
  val plan = Project(Alias(macroFunction, "m")() :: Nil, OneRowRelation)
  val analyzed @ Project(Seq(named), OneRowRelation) =
    sparkSession.sessionState.analyzer.execute(plan)
  sparkSession.sessionState.analyzer.checkAnalysis(analyzed)
  named.children.head
} catch {
  case a: AnalysisException =>
    ...
}

Note that we cannot use generators if we use this approach...

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have now updated it with your ideas. Thanks.

case b: BoundReference => children(b.ordinal)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not validate the input type here. This would be entirely fine if macro arguments were defined without a DataType. I am not sure what we need to do here though. We have two options:

  • Ignore the DataType and rely on the expressions inputTypes to get casting done. This must be documented though.
  • Introduce casts to make sure the input conforms to the required input.

What do you think?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hvanhovell good points. Because the Analyzer will check each expression's checkInputDataTypes after ResolveFunctions, I think we do not need to validate the input types here. I do not think adding casts has any benefit, and it may introduce unnecessary casts. I will add some comments about it. Thanks.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok that is perfect.

}
}
catalog.createTempMacro(macroName, info, builder)
Seq.empty[Row]
}
}

/**
* The DDL command that drops a macro.
* ifExists: returns an error if the macro doesn't exist, unless this is true.
* {{{
* DROP TEMPORARY MACRO [IF EXISTS] macro_name;
* }}}
*/
case class DropMacroCommand(macroName: String, ifExists: Boolean)
extends RunnableCommand {

override def run(sparkSession: SparkSession): Seq[Row] = {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will drop any function... Can we make it Macro specific?

val catalog = sparkSession.sessionState.catalog
catalog.dropTempMacro(macroName, ifExists)
Seq.empty[Row]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1516,6 +1516,35 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
)
}

test("create/drop temporary macro") {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use SQLQueryTestSuite instead?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add a case for a macro without parameters? E.g.: create temporary macro c() as 3E9

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also test a combination of temporary macros/functions...?

intercept[AnalysisException] {
sql(s"CREATE TEMPORARY MACRO simple_add_error(x int) x + y")
}
intercept[AnalysisException] {
sql(s"CREATE TEMPORARY MACRO simple_add_error(x int, x int) x + y")
}
intercept[AnalysisException] {
sql(s"CREATE TEMPORARY MACRO simple_add_error(x int) x NOT IN (select c2 from t2) ")
}
sql("CREATE TEMPORARY MACRO fixed_number() 42")
checkAnswer(sql("SELECT fixed_number()"), Row(42))
sql("CREATE TEMPORARY MACRO string_len_plus_two(x string) length(x) + 2")
checkAnswer(sql("SELECT string_len_plus_two('abc')"), Row(5))
sql("CREATE TEMPORARY MACRO simple_add(x int, y int) x + y")
checkAnswer(sql("SELECT simple_add(1, 2)"), Row(3))
intercept[AnalysisException] {
sql(s"SELECT simple_add(1)")
}
sql("DROP TEMPORARY MACRO fixed_number")
intercept[AnalysisException] {
sql(s"DROP TEMPORARY MACRO abs")
}
intercept[AnalysisException] {
sql("DROP TEMPORARY MACRO SOME_MACRO")
}
sql("DROP TEMPORARY MACRO IF EXISTS SOME_MACRO")
}

test("create a data source table without schema") {
import testImplicits._
withTempPath { tempDir =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1153,13 +1153,6 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
sql("ALTER INDEX my_index ON my_table set IDXPROPERTIES (\"prop1\"=\"val1_new\")")}
}

test("create/drop macro commands are not supported") {
  // Each macro DDL statement is run inside its own assertUnsupportedFeature check, in order.
  Seq(
    "CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x))",
    "DROP TEMPORARY MACRO SIGMOID"
  ).foreach { statement =>
    assertUnsupportedFeature { sql(statement) }
  }
}

test("dynamic partitioning is allowed when hive.exec.dynamic.partition.mode is nonstrict") {
val modeConfKey = "hive.exec.dynamic.partition.mode"
withTable("with_parts") {
Expand Down