From 9a81c65c5107b07145bdc555ed5fc9e2630f35c7 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 21 Aug 2020 14:11:14 +0800 Subject: [PATCH 01/19] init --- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 11 +++++++++++ .../spark/sql/execution/command/functions.scala | 12 +++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 6fba3156c3919..c9f237574092f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1369,6 +1369,17 @@ class SessionCatalog( functionRegistry.registerFunction(func, info, builder) } + /** + * Make sure function class is on the classpath. + */ + def requireFunctionClassExists(funcDefinition: CatalogFunction): Unit = { + val className = funcDefinition.className + if (!Utils.classIsLoadable(className)) { + throw new AnalysisException(s"Can not load class '$className' when registering " + + s"the function '${funcDefinition.identifier}', please make sure it is on the classpath") + } + } + /** * Unregister a temporary or permanent function from a session-specific [[FunctionRegistry]] * Return true if function exists. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index fae8de4780102..8a7cf70f17bdb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -76,21 +76,23 @@ case class CreateFunctionCommand( override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog val func = CatalogFunction(FunctionIdentifier(functionName, databaseName), className, resources) + catalog.loadFunctionResources(resources) + // We fail fast if function class is not exists. + catalog.requireFunctionClassExists(func) if (isTemp) { - // We first load resources and then put the builder in the function registry. - catalog.loadFunctionResources(resources) catalog.registerFunction(func, overrideIfExists = replace) } else { // Handles `CREATE OR REPLACE FUNCTION AS ... USING ...` if (replace && catalog.functionExists(func.identifier)) { - // alter the function in the metastore + // Alter the function in the metastore catalog.alterFunction(func) } else { // For a permanent, we will store the metadata into underlying external catalog. - // This function will be loaded into the FunctionRegistry when a query uses it. - // We do not load it into FunctionRegistry right now. catalog.createFunction(func, ignoreIfExists) } + // We already created permanent function when we arrive there, + // so we override the cached function if exists. 
+ catalog.registerFunction(func, overrideIfExists = true) } Seq.empty[Row] } From ce834875e2297336e27874f70041210311899815 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 21 Aug 2020 14:31:37 +0800 Subject: [PATCH 02/19] test --- .../spark/sql/execution/command/DDLSuite.scala | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 17857a6ce173d..9c083875f9415 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -3101,6 +3101,22 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(spark.sessionState.catalog.isRegisteredFunction(rand)) } } + + test("SPARK-32677: Cache function directly after create") { + withUserDefinedFunction("func1" -> false) { + val func = FunctionIdentifier("func1", Some("default")) + val msg = intercept[AnalysisException] { + sql("CREATE FUNCTION func1 AS 'test.non.exists.udf'") + }.getMessage + assert(msg.contains("please make sure it is on the classpath")) + assert(!spark.sessionState.catalog.functionExists(func)) + assert(!spark.sessionState.catalog.isRegisteredFunction(func)) + + sql("CREATE FUNCTION func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'") + assert(spark.sessionState.catalog.functionExists(func)) + assert(spark.sessionState.catalog.isRegisteredFunction(func)) + } + } } object FakeLocalFsFileSystem { From c75cff4ed3403abbf21a6e5037dd6a20d5cade6a Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 21 Aug 2020 21:06:05 +0800 Subject: [PATCH 03/19] udaf --- .../src/test/resources/sql-tests/results/udaf.sql.out | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out index 9f4229a11b65d..6c687d5cdf36f 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out @@ -42,7 +42,8 @@ CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' -- !query schema struct<> -- !query output - +org.apache.spark.sql.AnalysisException +Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 7 -- !query @@ -50,8 +51,8 @@ SELECT default.udaf1(int_col1) as udaf1 from t1 -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 7 +org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException +Undefined function: 'udaf1'. 
This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 -- !query @@ -67,4 +68,5 @@ DROP FUNCTION udaf1 -- !query schema struct<> -- !query output +Function 'default.udaf1' not found in database 'default'; From def8c4e09b35b540ff6e191d7287c15e9cbcc1c5 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 21 Aug 2020 21:24:52 +0800 Subject: [PATCH 04/19] do not register --- .../org/apache/spark/sql/execution/command/functions.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index 8a7cf70f17bdb..3a41b5e3f31c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -90,9 +90,6 @@ case class CreateFunctionCommand( // For a permanent, we will store the metadata into underlying external catalog. catalog.createFunction(func, ignoreIfExists) } - // We already created permanent function when we arrive there, - // so we override the cached function if exists. - catalog.registerFunction(func, overrideIfExists = true) } Seq.empty[Row] } From 97dcadfd1d88b0fe62c149df1045752cc678c91c Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 21 Aug 2020 21:30:42 +0800 Subject: [PATCH 05/19] revert comment --- .../org/apache/spark/sql/execution/command/functions.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index 3a41b5e3f31c5..bd292651425ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -88,6 +88,8 @@ case class CreateFunctionCommand( catalog.alterFunction(func) } else { // For a permanent, we will store the metadata into underlying external catalog. + // This function will be loaded into the FunctionRegistry when a query uses it. + // We do not load it into FunctionRegistry right now. 
catalog.createFunction(func, ignoreIfExists) } } From 3e6320d75b06fd2c3d32c3b070aa5a62fcfd339b Mon Sep 17 00:00:00 2001 From: ulysses Date: Sat, 22 Aug 2020 15:53:05 +0800 Subject: [PATCH 06/19] udaf --- sql/core/src/test/resources/sql-tests/results/udaf.sql.out | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out index 6c687d5cdf36f..72553990f1a93 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out @@ -68,5 +68,6 @@ DROP FUNCTION udaf1 -- !query schema struct<> -- !query output +org.apache.spark.sql.catalyst.analysis.NoSuchPermanentFunctionException Function 'default.udaf1' not found in database 'default'; From ba248c9344bc9af42fbbab0d70182237271514a2 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sun, 30 Aug 2020 19:11:32 +0800 Subject: [PATCH 07/19] HiveUDFDynamicLoadSuite --- .../org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala index ee8e6f4f78be5..cd8c02b405e7b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUDFDynamicLoadSuite.scala @@ -156,7 +156,9 @@ class HiveUDFDynamicLoadSuite extends QueryTest with SQLTestUtils with TestHiveS sql(s"CREATE FUNCTION ${udfInfo.funcName} AS '${udfInfo.className}' USING JAR '$jarUrl'") - assert(Thread.currentThread().getContextClassLoader eq sparkClassLoader) + assert(Thread.currentThread().getContextClassLoader ne sparkClassLoader) + assert(Thread.currentThread().getContextClassLoader eq + spark.sqlContext.sharedState.jarClassLoader) // JAR will be loaded at first usage, and it will change the current thread's // context classloader to jar classloader in sharedState. 
From 9fb3c13be2fdfc670bff7c3f88abdd7f6b4e863a Mon Sep 17 00:00:00 2001 From: ulysses Date: Sun, 30 Aug 2020 20:57:27 +0800 Subject: [PATCH 08/19] fix --- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 9c083875f9415..9a12df336fb46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -3102,7 +3102,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - test("SPARK-32677: Cache function directly after create") { + test("SPARK-32677: Load function resource before create") { withUserDefinedFunction("func1" -> false) { val func = FunctionIdentifier("func1", Some("default")) val msg = intercept[AnalysisException] { @@ -3114,7 +3114,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { sql("CREATE FUNCTION func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'") assert(spark.sessionState.catalog.functionExists(func)) - assert(spark.sessionState.catalog.isRegisteredFunction(func)) } } } From fe98b71e78006a809eb1ecec5622567228962d39 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 31 Aug 2020 08:17:01 +0800 Subject: [PATCH 09/19] udf-udaf --- .../resources/sql-tests/results/udf/udf-udaf.sql.out | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out index 19221947b4a88..2343d352fa33c 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out @@ -42,7 +42,8 @@ CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' -- !query schema struct<> -- !query output - +org.apache.spark.sql.AnalysisException +Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 7 -- !query @@ -50,8 +51,8 @@ SELECT default.udaf1(udf(int_col1)) as udaf1, udf(default.udaf1(udf(int_col1))) -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 94 +org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException +Undefined function: 'udaf1'. 
This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 -- !query @@ -67,4 +68,6 @@ DROP FUNCTION udaf1 -- !query schema struct<> -- !query output +org.apache.spark.sql.catalyst.analysis.NoSuchPermanentFunctionException +Function 'default.udaf1' not found in database 'default'; From fe776986065eb3ddda71d947e9483b72b0666401 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 31 Aug 2020 12:14:16 +0800 Subject: [PATCH 10/19] qualified name --- .../apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index c9f237574092f..adf23ceb3aeb4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1375,8 +1375,11 @@ class SessionCatalog( def requireFunctionClassExists(funcDefinition: CatalogFunction): Unit = { val className = funcDefinition.className if (!Utils.classIsLoadable(className)) { + val database = + formatDatabaseName(funcDefinition.identifier.database.getOrElse(getCurrentDatabase)) + val qualifiedName = funcDefinition.identifier.copy(database = Some(database)) throw new AnalysisException(s"Can not load class '$className' when registering " + - s"the function '${funcDefinition.identifier}', please make sure it is on the classpath") + s"the function '$qualifiedName', please make sure it is on the classpath") } } From 4750e2e1ada5b9d1decd2bb220dd314737f5dad2 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 31 Aug 2020 18:30:12 +0800 Subject: [PATCH 11/19] fix --- sql/core/src/test/resources/sql-tests/results/udaf.sql.out | 2 +- .../src/test/resources/sql-tests/results/udf/udf-udaf.sql.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out index 72553990f1a93..8c3cb24265e73 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out @@ -43,7 +43,7 @@ CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' struct<> -- !query output org.apache.spark.sql.AnalysisException -Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 7 +Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out index 2343d352fa33c..2ba3f813fa5ee 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out @@ -43,7 +43,7 @@ CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' struct<> -- !query output org.apache.spark.sql.AnalysisException -Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 7 +Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; -- !query From 
882942f68d3483f563dd1c00536413018e6d5855 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 31 Aug 2020 22:10:31 +0800 Subject: [PATCH 12/19] fix --- sql/core/src/test/resources/sql-tests/results/udaf.sql.out | 2 +- .../src/test/resources/sql-tests/results/udf/udf-udaf.sql.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out index 8c3cb24265e73..3bed62f220af1 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out @@ -51,7 +51,7 @@ SELECT default.udaf1(int_col1) as udaf1 from t1 -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException +org.apache.spark.sql.AnalysisException Undefined function: 'udaf1'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out index 2ba3f813fa5ee..d1cce8b7b79f2 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out @@ -51,7 +51,7 @@ SELECT default.udaf1(udf(int_col1)) as udaf1, udf(default.udaf1(udf(int_col1))) -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException +org.apache.spark.sql.AnalysisException Undefined function: 'udaf1'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 From d55b8bc41d8116e3f250a14fc29f6321216d1e37 Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 1 Sep 2020 08:17:19 +0800 Subject: [PATCH 13/19] fix py --- python/pyspark/sql/tests/test_catalog.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index 141b249db0fc6..0d7fcfab5a4a4 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -128,8 +128,8 @@ def test_list_functions(self): with self.function("func1", "some_db.func2"): spark.catalog.registerFunction("temp_func", lambda x: str(x)) - spark.sql("CREATE FUNCTION func1 AS 'org.apache.spark.data.bricks'") - spark.sql("CREATE FUNCTION some_db.func2 AS 'org.apache.spark.data.bricks'") + spark.sql("CREATE FUNCTION func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'") + spark.sql("CREATE FUNCTION some_db.func2 AS 'test.org.apache.spark.sql.MyDoubleAvg'") newFunctions = dict((f.name, f) for f in spark.catalog.listFunctions()) newFunctionsSomeDb = \ dict((f.name, f) for f in spark.catalog.listFunctions("some_db")) From 75e61e9ed1576a029bda51baedc650a959d226ca Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 1 Sep 2020 08:25:44 +0800 Subject: [PATCH 14/19] style --- python/pyspark/sql/tests/test_catalog.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index 0d7fcfab5a4a4..4d52c7012a8f5 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -129,7 +129,8 @@ def test_list_functions(self): with self.function("func1", "some_db.func2"): spark.catalog.registerFunction("temp_func", lambda x: str(x)) spark.sql("CREATE FUNCTION 
func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'") - spark.sql("CREATE FUNCTION some_db.func2 AS 'test.org.apache.spark.sql.MyDoubleAvg'") + spark.sql( + "CREATE FUNCTION some_db.func2 AS 'test.org.apache.spark.sql.MyDoubleAvg'") newFunctions = dict((f.name, f) for f in spark.catalog.listFunctions()) newFunctionsSomeDb = \ dict((f.name, f) for f in spark.catalog.listFunctions("some_db")) From bb6d742adbd08350a5014ae4ee87bbf38007c546 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 3 Sep 2020 08:27:20 +0800 Subject: [PATCH 15/19] fix --- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 13 +++---------- .../test/resources/sql-tests/results/udaf.sql.out | 2 +- .../sql-tests/results/udf/udf-udaf.sql.out | 2 +- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index adf23ceb3aeb4..bc13bcea53292 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1359,12 +1359,8 @@ class SessionCatalog( val info = new ExpressionInfo(funcDefinition.className, func.database.orNull, func.funcName) val builder = functionBuilder.getOrElse { - val className = funcDefinition.className - if (!Utils.classIsLoadable(className)) { - throw new AnalysisException(s"Can not load class '$className' when registering " + - s"the function '$func', please make sure it is on the classpath") - } - makeFunctionBuilder(func.unquotedString, className) + requireFunctionClassExists(funcDefinition) + makeFunctionBuilder(func.unquotedString, funcDefinition.className) } functionRegistry.registerFunction(func, info, builder) } @@ -1375,11 +1371,8 @@ class SessionCatalog( def requireFunctionClassExists(funcDefinition: CatalogFunction): Unit = { val className = funcDefinition.className if (!Utils.classIsLoadable(className)) { - val database = - formatDatabaseName(funcDefinition.identifier.database.getOrElse(getCurrentDatabase)) - val qualifiedName = funcDefinition.identifier.copy(database = Some(database)) throw new AnalysisException(s"Can not load class '$className' when registering " + - s"the function '$qualifiedName', please make sure it is on the classpath") + s"the function '${funcDefinition.identifier}', please make sure it is on the classpath") } } diff --git a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out index 3bed62f220af1..65513f173c2b8 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out @@ -43,7 +43,7 @@ CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' struct<> -- !query output org.apache.spark.sql.AnalysisException -Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; +Can not load class 'test.non.existent.udaf' when registering the function 'udaf1', please make sure it is on the classpath; -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out index d1cce8b7b79f2..6592e33342a15 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out @@ -43,7 +43,7 @@ CREATE 
FUNCTION udaf1 AS 'test.non.existent.udaf' struct<> -- !query output org.apache.spark.sql.AnalysisException -Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; +Can not load class 'test.non.existent.udaf' when registering the function 'udaf1', please make sure it is on the classpath; -- !query From 438291bf0cdb3ec9ea703dd9d81d6b53bf92d87b Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 3 Sep 2020 13:51:42 +0800 Subject: [PATCH 16/19] only check permanent --- .../org/apache/spark/sql/execution/command/functions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index bd292651425ea..b1c056d8051e6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -77,11 +77,11 @@ case class CreateFunctionCommand( val catalog = sparkSession.sessionState.catalog val func = CatalogFunction(FunctionIdentifier(functionName, databaseName), className, resources) catalog.loadFunctionResources(resources) - // We fail fast if function class is not exists. - catalog.requireFunctionClassExists(func) if (isTemp) { catalog.registerFunction(func, overrideIfExists = replace) } else { + // We fail fast if function class is not exists. + catalog.requireFunctionClassExists(func) // Handles `CREATE OR REPLACE FUNCTION AS ... USING ...` if (replace && catalog.functionExists(func.identifier)) { // Alter the function in the metastore From 6584f39a924c8d9502441049992bed15529c6925 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 3 Sep 2020 13:52:28 +0800 Subject: [PATCH 17/19] error msg --- .../org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index bc13bcea53292..d0282f785463b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1372,7 +1372,8 @@ class SessionCatalog( val className = funcDefinition.className if (!Utils.classIsLoadable(className)) { throw new AnalysisException(s"Can not load class '$className' when registering " + - s"the function '${funcDefinition.identifier}', please make sure it is on the classpath") + s"the function '${funcDefinition.identifier.unquotedString}', please make sure " + + s"it is on the classpath") } } From 701c25df05ce9adcb1a8bb005804f31119c0b458 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sat, 5 Sep 2020 20:20:51 +0800 Subject: [PATCH 18/19] fill database name --- .../org/apache/spark/sql/execution/command/functions.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index b1c056d8051e6..af485c0b1d491 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -75,11 +75,15 @@ case class CreateFunctionCommand( 
override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - val func = CatalogFunction(FunctionIdentifier(functionName, databaseName), className, resources) catalog.loadFunctionResources(resources) if (isTemp) { + val func = CatalogFunction(FunctionIdentifier(functionName, databaseName), + className, resources) catalog.registerFunction(func, overrideIfExists = replace) } else { + // For a permanent, we fill database name first. + val func = CatalogFunction(FunctionIdentifier(functionName, + Some(databaseName.getOrElse(catalog.getCurrentDatabase))), className, resources) // We fail fast if function class is not exists. catalog.requireFunctionClassExists(func) // Handles `CREATE OR REPLACE FUNCTION AS ... USING ...` From 3cf971b5603b236f0a877d4804245742de94cbd5 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sat, 5 Sep 2020 20:22:36 +0800 Subject: [PATCH 19/19] fix ut --- sql/core/src/test/resources/sql-tests/results/udaf.sql.out | 2 +- .../src/test/resources/sql-tests/results/udf/udf-udaf.sql.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out index 65513f173c2b8..3bed62f220af1 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out @@ -43,7 +43,7 @@ CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' struct<> -- !query output org.apache.spark.sql.AnalysisException -Can not load class 'test.non.existent.udaf' when registering the function 'udaf1', please make sure it is on the classpath; +Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out index 6592e33342a15..d1cce8b7b79f2 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out @@ -43,7 +43,7 @@ CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' struct<> -- !query output org.apache.spark.sql.AnalysisException -Can not load class 'test.non.existent.udaf' when registering the function 'udaf1', please make sure it is on the classpath; +Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; -- !query
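
Taken together, the series makes CREATE FUNCTION load its resources first and, for permanent functions only, fail fast when the implementation class is not on the classpath, while leaving lazy FunctionRegistry registration in place. The following is a condensed, illustrative sketch of that end-to-end behavior, based on the DDLSuite test added above; it assumes it runs inside a ScalaTest-based Spark SQL suite (as DDLSuite does), where `spark`, `sql(...)`, and `intercept` are available, and it reuses the placeholder class names from the patches.

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.FunctionIdentifier

// Identifier of the permanent function under test (default database), as in the DDLSuite test.
val func = FunctionIdentifier("func1", Some("default"))

// A permanent CREATE FUNCTION whose class cannot be loaded now fails at CREATE time,
// with the "please make sure it is on the classpath" message introduced in this series...
val msg = intercept[AnalysisException] {
  sql("CREATE FUNCTION func1 AS 'test.non.exists.udf'")
}.getMessage
assert(msg.contains("please make sure it is on the classpath"))

// ...and nothing is persisted to the external catalog or cached in the FunctionRegistry.
assert(!spark.sessionState.catalog.functionExists(func))
assert(!spark.sessionState.catalog.isRegisteredFunction(func))

// With a loadable class, the permanent function is created in the metastore as before;
// it is only loaded into the FunctionRegistry when a query first uses it, not at CREATE time.
sql("CREATE FUNCTION func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'")
assert(spark.sessionState.catalog.functionExists(func))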