From 118af1455456d021496ebff82ec1e0e2398e67c6 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 1 Nov 2021 10:18:29 +0800 Subject: [PATCH 1/9] Update SparkGetFunctionsOperation.scala --- .../SparkGetFunctionsOperation.scala | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala index 352528e26e318..150b3218eed2b 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala @@ -29,6 +29,8 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRegistry} /** * Spark's own GetFunctionsOperation @@ -80,9 +82,18 @@ private[hive] class SparkGetFunctionsOperation( parentSession.getUsername) try { + val builtInFunctions = FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet + var matchedBuiltInFunctions = if (matchingDbs.nonEmpty && functionPattern == "*") { + FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet + } else { + Set.empty[FunctionIdentifier] + } matchingDbs.foreach { db => catalog.listFunctions(db, functionPattern).foreach { - case (funcIdentifier, _) => + case (functionIdentifier, _) if builtInFunctions.contains(functionIdentifier) && + !matchedBuiltInFunctions.contains(functionIdentifier) => + matchedBuiltInFunctions += functionIdentifier + case (funcIdentifier, _) if !builtInFunctions.contains(funcIdentifier) => val info = catalog.lookupFunctionInfo(funcIdentifier) val rowData = Array[AnyRef]( DEFAULT_HIVE_CATALOG, // FUNCTION_CAT @@ -94,6 +105,17 @@ private[hive] class SparkGetFunctionsOperation( rowSet.addRow(rowData); } } + matchedBuiltInFunctions.foreach { functionIdentifier => + val info = catalog.lookupFunctionInfo(functionIdentifier) + val rowData = Array[AnyRef]( + DEFAULT_HIVE_CATALOG, // FUNCTION_CAT + "builtin", // FUNCTION_SCHEM + functionIdentifier.funcName, // FUNCTION_NAME + s"Usage: ${info.getUsage}\nExtended Usage:${info.getExtended}", // REMARKS + DatabaseMetaData.functionResultUnknown.asInstanceOf[AnyRef], // FUNCTION_TYPE + info.getClassName) // SPECIFIC_NAME + rowSet.addRow(rowData); + } setState(OperationState.FINISHED) } catch onError() From a1b21fecabdc0496ded429c74de53636ee1ed657 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 1 Nov 2021 13:11:59 +0800 Subject: [PATCH 2/9] Update SparkGetFunctionsOperation.scala --- .../thriftserver/SparkGetFunctionsOperation.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala index 150b3218eed2b..ce20bf8981e9c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala @@ -82,7 +82,6 @@ private[hive] class SparkGetFunctionsOperation( parentSession.getUsername) try { - val builtInFunctions = FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet var matchedBuiltInFunctions = if (matchingDbs.nonEmpty && functionPattern == "*") { FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet } else { @@ -90,14 +89,15 @@ private[hive] class SparkGetFunctionsOperation( } matchingDbs.foreach { db => catalog.listFunctions(db, functionPattern).foreach { - case (functionIdentifier, _) if builtInFunctions.contains(functionIdentifier) && - !matchedBuiltInFunctions.contains(functionIdentifier) => - matchedBuiltInFunctions += functionIdentifier - case (funcIdentifier, _) if !builtInFunctions.contains(funcIdentifier) => + case (funcIdentifier, "SYSTEM") => + if (!matchedBuiltInFunctions.contains(funcIdentifier)) { + matchedBuiltInFunctions += funcIdentifier + } + case (funcIdentifier, _) => val info = catalog.lookupFunctionInfo(funcIdentifier) val rowData = Array[AnyRef]( DEFAULT_HIVE_CATALOG, // FUNCTION_CAT - db, // FUNCTION_SCHEM + db, // FUNCTION_SCHEMA funcIdentifier.funcName, // FUNCTION_NAME s"Usage: ${info.getUsage}\nExtended Usage:${info.getExtended}", // REMARKS DatabaseMetaData.functionResultUnknown.asInstanceOf[AnyRef], // FUNCTION_TYPE @@ -109,7 +109,7 @@ private[hive] class SparkGetFunctionsOperation( val info = catalog.lookupFunctionInfo(functionIdentifier) val rowData = Array[AnyRef]( DEFAULT_HIVE_CATALOG, // FUNCTION_CAT - "builtin", // FUNCTION_SCHEM + "SYSTEM", // FUNCTION_SCHEMA functionIdentifier.funcName, // FUNCTION_NAME s"Usage: ${info.getUsage}\nExtended Usage:${info.getExtended}", // REMARKS DatabaseMetaData.functionResultUnknown.asInstanceOf[AnyRef], // FUNCTION_TYPE From 3362038d8803aa6af579c626b67623e11a494055 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 1 Nov 2021 13:13:19 +0800 Subject: [PATCH 3/9] Update SparkMetadataOperationSuite.scala --- .../sql/hive/thriftserver/SparkMetadataOperationSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index a3f1a064f073a..a6483dec4738f 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -214,7 +214,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { functionNames.foreach { func => val exprInfo = FunctionRegistry.expressions(func)._1 assert(rs.next()) - assert(rs.getString("FUNCTION_SCHEM") === "default") + assert(rs.getString("FUNCTION_SCHEM") === "SYSTEM") assert(rs.getString("FUNCTION_NAME") === exprInfo.getName) assert(rs.getString("REMARKS") === s"Usage: ${exprInfo.getUsage}\nExtended Usage:${exprInfo.getExtended}") From 29da65a68305715c79d87e8cc09388ec9b8beef6 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 15 Dec 2021 17:50:20 +0800 Subject: [PATCH 4/9] update --- .../apache/spark/sql/internal/SQLConf.scala | 7 +++++++ .../SparkGetFunctionsOperation.scala | 12 ++++++++---- .../SparkMetadataOperationSuite.scala | 19 +++++++++++++++++-- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6c6fb40bdee87..ce79376d77089 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1158,6 +1158,13 @@ object SQLConf { .intConf .createWithDefault(200) + val THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION = + buildConf("spark.sql.thriftserver.separateDisplaySystemFunctions") + .doc("") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + // This is used to set the default data source val DEFAULT_DATA_SOURCE_NAME = buildConf("spark.sql.sources.default") .doc("The default data source to use in input/output.") diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala index ce20bf8981e9c..2cd44489498b6 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala @@ -31,6 +31,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRegistry} +import org.apache.spark.sql.internal.SQLConf /** * Spark's own GetFunctionsOperation @@ -82,14 +83,17 @@ private[hive] class SparkGetFunctionsOperation( parentSession.getUsername) try { - var matchedBuiltInFunctions = if (matchingDbs.nonEmpty && functionPattern == "*") { + val separateDisplaySystemFunctions = + sqlContext.conf.getConf(SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION) + var matchedBuiltInFunctions = if (separateDisplaySystemFunctions && functionPattern == "*" + && matchingDbs.nonEmpty) { FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet } else { Set.empty[FunctionIdentifier] } matchingDbs.foreach { db => catalog.listFunctions(db, functionPattern).foreach { - case (funcIdentifier, "SYSTEM") => + case (funcIdentifier, "SYSTEM") if separateDisplaySystemFunctions => if (!matchedBuiltInFunctions.contains(funcIdentifier)) { matchedBuiltInFunctions += funcIdentifier } @@ -97,7 +101,7 @@ private[hive] class SparkGetFunctionsOperation( val info = catalog.lookupFunctionInfo(funcIdentifier) val rowData = Array[AnyRef]( DEFAULT_HIVE_CATALOG, // FUNCTION_CAT - db, // FUNCTION_SCHEMA + db, // FUNCTION_SCHEM funcIdentifier.funcName, // FUNCTION_NAME s"Usage: ${info.getUsage}\nExtended Usage:${info.getExtended}", // REMARKS DatabaseMetaData.functionResultUnknown.asInstanceOf[AnyRef], // FUNCTION_TYPE @@ -109,7 +113,7 @@ private[hive] class SparkGetFunctionsOperation( val info = catalog.lookupFunctionInfo(functionIdentifier) val rowData = Array[AnyRef]( DEFAULT_HIVE_CATALOG, // FUNCTION_CAT - "SYSTEM", // FUNCTION_SCHEMA + "SYSTEM", // FUNCTION_SCHEM functionIdentifier.funcName, // FUNCTION_NAME s"Usage: ${info.getUsage}\nExtended Usage:${info.getExtended}", // REMARKS DatabaseMetaData.functionResultUnknown.asInstanceOf[AnyRef], // FUNCTION_TYPE diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index a6483dec4738f..f1aa31e070e2c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -210,11 +210,14 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { } test("Spark's own GetFunctionsOperation(SparkGetFunctionsOperation)") { - def checkResult(rs: ResultSet, functionNames: Seq[String]): Unit = { + def checkResult( + rs: ResultSet, + functionNames: Seq[String], + functionSchema: String = "default"): Unit = { functionNames.foreach { func => val exprInfo = FunctionRegistry.expressions(func)._1 assert(rs.next()) - assert(rs.getString("FUNCTION_SCHEM") === "SYSTEM") + assert(rs.getString("FUNCTION_SCHEM") === functionSchema) assert(rs.getString("FUNCTION_NAME") === exprInfo.getName) assert(rs.getString("REMARKS") === s"Usage: ${exprInfo.getUsage}\nExtended Usage:${exprInfo.getExtended}") @@ -226,6 +229,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { } withJdbcStatement() { statement => + statement.execute(s"SET ${SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION.key}=false") val metaData = statement.getConnection.getMetaData // Hive does not have an overlay function, we use overlay to test. checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay")) @@ -236,6 +240,17 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { checkResult(metaData.getFunctions(null, "default", "shift*"), Seq("shiftleft", "shiftright", "shiftrightunsigned")) checkResult(metaData.getFunctions(null, "default", "upPer"), Seq("upper")) + + statement.execute(s"SET ${SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION.key}=true") + checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay"), "SYSTEM") + checkResult(metaData.getFunctions(null, null, "overla*"), Seq("overlay"), "SYSTEM") + checkResult(metaData.getFunctions(null, "", "overla*"), Seq("overlay"), "SYSTEM") + checkResult(metaData.getFunctions(null, null, "does-not-exist*"), Seq.empty, "SYSTEM") + checkResult(metaData.getFunctions(null, "default", "overlay"), Seq("overlay"), "SYSTEM") + checkResult(metaData.getFunctions(null, "default", "shift*"), + Seq("shiftleft", "shiftright", "shiftrightunsigned"), "SYSTEM") + checkResult(metaData.getFunctions(null, "default", "upPer"), Seq("upper"), "SYSTEM") + } } From 4bbc8cc3f1450ea1b13e7e8b67fc2bee592f7f0a Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 15 Dec 2021 18:02:04 +0800 Subject: [PATCH 5/9] update --- docs/sql-migration-guide.md | 2 ++ .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 51f3bd36ac60b..f2af4c114af4b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -54,6 +54,8 @@ license: | - Since Spark 3.3, nulls are written as empty strings in CSV data source by default. In Spark 3.2 or earlier, nulls were written as empty strings as quoted empty strings, `""`. To restore the previous behavior, set `nullValue` to `""`. + - Since Spark 3.3, Spark Thrift Server will return databases' system functions metadata only once, and Spark will change function schema as `SYSTEM`. In Spark 3.2 or earlier, Spark Thrift Server will return system functions metadata for all databases. To restore the behavior before Spark 3.3, yo you can set `spark.sql.thriftserver.separateDisplaySystemFunctions` to `true`. + ## Upgrading from Spark SQL 3.1 to 3.2 - Since Spark 3.2, ADD FILE/JAR/ARCHIVE commands require each path to be enclosed by `"` or `'` if the path contains whitespaces. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ce79376d77089..a676cbac6df45 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1160,7 +1160,8 @@ object SQLConf { val THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION = buildConf("spark.sql.thriftserver.separateDisplaySystemFunctions") - .doc("") + .doc("When true, Spark Thrift Server will return databases' system functions metadata " + + "only once, and Spark will change function schema as `SYSTEM`.") .version("3.3.0") .booleanConf .createWithDefault(true) From 89bb67847f07d1da900ff42114b394153e8fa2fd Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 15 Dec 2021 18:08:13 +0800 Subject: [PATCH 6/9] Update sql-migration-guide.md --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index f2af4c114af4b..57e4618f664ea 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -54,7 +54,7 @@ license: | - Since Spark 3.3, nulls are written as empty strings in CSV data source by default. In Spark 3.2 or earlier, nulls were written as empty strings as quoted empty strings, `""`. To restore the previous behavior, set `nullValue` to `""`. - - Since Spark 3.3, Spark Thrift Server will return databases' system functions metadata only once, and Spark will change function schema as `SYSTEM`. In Spark 3.2 or earlier, Spark Thrift Server will return system functions metadata for all databases. To restore the behavior before Spark 3.3, yo you can set `spark.sql.thriftserver.separateDisplaySystemFunctions` to `true`. + - Since Spark 3.3, Spark Thrift Server will return databases' system functions metadata only once, and Spark will change function schema as `SYSTEM`. In Spark 3.2 or earlier, Spark Thrift Server will return system functions metadata for all databases. To restore the behavior before Spark 3.3, yo you can set `spark.sql.thriftserver.separateDisplaySystemFunctions` to `false`. ## Upgrading from Spark SQL 3.1 to 3.2 From 479d533e9b14417e23250d621db191c706daf2b7 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 16 Dec 2021 14:47:12 +0800 Subject: [PATCH 7/9] Follow comment --- .../catalyst/analysis/FunctionRegistry.scala | 3 +++ .../sql/catalyst/catalog/SessionCatalog.scala | 8 ++++--- .../sql/execution/command/functions.scala | 5 +++-- .../SparkGetFunctionsOperation.scala | 7 +++--- .../SparkMetadataOperationSuite.scala | 22 ++++++++++++------- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index f9898b1a14f61..2c11d2f7790fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -319,6 +319,9 @@ object FunctionRegistry { val FUNC_ALIAS = TreeNodeTag[String]("functionAliasName") + val builtinFunctionScope = "SYSTEM" + val userFunctionScope = "USER" + // Note: Whenever we add a new entry here, make sure we also update ExpressionToSQLSuite val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map( // misc non-aggregate functions diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 60f68fb8be61a..8d4de16f1fefb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1699,9 +1699,11 @@ class SessionCatalog( // The session catalog caches some persistent functions in the FunctionRegistry // so there can be duplicates. functions.map { - case f if FunctionRegistry.functionSet.contains(f) => (f, "SYSTEM") - case f if TableFunctionRegistry.functionSet.contains(f) => (f, "SYSTEM") - case f => (f, "USER") + case f if FunctionRegistry.functionSet.contains(f) => + (f, FunctionRegistry.builtinFunctionScope) + case f if TableFunctionRegistry.functionSet.contains(f) => + (f, FunctionRegistry.builtinFunctionScope) + case f => (f, FunctionRegistry.userFunctionScope) }.distinct } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index 09aa56993943f..bb2721b83d4ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -208,8 +208,9 @@ case class ShowFunctionsCommand( sparkSession.sessionState.catalog .listFunctions(dbName, pattern.getOrElse("*")) .collect { - case (f, "USER") if showUserFunctions => f.unquotedString - case (f, "SYSTEM") if showSystemFunctions => f.unquotedString + case (f, FunctionRegistry.userFunctionScope) if showUserFunctions => f.unquotedString + case (f, FunctionRegistry.builtinFunctionScope) if showSystemFunctions => + f.unquotedString } // Hard code "<>", "!=", "between", "case", and "||" // for now as there is no corresponding functions. diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala index 2cd44489498b6..1e86299f2cb31 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala @@ -86,14 +86,15 @@ private[hive] class SparkGetFunctionsOperation( val separateDisplaySystemFunctions = sqlContext.conf.getConf(SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION) var matchedBuiltInFunctions = if (separateDisplaySystemFunctions && functionPattern == "*" - && matchingDbs.nonEmpty) { + && matchingDbs.nonEmpty) { FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet } else { Set.empty[FunctionIdentifier] } matchingDbs.foreach { db => catalog.listFunctions(db, functionPattern).foreach { - case (funcIdentifier, "SYSTEM") if separateDisplaySystemFunctions => + case (funcIdentifier, FunctionRegistry.`builtinFunctionScope`) + if separateDisplaySystemFunctions => if (!matchedBuiltInFunctions.contains(funcIdentifier)) { matchedBuiltInFunctions += funcIdentifier } @@ -113,7 +114,7 @@ private[hive] class SparkGetFunctionsOperation( val info = catalog.lookupFunctionInfo(functionIdentifier) val rowData = Array[AnyRef]( DEFAULT_HIVE_CATALOG, // FUNCTION_CAT - "SYSTEM", // FUNCTION_SCHEM + FunctionRegistry.builtinFunctionScope, // FUNCTION_SCHEM functionIdentifier.funcName, // FUNCTION_NAME s"Usage: ${info.getUsage}\nExtended Usage:${info.getExtended}", // REMARKS DatabaseMetaData.functionResultUnknown.asInstanceOf[AnyRef], // FUNCTION_TYPE diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index f1aa31e070e2c..903b6fc8f95cd 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -242,15 +242,21 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { checkResult(metaData.getFunctions(null, "default", "upPer"), Seq("upper")) statement.execute(s"SET ${SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION.key}=true") - checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay"), "SYSTEM") - checkResult(metaData.getFunctions(null, null, "overla*"), Seq("overlay"), "SYSTEM") - checkResult(metaData.getFunctions(null, "", "overla*"), Seq("overlay"), "SYSTEM") - checkResult(metaData.getFunctions(null, null, "does-not-exist*"), Seq.empty, "SYSTEM") - checkResult(metaData.getFunctions(null, "default", "overlay"), Seq("overlay"), "SYSTEM") + checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay"), + FunctionRegistry.builtinFunctionScope) + checkResult(metaData.getFunctions(null, null, "overla*"), Seq("overlay"), + FunctionRegistry.builtinFunctionScope) + checkResult(metaData.getFunctions(null, "", "overla*"), Seq("overlay"), + FunctionRegistry.builtinFunctionScope) + checkResult(metaData.getFunctions(null, null, "does-not-exist*"), Seq.empty, + FunctionRegistry.builtinFunctionScope) + checkResult(metaData.getFunctions(null, "default", "overlay"), Seq("overlay"), + FunctionRegistry.builtinFunctionScope) checkResult(metaData.getFunctions(null, "default", "shift*"), - Seq("shiftleft", "shiftright", "shiftrightunsigned"), "SYSTEM") - checkResult(metaData.getFunctions(null, "default", "upPer"), Seq("upper"), "SYSTEM") - + Seq("shiftleft", "shiftright", "shiftrightunsigned"), + FunctionRegistry.builtinFunctionScope) + checkResult(metaData.getFunctions(null, "default", "upPer"), Seq("upper"), + FunctionRegistry.builtinFunctionScope) } } From 366eed79e8a35a840863937016f4e84caf43f54e Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 16 Dec 2021 17:56:06 +0800 Subject: [PATCH 8/9] follow comment --- docs/sql-migration-guide.md | 2 +- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 8 ++++---- .../hive/thriftserver/SparkGetFunctionsOperation.scala | 2 +- .../hive/thriftserver/SparkMetadataOperationSuite.scala | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 57e4618f664ea..959013e8e0e0f 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -54,7 +54,7 @@ license: | - Since Spark 3.3, nulls are written as empty strings in CSV data source by default. In Spark 3.2 or earlier, nulls were written as empty strings as quoted empty strings, `""`. To restore the previous behavior, set `nullValue` to `""`. - - Since Spark 3.3, Spark Thrift Server will return databases' system functions metadata only once, and Spark will change function schema as `SYSTEM`. In Spark 3.2 or earlier, Spark Thrift Server will return system functions metadata for all databases. To restore the behavior before Spark 3.3, yo you can set `spark.sql.thriftserver.separateDisplaySystemFunctions` to `false`. + - Since Spark 3.3, Spark Thrift Server will return the available system function metadata for databases only once, and Spark will set the function schema as `SYSTEM`. In Spark 3.2 or earlier, Spark Thrift Server will return all system functions metadata for all databases which results in duplicates. To restore the behavior before Spark 3.3, you can set `spark.sql.thriftserver.uniqueSystemFunctions` to `false`. ## Upgrading from Spark SQL 3.1 to 3.2 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index a676cbac6df45..075a6813d1621 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1158,10 +1158,10 @@ object SQLConf { .intConf .createWithDefault(200) - val THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION = - buildConf("spark.sql.thriftserver.separateDisplaySystemFunctions") - .doc("When true, Spark Thrift Server will return databases' system functions metadata " + - "only once, and Spark will change function schema as `SYSTEM`.") + val THRIFTSERVER_UNIQUE_SYSTEM_FUNCTIONS = + buildConf("spark.sql.thriftserver.uniqueSystemFunctions") + .doc("When true, Spark Thrift Server will return the available system function metadata " + + "for databases only once, and Spark will set the function schema as 'SYSTEM'.") .version("3.3.0") .booleanConf .createWithDefault(true) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala index 1e86299f2cb31..80345d0fc5e2e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala @@ -84,7 +84,7 @@ private[hive] class SparkGetFunctionsOperation( try { val separateDisplaySystemFunctions = - sqlContext.conf.getConf(SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION) + sqlContext.conf.getConf(SQLConf.THRIFTSERVER_UNIQUE_SYSTEM_FUNCTIONS) var matchedBuiltInFunctions = if (separateDisplaySystemFunctions && functionPattern == "*" && matchingDbs.nonEmpty) { FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 903b6fc8f95cd..fee82a8912aa5 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -229,7 +229,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { } withJdbcStatement() { statement => - statement.execute(s"SET ${SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION.key}=false") + statement.execute(s"SET ${SQLConf.THRIFTSERVER_UNIQUE_SYSTEM_FUNCTIONS.key}=false") val metaData = statement.getConnection.getMetaData // Hive does not have an overlay function, we use overlay to test. checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay")) @@ -241,7 +241,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { Seq("shiftleft", "shiftright", "shiftrightunsigned")) checkResult(metaData.getFunctions(null, "default", "upPer"), Seq("upper")) - statement.execute(s"SET ${SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION.key}=true") + statement.execute(s"SET ${SQLConf.THRIFTSERVER_UNIQUE_SYSTEM_FUNCTIONS.key}=true") checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay"), FunctionRegistry.builtinFunctionScope) checkResult(metaData.getFunctions(null, null, "overla*"), Seq("overlay"), From 392c5eae357e457d84b21433213bde5103908fe0 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Thu, 16 Dec 2021 18:46:36 +0800 Subject: [PATCH 9/9] Update SparkMetadataOperationSuite.scala --- .../SparkMetadataOperationSuite.scala | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index fee82a8912aa5..62fc86e0d8838 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -706,4 +706,36 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { } } } + + test("SPARK-37173: SparkGetFunctionOperation return builtin function only once") { + def checkFunctions( + rs: ResultSet, + functionName: String, + expectedFunctionSchemas: Seq[String], + repeats: Int): Unit = { + var nums = 0 + var functionSchemas = Seq.empty[String] + while (rs.next()) { + if (rs.getString("FUNCTION_NAME") == functionName) { + functionSchemas = functionSchemas :+ rs.getString("FUNCTION_SCHEM") + nums += 1 + } + } + assert(nums === repeats) + functionSchemas.zip(expectedFunctionSchemas).foreach { case (actual, expected) => + assert(actual === expected) + } + } + + withDatabase("test_spark_37173") { statement => + statement.execute(s"CREATE DATABASE IF NOT EXISTS test_spark_37173") + statement.execute(s"SET ${SQLConf.THRIFTSERVER_UNIQUE_SYSTEM_FUNCTIONS.key}=false") + val metaData = statement.getConnection.getMetaData + checkFunctions(metaData.getFunctions(null, "*", "*"), + "length", Seq("default", "test_spark_37173"), 2) + statement.execute(s"SET ${SQLConf.THRIFTSERVER_UNIQUE_SYSTEM_FUNCTIONS.key}=true") + checkFunctions(metaData.getFunctions(null, "*", "*"), + "length", Seq("SYSTEM"), 1) + } + } }