Skip to content
2 changes: 2 additions & 0 deletions docs/sql-migration-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ license: |

- Since Spark 3.3, nulls are written as empty strings in CSV data source by default. In Spark 3.2 or earlier, nulls were written as quoted empty strings, `""`. To restore the previous behavior, set `nullValue` to `""`.

- Since Spark 3.3, Spark Thrift Server returns system functions metadata only once, and reports the function schema of these functions as `SYSTEM`. In Spark 3.2 or earlier, Spark Thrift Server returned system functions metadata separately for every database. To restore the behavior before Spark 3.3, you can set `spark.sql.thriftserver.separateDisplaySystemFunctions` to `false`.
Comment thread
AngersZhuuuu marked this conversation as resolved.
Outdated

## Upgrading from Spark SQL 3.1 to 3.2

- Since Spark 3.2, ADD FILE/JAR/ARCHIVE commands require each path to be enclosed by `"` or `'` if the path contains whitespaces.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,14 @@ object SQLConf {
.intConf
.createWithDefault(200)

// Boolean SQL conf (key `spark.sql.thriftserver.separateDisplaySystemFunctions`, introduced in
// 3.3.0, default `true`). Per its doc string, when enabled the Thrift Server returns system
// functions metadata only once and reports those functions under the `SYSTEM` schema.
val THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION =
buildConf("spark.sql.thriftserver.separateDisplaySystemFunctions")
.doc("When true, Spark Thrift Server will return databases' system functions metadata " +
Comment thread
AngersZhuuuu marked this conversation as resolved.
Outdated
"only once, and Spark will change function schema as `SYSTEM`.")
Comment thread
dongjoon-hyun marked this conversation as resolved.
Outdated
.version("3.3.0")
.booleanConf
.createWithDefault(true)

// This is used to set the default data source
val DEFAULT_DATA_SOURCE_NAME = buildConf("spark.sql.sources.default")
.doc("The default data source to use in input/output.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ import org.apache.hive.service.cli.session.HiveSession

import org.apache.spark.internal.Logging
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRegistry}
import org.apache.spark.sql.internal.SQLConf

/**
* Spark's own GetFunctionsOperation
Expand Down Expand Up @@ -80,8 +83,20 @@ private[hive] class SparkGetFunctionsOperation(
parentSession.getUsername)

try {
val separateDisplaySystemFunctions =
sqlContext.conf.getConf(SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION)
var matchedBuiltInFunctions = if (separateDisplaySystemFunctions && functionPattern == "*"
&& matchingDbs.nonEmpty) {
Comment thread
AngersZhuuuu marked this conversation as resolved.
Outdated
FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet
} else {
Set.empty[FunctionIdentifier]
}
matchingDbs.foreach { db =>
catalog.listFunctions(db, functionPattern).foreach {
case (funcIdentifier, "SYSTEM") if separateDisplaySystemFunctions =>
Comment thread
AngersZhuuuu marked this conversation as resolved.
Outdated
if (!matchedBuiltInFunctions.contains(funcIdentifier)) {
matchedBuiltInFunctions += funcIdentifier
}
case (funcIdentifier, _) =>
val info = catalog.lookupFunctionInfo(funcIdentifier)
val rowData = Array[AnyRef](
Expand All @@ -94,6 +109,17 @@ private[hive] class SparkGetFunctionsOperation(
rowSet.addRow(rowData);
}
}
matchedBuiltInFunctions.foreach { functionIdentifier =>
val info = catalog.lookupFunctionInfo(functionIdentifier)
val rowData = Array[AnyRef](
DEFAULT_HIVE_CATALOG, // FUNCTION_CAT
"SYSTEM", // FUNCTION_SCHEM
Comment thread
AngersZhuuuu marked this conversation as resolved.
Outdated
functionIdentifier.funcName, // FUNCTION_NAME
s"Usage: ${info.getUsage}\nExtended Usage:${info.getExtended}", // REMARKS
DatabaseMetaData.functionResultUnknown.asInstanceOf[AnyRef], // FUNCTION_TYPE
info.getClassName) // SPECIFIC_NAME
rowSet.addRow(rowData);
}
setState(OperationState.FINISHED)
} catch onError()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,14 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
}

test("Spark's own GetFunctionsOperation(SparkGetFunctionsOperation)") {
def checkResult(rs: ResultSet, functionNames: Seq[String]): Unit = {
def checkResult(
rs: ResultSet,
functionNames: Seq[String],
functionSchema: String = "default"): Unit = {
functionNames.foreach { func =>
val exprInfo = FunctionRegistry.expressions(func)._1
assert(rs.next())
assert(rs.getString("FUNCTION_SCHEM") === "default")
assert(rs.getString("FUNCTION_SCHEM") === functionSchema)
assert(rs.getString("FUNCTION_NAME") === exprInfo.getName)
assert(rs.getString("REMARKS") ===
s"Usage: ${exprInfo.getUsage}\nExtended Usage:${exprInfo.getExtended}")
Expand All @@ -226,6 +229,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
}

withJdbcStatement() { statement =>
statement.execute(s"SET ${SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION.key}=false")
val metaData = statement.getConnection.getMetaData
// Hive does not have an overlay function, we use overlay to test.
checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay"))
Expand All @@ -236,6 +240,17 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
checkResult(metaData.getFunctions(null, "default", "shift*"),
Seq("shiftleft", "shiftright", "shiftrightunsigned"))
checkResult(metaData.getFunctions(null, "default", "upPer"), Seq("upper"))

statement.execute(s"SET ${SQLConf.THRIFTSERVER_SEPARATE_DISPLAY_SYSTEM_FUNCTION.key}=true")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a test with two schemas and run an unfiltered getFunctions call to show that previously we'd see duplicates, whereas now the functions are unique?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, thanks!

checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay"), "SYSTEM")
checkResult(metaData.getFunctions(null, null, "overla*"), Seq("overlay"), "SYSTEM")
checkResult(metaData.getFunctions(null, "", "overla*"), Seq("overlay"), "SYSTEM")
checkResult(metaData.getFunctions(null, null, "does-not-exist*"), Seq.empty, "SYSTEM")
checkResult(metaData.getFunctions(null, "default", "overlay"), Seq("overlay"), "SYSTEM")
checkResult(metaData.getFunctions(null, "default", "shift*"),
Seq("shiftleft", "shiftright", "shiftrightunsigned"), "SYSTEM")
checkResult(metaData.getFunctions(null, "default", "upPer"), Seq("upper"), "SYSTEM")

}
}

Expand Down