From d0cf0ecda47558abf6d43553e746c3a3aaf774d2 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 23 Apr 2020 00:18:16 +0800 Subject: [PATCH 1/6] [SPARK-31522][SQL] Hive metastore client initialization related configurations should be static --- .../scala/org/apache/spark/sql/hive/HiveUtils.scala | 13 ++++++------- .../spark/sql/hive/execution/SQLQuerySuite.scala | 11 +++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index d0f79880e85e9..3add05de31289 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -61,7 +61,7 @@ private[spark] object HiveUtils extends Logging { /** The version of hive used internally by Spark SQL. */ val builtinHiveVersion: String = if (isHive23) hiveVersion else "1.2.1" - val HIVE_METASTORE_VERSION = buildConf("spark.sql.hive.metastore.version") + val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + "0.12.0 through 2.3.7 and " + "3.0.0 through 3.1.2.") @@ -72,13 +72,12 @@ private[spark] object HiveUtils extends Logging { // A fake config which is only here for backward compatibility reasons. This config has no effect // to Spark, just for reporting the builtin Hive version of Spark to existing applications that // already rely on this config. - val FAKE_HIVE_VERSION = buildConf("spark.sql.hive.version") + val FAKE_HIVE_VERSION = buildStaticConf("spark.sql.hive.version") .doc(s"deprecated, please use ${HIVE_METASTORE_VERSION.key} to get the Hive version in Spark.") .version("1.1.1") - .stringConf - .createWithDefault(builtinHiveVersion) + .fallbackConf(HIVE_METASTORE_VERSION) - val HIVE_METASTORE_JARS = buildConf("spark.sql.hive.metastore.jars") + val HIVE_METASTORE_JARS = buildStaticConf("spark.sql.hive.metastore.jars") .doc(s""" | Location of the jars that should be used to instantiate the HiveMetastoreClient. | This property can be one of three options: " @@ -137,7 +136,7 @@ private[spark] object HiveUtils extends Logging { .booleanConf .createWithDefault(true) - val HIVE_METASTORE_SHARED_PREFIXES = buildConf("spark.sql.hive.metastore.sharedPrefixes") + val HIVE_METASTORE_SHARED_PREFIXES = buildStaticConf("spark.sql.hive.metastore.sharedPrefixes") .doc("A comma separated list of class prefixes that should be loaded using the classloader " + "that is shared between Spark SQL and a specific version of Hive. An example of classes " + "that should be shared is JDBC drivers that are needed to talk to the metastore. Other " + @@ -151,7 +150,7 @@ private[spark] object HiveUtils extends Logging { private def jdbcPrefixes = Seq( "com.mysql.jdbc", "org.postgresql", "com.microsoft.sqlserver", "oracle.jdbc") - val HIVE_METASTORE_BARRIER_PREFIXES = buildConf("spark.sql.hive.metastore.barrierPrefixes") + val HIVE_METASTORE_BARRIER_PREFIXES = buildStaticConf("spark.sql.hive.metastore.barrierPrefixes") .doc("A comma separated list of class prefixes that should explicitly be reloaded for each " + "version of Hive that Spark SQL is communicating with. For example, Hive UDFs that are " + "declared in a prefix that typically would be shared (i.e. org.apache.spark.*).") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 138dcc586a46a..35f9850a54231 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2493,6 +2493,17 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } } } + + test("SPARK-31522: hive metastore related configurations should be static") { + Seq("spark.sql.hive.metastore.version ", + "spark.sql.hive.version", + "spark.sql.hive.metastore.jars", + "spark.sql.hive.metastore.sharedPrefixes", + "spark.sql.hive.metastore.barrierPrefixes").foreach { key => + val e = intercept[AnalysisException](sql(s"set $key=abc")) + assert(e.getMessage.contains("Cannot modify the value of a static config")) + } + } } class SQLQuerySuite extends SQLQuerySuiteBase with DisableAdaptiveExecutionSuite From 344f5588f430488f6e8d3b50eaacea46c31e01fe Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 23 Apr 2020 00:28:19 +0800 Subject: [PATCH 2/6] nit --- .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 35f9850a54231..eb50c6dfafa1b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2495,7 +2495,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("SPARK-31522: hive metastore related configurations should be static") { - Seq("spark.sql.hive.metastore.version ", + Seq("spark.sql.hive.metastore.version", "spark.sql.hive.version", "spark.sql.hive.metastore.jars", "spark.sql.hive.metastore.sharedPrefixes", From 5c15a98270c428b0d9e7bacf553162d650a887b3 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 23 Apr 2020 10:17:36 +0800 Subject: [PATCH 3/6] fix test --- .../org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala index 8944b93d9b697..efca8dd6178fa 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala @@ -23,7 +23,7 @@ import java.nio.charset.StandardCharsets.UTF_8 import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.internal.Logging import org.apache.spark.sql.{SparkSession, SQLContext} -import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} +import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.util.Utils /** A singleton object for the master program. The slaves should not access this. */ @@ -60,7 +60,6 @@ private[hive] object SparkSQLEnv extends Logging { metadataHive.setOut(new PrintStream(System.out, true, UTF_8.name())) metadataHive.setInfo(new PrintStream(System.err, true, UTF_8.name())) metadataHive.setError(new PrintStream(System.err, true, UTF_8.name())) - sparkSession.conf.set(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion) } } From 09b87ff5cfdfc2844f6b94063e1863be71ff5a78 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 23 Apr 2020 13:30:05 +0800 Subject: [PATCH 4/6] spark.sql.hive.version should be runtime for backward compatibility --- .../org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala | 3 ++- .../src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala index efca8dd6178fa..8944b93d9b697 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala @@ -23,7 +23,7 @@ import java.nio.charset.StandardCharsets.UTF_8 import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.internal.Logging import org.apache.spark.sql.{SparkSession, SQLContext} -import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} import org.apache.spark.util.Utils /** A singleton object for the master program. The slaves should not access this. */ @@ -60,6 +60,7 @@ private[hive] object SparkSQLEnv extends Logging { metadataHive.setOut(new PrintStream(System.out, true, UTF_8.name())) metadataHive.setInfo(new PrintStream(System.err, true, UTF_8.name())) metadataHive.setError(new PrintStream(System.err, true, UTF_8.name())) + sparkSession.conf.set(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 3add05de31289..e76e707786e9a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -72,9 +72,10 @@ private[spark] object HiveUtils extends Logging { // A fake config which is only here for backward compatibility reasons. This config has no effect // to Spark, just for reporting the builtin Hive version of Spark to existing applications that // already rely on this config. - val FAKE_HIVE_VERSION = buildStaticConf("spark.sql.hive.version") + val FAKE_HIVE_VERSION = buildConf("spark.sql.hive.version") .doc(s"deprecated, please use ${HIVE_METASTORE_VERSION.key} to get the Hive version in Spark.") .version("1.1.1") + .internal() .fallbackConf(HIVE_METASTORE_VERSION) val HIVE_METASTORE_JARS = buildStaticConf("spark.sql.hive.metastore.jars") From c72ba701ed5685e89b90fb001dfaf32a4b6a9e4a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 23 Apr 2020 13:38:29 +0800 Subject: [PATCH 5/6] fix test --- .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index eb50c6dfafa1b..e93f585b5c86a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2496,7 +2496,6 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi test("SPARK-31522: hive metastore related configurations should be static") { Seq("spark.sql.hive.metastore.version", - "spark.sql.hive.version", "spark.sql.hive.metastore.jars", "spark.sql.hive.metastore.sharedPrefixes", "spark.sql.hive.metastore.barrierPrefixes").foreach { key => From 7a26872b814730c6a4f235a939fada7287380bd4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 23 Apr 2020 16:27:37 +0800 Subject: [PATCH 6/6] fix tests --- .../src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index e76e707786e9a..04caf57efdc74 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -75,7 +75,6 @@ private[spark] object HiveUtils extends Logging { val FAKE_HIVE_VERSION = buildConf("spark.sql.hive.version") .doc(s"deprecated, please use ${HIVE_METASTORE_VERSION.key} to get the Hive version in Spark.") .version("1.1.1") - .internal() .fallbackConf(HIVE_METASTORE_VERSION) val HIVE_METASTORE_JARS = buildStaticConf("spark.sql.hive.metastore.jars")