From 540d6daef3cecbec195333d5aec7595853f2c94d Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 14 Aug 2019 09:19:47 +0800
Subject: [PATCH 01/21] Test on jenkins

---
 dev/run-tests-jenkins.py                             |  1 +
 pom.xml                                              | 13 ++++++++++++-
 .../sql/hive/thriftserver/SparkSQLCLIDriver.scala    |  3 +--
 .../hive/thriftserver/ThriftserverShimUtils.scala    |  6 ++++++
 .../hive/thriftserver/ThriftserverShimUtils.scala    | 11 +++++++++++
 .../scala/org/apache/spark/sql/hive/HiveUtils.scala  |  2 +-
 6 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 4b91a5fa423c..e71b6758e5ee 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -183,6 +183,7 @@ def main():
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"
     if "test-hadoop3.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
+    os.environ["JAVA_HOME"] = "/usr/java/jdk-11.0.1"
 
     build_display_name = os.environ["BUILD_DISPLAY_NAME"]
     build_url = os.environ["BUILD_URL"]
diff --git a/pom.xml b/pom.xml
index b0372ce8a8e6..eef9572dfd23 100644
--- a/pom.xml
+++ b/pom.xml
@@ -132,7 +132,7 @@
     <hive.version>1.2.1.spark2</hive.version>
-    <hive23.version>2.3.5</hive23.version>
+    <hive23.version>2.3.6</hive23.version>
     <hive.version.short>1.2.1</hive.version.short>
@@ -252,6 +252,17 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
+    <repository>
+      <id>staged</id>
+      <name>staged-releases</name>
+      <url>https://repository.apache.org/content/repositories/staging/</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+      <snapshots>
+        <enabled>true</enabled>
+      </snapshots>
+    </repository>
   </repositories>
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index bd58c8b6eeb1..b4c822116c40 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -32,7 +32,6 @@ import org.apache.hadoop.hive.cli.{CliDriver, CliSessionState, OptionsProcessor}
 import org.apache.hadoop.hive.common.HiveInterruptUtils
 import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.ql.Driver
-import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.hive.ql.processors._
 import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
@@ -143,7 +142,7 @@ private[hive] object SparkSQLCLIDriver extends Logging {
     var loader = conf.getClassLoader
     val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS)
     if (StringUtils.isNotBlank(auxJars)) {
-      loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","))
+      loader = ThriftserverShimUtils.addToClassPath(loader, auxJars)
     }
     conf.setClassLoader(loader)
     Thread.currentThread().setContextClassLoader(loader)
diff --git a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
index 4eb5f5da8fdc..3e5de428d71b 100644
--- a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
+++ b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
@@ -17,7 +17,9 @@
 
 package org.apache.spark.sql.hive.thriftserver
 
+import org.apache.commons.lang3.StringUtils
 import org.apache.commons.logging.LogFactory
+import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema, Type}
 import org.apache.hive.service.cli.thrift.TProtocolVersion._
@@ -50,6 +52,10 @@ private[thriftserver] object ThriftserverShimUtils {
 
   private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType
 
+  private[thriftserver] def addToClassPath(loader: ClassLoader, auxJars: String): ClassLoader = {
+    Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","))
+  }
+
   private[thriftserver] val testedProtocolVersions = Seq(
     HIVE_CLI_SERVICE_PROTOCOL_V1,
     HIVE_CLI_SERVICE_PROTOCOL_V2,
diff --git a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
index d586c0e1b6ea..76ef994f978b 100644
--- a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
+++ b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
@@ -17,6 +17,12 @@
 
 package org.apache.spark.sql.hive.thriftserver
 
+import java.security.AccessController
+
+import scala.collection.JavaConverters._
+
+import org.apache.commons.lang3.StringUtils
+import org.apache.hadoop.hive.ql.exec.AddToClassPathAction
 import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.hive.serde2.thrift.Type
 import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema}
@@ -51,6 +57,11 @@ private[thriftserver] object ThriftserverShimUtils {
 
   private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType
 
+  private[thriftserver] def addToClassPath(loader: ClassLoader, auxJars: String): ClassLoader = {
+    val addAction = new AddToClassPathAction(loader, StringUtils.split(auxJars, ",").toList.asJava)
+    AccessController.doPrivileged(addAction)
+  }
+
   private[thriftserver] val testedProtocolVersions = Seq(
     HIVE_CLI_SERVICE_PROTOCOL_V1,
     HIVE_CLI_SERVICE_PROTOCOL_V2,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index 2e6811d5f259..75efaeddb3e5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -59,7 +59,7 @@ private[spark] object HiveUtils extends Logging {
   val isHive23: Boolean = hiveVersion.startsWith("2.3")
 
   /** The version of hive used internally by Spark SQL. */
-  val builtinHiveVersion: String = if (isHive23) hiveVersion else "1.2.1"
+  val builtinHiveVersion: String = if (isHive23) "2.3.5" else "1.2.1"
 
   val HIVE_METASTORE_VERSION = buildConf("spark.sql.hive.metastore.version")
     .doc("Version of the Hive metastore. Available options are " +

From 6821aa5f898c2cf7937618f7fca7010ade7f1620 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 14 Aug 2019 11:08:14 +0800
Subject: [PATCH 02/21] Temporary way for testing JDK 11 on jenkins

---
 dev/run-tests-jenkins | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index 5bc03e41d1f2..c888f014531b 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -31,4 +31,7 @@ if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then
   exit -1
 fi
 
+export JAVA_HOME=/usr/java/jdk-11.0.1
+export PATH=${JAVA_HOME}/bin:${PATH}
+
 exec python -u ./dev/run-tests-jenkins.py "$@"

From e6508c03854833b3c80e81d918aad5289171ba32 Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Wed, 14 Aug 2019 13:34:56 +0900
Subject: [PATCH 03/21] Try to manually copy environment variables from the
 parent in run-tests-jenkins.py (#22)

---
 dev/run-tests-jenkins.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index e71b6758e5ee..661e3f12b6da 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -121,7 +121,8 @@ def run_tests(tests_timeout):
 
     test_result_code = subprocess.Popen(['timeout',
                                          tests_timeout,
-                                         os.path.join(SPARK_HOME, 'dev', 'run-tests')]).wait()
+                                         os.path.join(SPARK_HOME, 'dev', 'run-tests')],
+                                        env=dict(os.environ)).wait()
 
     failure_note_by_errcode = {
         # error to denote run-tests script failures:

From 2ebdcb900b7d22e47a3f5b4b84f1eac3dc61f013 Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Wed, 14 Aug 2019 13:36:53 +0900
Subject: [PATCH 04/21] Revert "Temporary way for testing JDK 11 on jenkins"
 (#23)

---
 dev/run-tests-jenkins | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index c888f014531b..5bc03e41d1f2 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -31,7 +31,4 @@ if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then
   exit -1
 fi
 
-export JAVA_HOME=/usr/java/jdk-11.0.1
-export PATH=${JAVA_HOME}/bin:${PATH}
-
 exec python -u ./dev/run-tests-jenkins.py "$@"

From 0085ad7c35268fd46663d332f4b6a9d8d36c726a Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Wed, 14 Aug 2019 14:39:49 +0900
Subject: [PATCH 05/21] Try shell=True with explicit environment variables
 (#24)

---
 dev/run-tests-jenkins.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 661e3f12b6da..c43adaaff300 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -122,7 +122,8 @@ def run_tests(tests_timeout):
     test_result_code = subprocess.Popen(['timeout',
                                          tests_timeout,
                                          os.path.join(SPARK_HOME, 'dev', 'run-tests')],
-                                        env=dict(os.environ)).wait()
+                                        env=dict(os.environ),
+                                        shell=True).wait()
 
     failure_note_by_errcode = {
         # error to denote run-tests script failures:

From 1c1143f47682e2422bfdbeb97fb93b413e2f8d1d Mon Sep 17 00:00:00 2001
From: HyukjinKwon
Date: Wed, 14 Aug 2019 14:48:17 +0900
Subject: [PATCH 06/21] Revert "Try shell=True with explicit environment
 variables (#24)"

This reverts commit 0085ad7c35268fd46663d332f4b6a9d8d36c726a.
---
 dev/run-tests-jenkins.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index c43adaaff300..661e3f12b6da 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -122,8 +122,7 @@ def run_tests(tests_timeout):
     test_result_code = subprocess.Popen(['timeout',
                                          tests_timeout,
                                          os.path.join(SPARK_HOME, 'dev', 'run-tests')],
-                                        env=dict(os.environ),
-                                        shell=True).wait()
+                                        env=dict(os.environ)).wait()
 
     failure_note_by_errcode = {
         # error to denote run-tests script failures:

From 84351106a9abc5223b635751cc55ad820a668573 Mon Sep 17 00:00:00 2001
From: HyukjinKwon
Date: Wed, 14 Aug 2019 14:48:26 +0900
Subject: [PATCH 07/21] Revert " Revert "Temporary way for testing JDK 11 on
 jenkins" (#23)"

This reverts commit 2ebdcb900b7d22e47a3f5b4b84f1eac3dc61f013.

---
 dev/run-tests-jenkins | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index 5bc03e41d1f2..c888f014531b 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -31,4 +31,7 @@ if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then
   exit -1
 fi
 
+export JAVA_HOME=/usr/java/jdk-11.0.1
+export PATH=${JAVA_HOME}/bin:${PATH}
+
 exec python -u ./dev/run-tests-jenkins.py "$@"

From 77a70ae1b98b538a315ca7f53e44fd15a49b0ec2 Mon Sep 17 00:00:00 2001
From: HyukjinKwon
Date: Wed, 14 Aug 2019 14:48:37 +0900
Subject: [PATCH 08/21] Revert "Try to manually copy environment variables
 from the parent in run-tests-jenkins.py (#22)"

This reverts commit e6508c03854833b3c80e81d918aad5289171ba32.

---
 dev/run-tests-jenkins.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 661e3f12b6da..e71b6758e5ee 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -121,8 +121,7 @@ def run_tests(tests_timeout):
 
     test_result_code = subprocess.Popen(['timeout',
                                          tests_timeout,
-                                         os.path.join(SPARK_HOME, 'dev', 'run-tests')],
-                                        env=dict(os.environ)).wait()
+                                         os.path.join(SPARK_HOME, 'dev', 'run-tests')]).wait()
 
     failure_note_by_errcode = {
         # error to denote run-tests script failures:

From 6fdf3096419280953499b63f46bc894ad1b9149a Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 14 Aug 2019 17:08:07 +0800
Subject: [PATCH 09/21] Check if we need to set PATH for JDK11

---
 dev/run-tests-jenkins | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index c888f014531b..4ef797f36aee 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -32,6 +32,6 @@ if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then
 fi
 
 export JAVA_HOME=/usr/java/jdk-11.0.1
-export PATH=${JAVA_HOME}/bin:${PATH}
+# export PATH=${JAVA_HOME}/bin:${PATH}
 
 exec python -u ./dev/run-tests-jenkins.py "$@"

From b9844149d342408326c8d6c142779a1fd027e70d Mon Sep 17 00:00:00 2001
From: HyukjinKwon
Date: Wed, 14 Aug 2019 18:55:50 +0900
Subject: [PATCH 10/21] Add JAVA_HOME into PATH as well

---
 dev/run-tests-jenkins    | 3 ---
 dev/run-tests-jenkins.py | 1 +
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index 4ef797f36aee..5bc03e41d1f2 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -31,7 +31,4 @@ if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then
   exit -1
 fi
 
-export JAVA_HOME=/usr/java/jdk-11.0.1
-# export PATH=${JAVA_HOME}/bin:${PATH}
-
 exec python -u ./dev/run-tests-jenkins.py "$@"
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index e71b6758e5ee..9d43d43477d6 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -184,6 +184,7 @@ def main():
     if "test-hadoop3.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
     os.environ["JAVA_HOME"] = "/usr/java/jdk-11.0.1"
+    os.environ["PATH"] = "%s/bin:%s" % (os.environ["JAVA_HOME"], os.environ["PATH"])
 
     build_display_name = os.environ["BUILD_DISPLAY_NAME"]
     build_url = os.environ["BUILD_URL"]

From 593a154813880fb13e3091043d809e0c00e57bc5 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 14 Aug 2019 21:40:29 +0800
Subject: [PATCH 11/21] fix

---
 python/pyspark/ml/tests/test_algorithms.py | 2 +-
 python/pyspark/mllib/clustering.py         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py
index 4061fda3b19c..1dde5554f73b 100644
--- a/python/pyspark/ml/tests/test_algorithms.py
+++ b/python/pyspark/ml/tests/test_algorithms.py
@@ -86,7 +86,7 @@ def test_raw_and_probability_prediction(self):
         expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045]
         self.assertTrue(result.prediction, expected_prediction)
         self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4))
-        self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1E-4))
+        # self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1E-4))
 
 
 class OneVsRestTests(SparkSessionTestCase):
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index 3524fcfeb795..c609fda08fa0 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -384,9 +384,9 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader):
     0
     >>> softPredicted = model.predictSoft([-0.1,-0.05])
     >>> abs(softPredicted[0] - 1.0) < 0.001
-    True
+    False
     >>> abs(softPredicted[1] - 0.0) < 0.001
-    True
+    False
     >>> abs(softPredicted[2] - 0.0) < 0.001
     True
 

From 8b04e78dc86fc4da981a1c42b8b029f0f3821ece Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 14 Aug 2019 23:06:45 +0800
Subject: [PATCH 12/21] Revert "fix"

This reverts commit 593a1548

---
 python/pyspark/ml/tests/test_algorithms.py | 2 +-
 python/pyspark/mllib/clustering.py         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py
index 1dde5554f73b..4061fda3b19c 100644
--- a/python/pyspark/ml/tests/test_algorithms.py
+++ b/python/pyspark/ml/tests/test_algorithms.py
@@ -86,7 +86,7 @@ def test_raw_and_probability_prediction(self):
         expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045]
         self.assertTrue(result.prediction, expected_prediction)
         self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4))
-        # self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1E-4))
+        self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1E-4))
 
 
 class OneVsRestTests(SparkSessionTestCase):
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index c609fda08fa0..3524fcfeb795 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -384,9 +384,9 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader):
     0
     >>> softPredicted = model.predictSoft([-0.1,-0.05])
     >>> abs(softPredicted[0] - 1.0) < 0.001
-    False
+    True
    >>> abs(softPredicted[1] - 0.0) < 0.001
-    False
+    True
     >>> abs(softPredicted[2] - 0.0) < 0.001
     True
 

From 59554f93036d030364390051d87cf118b175a257 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 14 Aug 2019 23:08:11 +0800
Subject: [PATCH 13/21] set java.version to 11

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index eef9572dfd23..e55a0a97b975 100644
--- a/pom.xml
+++ b/pom.xml
@@ -115,7 +115,7 @@
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    <java.version>1.8</java.version>
+    <java.version>11</java.version>
     <maven.compiler.source>${java.version}</maven.compiler.source>
     <maven.compiler.target>${java.version}</maven.compiler.target>
     <maven.version>3.6.1</maven.version>

From 17285a636656a2f99280c251917c108f884ab692 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Wed, 14 Aug 2019 23:28:49 +0800
Subject: [PATCH 14/21] Update

---
 docs/building-spark.md                                         | 4 ++--
 docs/sql-data-sources-hive-tables.md                           | 2 +-
 docs/sql-migration-guide-hive-compatibility.md                 | 2 +-
 .../spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala  | 4 ++--
 .../src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala   | 4 ++--
 .../apache/spark/sql/hive/client/IsolatedClientLoader.scala    | 2 +-
 .../main/scala/org/apache/spark/sql/hive/client/package.scala  | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/building-spark.md b/docs/building-spark.md
index fe7b4be20a11..1f8e51fe32fb 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -83,12 +83,12 @@ Example:
 
 To enable Hive integration for Spark SQL along with its JDBC server and CLI,
 add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options.
-By default, Spark will use Hive 1.2.1 with the `hadoop-2.7` profile, and Hive 2.3.5 with the `hadoop-3.2` profile.
+By default, Spark will use Hive 1.2.1 with the `hadoop-2.7` profile, and Hive 2.3.6 with the `hadoop-3.2` profile.
 
     # With Hive 1.2.1 support
     ./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package
 
-    # With Hive 2.3.5 support
+    # With Hive 2.3.6 support
     ./build/mvn -Pyarn -Phive -Phive-thriftserver -Phadoop-3.2 -DskipTests clean package
 
 ## Packaging without Hadoop Dependencies for YARN
diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md
index 5688011514e1..8e4b8329d59e 100644
--- a/docs/sql-data-sources-hive-tables.md
+++ b/docs/sql-data-sources-hive-tables.md
@@ -130,7 +130,7 @@
     1.2.1
     Version of the Hive metastore. Available
-    options are 0.12.0 through 2.3.5 and 3.0.0 through 3.1.1.
+    options are 0.12.0 through 2.3.6 and 3.0.0 through 3.1.1.
diff --git a/docs/sql-migration-guide-hive-compatibility.md b/docs/sql-migration-guide-hive-compatibility.md
index f955e31d49a8..c410d60e861f 100644
--- a/docs/sql-migration-guide-hive-compatibility.md
+++ b/docs/sql-migration-guide-hive-compatibility.md
@@ -25,7 +25,7 @@ license: |
 Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs.
 Currently, Hive SerDes and UDFs are based on Hive 1.2.1,
 and Spark SQL can be connected to different versions of Hive Metastore
-(from 0.12.0 to 2.3.5 and 3.0.0 to 3.1.1. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)).
+(from 0.12.0 to 2.3.6 and 3.0.0 to 3.1.1. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)).
 
 #### Deploying in Existing Hive Warehouses
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
index 9c53e9018668..b7185db2f2ae 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
@@ -537,7 +537,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest {
       }
 
       if (HiveUtils.isHive23) {
-        assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.5"))
+        assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.6"))
       } else {
         assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1"))
       }
@@ -554,7 +554,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest {
       }
 
       if (HiveUtils.isHive23) {
-        assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.5"))
+        assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.6"))
       } else {
         assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1"))
       }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index 75efaeddb3e5..757489805653 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -59,11 +59,11 @@ private[spark] object HiveUtils extends Logging {
   val isHive23: Boolean = hiveVersion.startsWith("2.3")
 
   /** The version of hive used internally by Spark SQL. */
-  val builtinHiveVersion: String = if (isHive23) "2.3.5" else "1.2.1"
+  val builtinHiveVersion: String = if (isHive23) hiveVersion else "1.2.1"
 
   val HIVE_METASTORE_VERSION = buildConf("spark.sql.hive.metastore.version")
     .doc("Version of the Hive metastore. Available options are " +
-      "0.12.0 through 2.3.5 and " +
+      "0.12.0 through 2.3.6 and " +
       "3.0.0 through 3.1.1.")
     .stringConf
     .createWithDefault(builtinHiveVersion)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 32178800a8ff..752ed9ac338e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -101,7 +101,7 @@ private[hive] object IsolatedClientLoader extends Logging {
     case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
     case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
     case "2.2" | "2.2.0" => hive.v2_2
-    case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" => hive.v2_3
+    case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" | "2.3.6" => hive.v2_3
     case "3.0" | "3.0.0" => hive.v3_0
     case "3.1" | "3.1.0" | "3.1.1" => hive.v3_1
     case version =>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
index 31a060fc8f54..4082b4a7b71a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
@@ -89,7 +89,7 @@ package object client {
   // Since HIVE-14496, Hive materialized view need calcite-core.
   // For spark, only VersionsSuite currently creates a hive materialized view for testing.
-  case object v2_3 extends HiveVersion("2.3.5",
+  case object v2_3 extends HiveVersion("2.3.6",
     exclusions = Seq("org.apache.calcite:calcite-druid",
       "org.apache.calcite.avatica:avatica",
      "org.apache.curator:*",

From 9254dfbc60464c7b326da6a1688d328e71dffd69 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Thu, 15 Aug 2019 10:14:36 +0800
Subject: [PATCH 15/21] Fix: unresolved dependency:
 org.apache.hive#hive-metastore;2.3.6: not found

[info] - hadoop configuration preserved *** FAILED *** (3 seconds, 348 milliseconds)
[info]   java.lang.RuntimeException: [unresolved dependency: org.apache.hive#hive-metastore;2.3.6: not found,
unresolved dependency: org.apache.hive#hive-exec;2.3.6: not found,
unresolved dependency: org.apache.hive#hive-common;2.3.6: not found,
unresolved dependency: org.apache.hive#hive-serde;2.3.6: not found]

---
 pom.xml                                                     | 1 +
 .../apache/spark/sql/hive/client/IsolatedClientLoader.scala | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index e55a0a97b975..7e192b77e122 100644
--- a/pom.xml
+++ b/pom.xml
@@ -252,6 +252,7 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
+
    <repository>
      <id>staged</id>
      <name>staged-releases</name>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 752ed9ac338e..e9eb0d2c69be 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -123,7 +123,8 @@ private[hive] object IsolatedClientLoader extends Logging {
     SparkSubmitUtils.resolveMavenCoordinates(
       hiveArtifacts.mkString(","),
       SparkSubmitUtils.buildIvySettings(
-        Some("http://www.datanucleus.org/downloads/maven2"),
+        // TODO Remove this once Hive 2.3.6 released
+        Some("https://repository.apache.org/content/repositories/staging"),
         ivyPath),
       exclusions = version.exclusions)
 }

From 0ac0b30947dc1da33d0ac5ed0c8201a4c3d54c8a Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Thu, 15 Aug 2019 12:25:47 +0800
Subject: [PATCH 16/21] Revert java.version to 1.8

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 7e192b77e122..78c354f33283 100644
--- a/pom.xml
+++ b/pom.xml
@@ -115,7 +115,7 @@
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    <java.version>11</java.version>
+    <java.version>1.8</java.version>
     <maven.compiler.source>${java.version}</maven.compiler.source>
     <maven.compiler.target>${java.version}</maven.compiler.target>
     <maven.version>3.6.1</maven.version>

From 0caa93f8ca0be1dfd3bbe89ef2a436ef4f4ceadc Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Sat, 17 Aug 2019 07:28:38 +0800
Subject: [PATCH 17/21] Update deps

---
 dev/deps/spark-deps-hadoop-2.7 | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 8638139d966d..21f87c32095c 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -95,12 +95,15 @@ jackson-module-jaxb-annotations-2.9.9.jar
 jackson-module-paranamer-2.9.9.jar
 jackson-module-scala_2.12-2.9.9.jar
 jackson-xc-1.9.13.jar
+jakarta.activation-1.2.1.jar
+jakarta.activation-api-1.2.1.jar
 jakarta.annotation-api-1.3.4.jar
 jakarta.inject-2.5.0.jar
 jakarta.ws.rs-api-2.1.5.jar
 jakarta.xml.bind-api-2.3.2.jar
 janino-3.0.15.jar
 javassist-3.22.0-CR2.jar
+javax.annotation-api-1.2.jar
 javax.inject-1.jar
 javax.servlet-api-3.1.0.jar
 javolution-5.5.1.jar

From 24bb028efb2d1061c372acc1476b18200d911f64 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Sat, 17 Aug 2019 07:44:22 +0800
Subject: [PATCH 18/21] Update deps2

---
 dev/deps/spark-deps-hadoop-3.2 | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dev/deps/spark-deps-hadoop-3.2 b/dev/deps/spark-deps-hadoop-3.2
index af93dd167b4d..b638b1e5b437 100644
--- a/dev/deps/spark-deps-hadoop-3.2
+++ b/dev/deps/spark-deps-hadoop-3.2
@@ -96,12 +96,15 @@ jackson-mapper-asl-1.9.13.jar
 jackson-module-jaxb-annotations-2.9.9.jar
 jackson-module-paranamer-2.9.9.jar
 jackson-module-scala_2.12-2.9.9.jar
+jakarta.activation-1.2.1.jar
+jakarta.activation-api-1.2.1.jar
 jakarta.annotation-api-1.3.4.jar
 jakarta.inject-2.5.0.jar
 jakarta.ws.rs-api-2.1.5.jar
 jakarta.xml.bind-api-2.3.2.jar
 janino-3.0.15.jar
 javassist-3.22.0-CR2.jar
+javax.annotation-api-1.2.jar
 javax.inject-1.jar
 javax.servlet-api-3.1.0.jar
 javolution-5.5.1.jar

From 46322dfa623348a39304b9983edbea18d30d2090 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Sat, 17 Aug 2019 14:37:28 +0800
Subject: [PATCH 19/21] Test SPARK-28765 on JDK 11

---
 dev/deps/spark-deps-hadoop-2.7            |  3 ---
 dev/deps/spark-deps-hadoop-3.2            |  3 ---
 pom.xml                                   | 14 ++++++++++++++
 resource-managers/kubernetes/core/pom.xml |  5 +++++
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 21f87c32095c..8638139d966d 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -95,15 +95,12 @@ jackson-module-jaxb-annotations-2.9.9.jar
 jackson-module-paranamer-2.9.9.jar
 jackson-module-scala_2.12-2.9.9.jar
 jackson-xc-1.9.13.jar
-jakarta.activation-1.2.1.jar
-jakarta.activation-api-1.2.1.jar
 jakarta.annotation-api-1.3.4.jar
 jakarta.inject-2.5.0.jar
 jakarta.ws.rs-api-2.1.5.jar
 jakarta.xml.bind-api-2.3.2.jar
 janino-3.0.15.jar
 javassist-3.22.0-CR2.jar
-javax.annotation-api-1.2.jar
 javax.inject-1.jar
 javax.servlet-api-3.1.0.jar
 javolution-5.5.1.jar
diff --git a/dev/deps/spark-deps-hadoop-3.2 b/dev/deps/spark-deps-hadoop-3.2
index b638b1e5b437..af93dd167b4d 100644
--- a/dev/deps/spark-deps-hadoop-3.2
+++ b/dev/deps/spark-deps-hadoop-3.2
@@ -96,15 +96,12 @@ jackson-mapper-asl-1.9.13.jar
 jackson-module-jaxb-annotations-2.9.9.jar
 jackson-module-paranamer-2.9.9.jar
 jackson-module-scala_2.12-2.9.9.jar
-jakarta.activation-1.2.1.jar
-jakarta.activation-api-1.2.1.jar
 jakarta.annotation-api-1.3.4.jar
 jakarta.inject-2.5.0.jar
 jakarta.ws.rs-api-2.1.5.jar
 jakarta.xml.bind-api-2.3.2.jar
 janino-3.0.15.jar
 javassist-3.22.0-CR2.jar
-javax.annotation-api-1.2.jar
 javax.inject-1.jar
 javax.servlet-api-3.1.0.jar
 javolution-5.5.1.jar
diff --git a/pom.xml b/pom.xml
index 5d9e8b224386..13a0c42ca396 100644
--- a/pom.xml
+++ b/pom.xml
@@ -742,11 +742,25 @@
       <groupId>org.glassfish.jersey.core</groupId>
       <artifactId>jersey-server</artifactId>
       <version>${jersey.version}</version>
+
+      <exclusions>
+        <exclusion>
+          <groupId>jakarta.xml.bind</groupId>
+          <artifactId>jakarta.xml.bind-api</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.glassfish.jersey.core</groupId>
       <artifactId>jersey-common</artifactId>
       <version>${jersey.version}</version>
+
+      <exclusions>
+        <exclusion>
+          <groupId>com.sun.activation</groupId>
+          <artifactId>jakarta.activation</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.glassfish.jersey.core</groupId>
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index 23106cb7ec68..2c101b879f99 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -66,6 +66,11 @@
     <dependency>
      <groupId>com.fasterxml.jackson.dataformat</groupId>
      <artifactId>jackson-dataformat-yaml</artifactId>
    </dependency>
+    <dependency>
+      <groupId>javax.annotation</groupId>
+      <artifactId>javax.annotation-api</artifactId>
+    </dependency>
+
    <dependency>

From 9defec229199e690a5b25734ba3159182b382e3c Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Sun, 18 Aug 2019 20:24:16 +0800
Subject: [PATCH 20/21] Test Hive 2.3.6 on JDK 8

---
 dev/run-tests-jenkins.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 9d43d43477d6..5f62f2f3ee7f 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -183,8 +183,8 @@ def main():
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"
     if "test-hadoop3.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
-    os.environ["JAVA_HOME"] = "/usr/java/jdk-11.0.1"
-    os.environ["PATH"] = "%s/bin:%s" % (os.environ["JAVA_HOME"], os.environ["PATH"])
+    # os.environ["JAVA_HOME"] = "/usr/java/jdk-11.0.1"
+    # os.environ["PATH"] = "%s/bin:%s" % (os.environ["JAVA_HOME"], os.environ["PATH"])
 
     build_display_name = os.environ["BUILD_DISPLAY_NAME"]
     build_url = os.environ["BUILD_URL"]

From ff4783c4c77f79d95fa805588f3283970a133780 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Fri, 23 Aug 2019 07:58:35 +0800
Subject: [PATCH 21/21] Hive 2.3.6 vote passes

---
 dev/run-tests-jenkins.py                             |  2 --
 pom.xml                                              | 12 ------------
 .../sql/hive/thriftserver/SparkSQLCLIDriver.scala    |  2 +-
 .../hive/thriftserver/ThriftserverShimUtils.scala    |  7 ++++---
 .../hive/thriftserver/ThriftserverShimUtils.scala    |  7 ++++---
 .../spark/sql/hive/client/IsolatedClientLoader.scala |  3 +--
 6 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 5f62f2f3ee7f..4b91a5fa423c 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -183,8 +183,6 @@ def main():
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"
     if "test-hadoop3.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
-    # os.environ["JAVA_HOME"] = "/usr/java/jdk-11.0.1"
-    # os.environ["PATH"] = "%s/bin:%s" % (os.environ["JAVA_HOME"], os.environ["PATH"])
 
     build_display_name = os.environ["BUILD_DISPLAY_NAME"]
     build_url = os.environ["BUILD_URL"]
diff --git a/pom.xml b/pom.xml
index d77f48fadf1f..6a8424cc1328 100644
--- a/pom.xml
+++ b/pom.xml
@@ -253,18 +253,6 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
-
-    <repository>
-      <id>staged</id>
-      <name>staged-releases</name>
-      <url>https://repository.apache.org/content/repositories/staging/</url>
-      <releases>
-        <enabled>true</enabled>
-      </releases>
-      <snapshots>
-        <enabled>true</enabled>
-      </snapshots>
-    </repository>
   </repositories>
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index b4c822116c40..b9614d49eadb 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -142,7 +142,7 @@ private[hive] object SparkSQLCLIDriver extends Logging {
     var loader = conf.getClassLoader
     val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS)
     if (StringUtils.isNotBlank(auxJars)) {
-      loader = ThriftserverShimUtils.addToClassPath(loader, auxJars)
+      loader = ThriftserverShimUtils.addToClassPath(loader, StringUtils.split(auxJars, ","))
     }
     conf.setClassLoader(loader)
     Thread.currentThread().setContextClassLoader(loader)
diff --git a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
index 3e5de428d71b..87c0f8f6a571 100644
--- a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
+++ b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.hive.thriftserver
 
-import org.apache.commons.lang3.StringUtils
 import org.apache.commons.logging.LogFactory
 import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.hive.ql.session.SessionState
@@ -52,8 +51,10 @@ private[thriftserver] object ThriftserverShimUtils {
 
   private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType
 
-  private[thriftserver] def addToClassPath(loader: ClassLoader, auxJars: String): ClassLoader = {
-    Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","))
+  private[thriftserver] def addToClassPath(
+      loader: ClassLoader,
+      auxJars: Array[String]): ClassLoader = {
+    Utilities.addToClassPath(loader, auxJars)
   }
 
   private[thriftserver] val testedProtocolVersions = Seq(
diff --git a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
index 76ef994f978b..124c9937c0fc 100644
--- a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
+++ b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
@@ -21,7 +21,6 @@ import java.security.AccessController
 
 import scala.collection.JavaConverters._
 
-import org.apache.commons.lang3.StringUtils
 import org.apache.hadoop.hive.ql.exec.AddToClassPathAction
 import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.hive.serde2.thrift.Type
@@ -57,8 +56,10 @@ private[thriftserver] object ThriftserverShimUtils {
 
   private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType
 
-  private[thriftserver] def addToClassPath(loader: ClassLoader, auxJars: String): ClassLoader = {
-    val addAction = new AddToClassPathAction(loader, StringUtils.split(auxJars, ",").toList.asJava)
+  private[thriftserver] def addToClassPath(
+      loader: ClassLoader,
+      auxJars: Array[String]): ClassLoader = {
+    val addAction = new AddToClassPathAction(loader, auxJars.toList.asJava)
     AccessController.doPrivileged(addAction)
   }
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index e9eb0d2c69be..752ed9ac338e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -123,8 +123,7 @@ private[hive] object IsolatedClientLoader extends Logging {
     SparkSubmitUtils.resolveMavenCoordinates(
       hiveArtifacts.mkString(","),
       SparkSubmitUtils.buildIvySettings(
-        // TODO Remove this once Hive 2.3.6 released
-        Some("https://repository.apache.org/content/repositories/staging"),
+        Some("http://www.datanucleus.org/downloads/maven2"),
         ivyPath),
       exclusions = version.exclusions)
 }
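
A note on the end state of the addToClassPath shim that PATCH 01 introduces and PATCH 21 finalizes: SparkSQLCLIDriver splits the comma-separated hive.aux.jars.path value itself and passes an Array[String] to a Hive-version-specific ThriftserverShimUtils. Below is a minimal, self-contained Scala sketch of that pattern. The two addJars* helpers are hypothetical stand-ins (a plain URLClassLoader wrap) for Hive 1.2.1's Utilities.addToClassPath and Hive 2.3's AddToClassPathAction run under AccessController.doPrivileged; in Spark the variant is chosen at build time by compiling only one of the v1.2.1/ or v2.3.5/ source trees, so the hive23 flag here exists only to keep both branches in one runnable file.

import java.net.{URL, URLClassLoader}
import java.nio.file.Paths

object ThriftserverShimSketch {

  private def toUrl(path: String): URL = Paths.get(path).toUri.toURL

  // Hive 1.2.1 branch: stands in for Utilities.addToClassPath, which
  // returns a classloader extended with the extra jars.
  private def addJarsHive12(loader: ClassLoader, jars: Array[String]): ClassLoader =
    new URLClassLoader(jars.map(toUrl), loader)

  // Hive 2.3 branch: the real code wraps the same operation in an
  // AddToClassPathAction executed via AccessController.doPrivileged;
  // the observable effect is again a new classloader.
  private def addJarsHive23(loader: ClassLoader, jars: Array[String]): ClassLoader =
    new URLClassLoader(jars.map(toUrl), loader)

  // The shared signature after PATCH 21: callers split hive.aux.jars.path
  // on commas and pass the resulting Array[String].
  def addToClassPath(loader: ClassLoader, auxJars: Array[String], hive23: Boolean): ClassLoader =
    if (hive23) addJarsHive23(loader, auxJars) else addJarsHive12(loader, auxJars)

  def main(args: Array[String]): Unit = {
    val auxJars = "/tmp/aux1.jar,/tmp/aux2.jar".split(",") // like HIVEAUXJARS
    val loader = addToClassPath(getClass.getClassLoader, auxJars, hive23 = true)
    Thread.currentThread().setContextClassLoader(loader) // mirrors SparkSQLCLIDriver
    println(loader.asInstanceOf[URLClassLoader].getURLs.mkString(", "))
  }
}

Keeping the version-specific call behind one signature is what lets SparkSQLCLIDriver stay identical across both Hive profiles; only the shim source tree compiled into the build changes.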