diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 60c54dfc98a58..622fea8c54f13 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -476,6 +476,7 @@ object SparkParallelTestGrouping {
     "org.apache.spark.ml.classification.LogisticRegressionSuite",
     "org.apache.spark.ml.classification.LinearSVCSuite",
     "org.apache.spark.sql.SQLQueryTestSuite",
+    "org.apache.spark.sql.hive.client.HadoopVersionInfoSuite",
     "org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperationSuite",
     "org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite",
     "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite",
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 2faf42028f3a2..571c25e356c08 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -61,6 +61,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   import HiveExternalCatalog._
   import CatalogTableType._
 
+  // SPARK-32256: Make sure `VersionInfo` is initialized before touching the isolated classloader.
+  // This is to ensure Hive can get the Hadoop version when using the isolated classloader.
+  org.apache.hadoop.util.VersionInfo.getVersion()
+
   /**
    * A Hive client used to interact with the metastore.
    */
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
new file mode 100644
index 0000000000000..65492abf38cc0
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.client
+
+import java.io.File
+import java.net.URLClassLoader
+
+import org.apache.hadoop.conf.Configuration
+
+import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
+import org.apache.spark.util.Utils
+
+/**
+ * This test suite requires a clean JVM because it's testing the initialization of static code in
+ * `org.apache.hadoop.util.VersionInfo`.
+ */
+class HadoopVersionInfoSuite extends SparkFunSuite {
+  override protected val enableAutoThreadAudit = false
+
+  test("SPARK-32256: Hadoop VersionInfo should be preloaded") {
+    val ivyPath =
+      Utils.createTempDir(namePrefix = s"${classOf[HadoopVersionInfoSuite].getSimpleName}-ivy")
+    try {
+      val hadoopConf = new Configuration()
+      hadoopConf.set("test", "success")
+      hadoopConf.set("datanucleus.schema.autoCreateAll", "true")
+      hadoopConf.set("hive.metastore.schema.verification", "false")
+
+      // Download jars for Hive 2.0
+      val client = IsolatedClientLoader.forVersion(
+        hiveMetastoreVersion = "2.0",
+        hadoopVersion = "2.7.4",
+        sparkConf = new SparkConf(),
+        hadoopConf = hadoopConf,
+        config = HiveClientBuilder.buildConf(Map.empty),
+        ivyPath = Some(ivyPath.getCanonicalPath),
+        sharesHadoopClasses = true)
+      val jars = client.classLoader.getParent.asInstanceOf[URLClassLoader].getURLs
+        .map(u => new File(u.toURI))
+        // Drop all Hadoop jars to use the existing Hadoop jars on the classpath
+        .filter(!_.getName.startsWith("org.apache.hadoop_hadoop-"))
+
+      val sparkConf = new SparkConf()
+      sparkConf.set(HiveUtils.HIVE_METASTORE_VERSION, "2.0")
+      sparkConf.set(
+        HiveUtils.HIVE_METASTORE_JARS,
+        jars.map(_.getCanonicalPath).mkString(File.pathSeparator))
+      HiveClientBuilder.buildConf(Map.empty).foreach { case (k, v) =>
+        hadoopConf.set(k, v)
+      }
+      new HiveExternalCatalog(sparkConf, hadoopConf).client.getState
+    } finally {
+      Utils.deleteRecursively(ivyPath)
+    }
+  }
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala
index ab73f668c6ca6..2ad3afcb214b3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala
@@ -33,7 +33,7 @@ private[client] object HiveClientBuilder {
       Some(new File(sys.props("java.io.tmpdir"), "hive-ivy-cache").getAbsolutePath))
   }
 
-  private def buildConf(extraConf: Map[String, String]) = {
+  private[client] def buildConf(extraConf: Map[String, String]): Map[String, String] = {
    lazy val warehousePath = Utils.createTempDir()
    lazy val metastorePath = Utils.createTempDir()
    metastorePath.delete()
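
For reviewers, a minimal standalone sketch of the classloading behavior the `HiveExternalCatalog` change relies on. This is not part of the patch: `EagerInitSketch` is an illustrative name, and it assumes `hadoop-common` is on the application classpath. Referencing `VersionInfo` early runs its static initializer in the application classloader; a child classloader that delegates Hadoop classes to its parent (as `IsolatedClientLoader` does for shared classes) then observes the already-initialized class rather than re-running the initializer inside the isolated loader.

```scala
import java.net.{URL, URLClassLoader}

object EagerInitSketch {
  def main(args: Array[String]): Unit = {
    // Touching VersionInfo forces its static initializer to run in the
    // application classloader, which can locate Hadoop's version metadata.
    val preloaded = org.apache.hadoop.util.VersionInfo.getVersion()
    println(s"Preloaded Hadoop version: $preloaded")

    // A child loader with no URLs of its own delegates to its parent, so it
    // resolves the same, already-initialized VersionInfo class.
    val child = new URLClassLoader(Array.empty[URL], getClass.getClassLoader)
    val cls = child.loadClass("org.apache.hadoop.util.VersionInfo")
    val seen = cls.getMethod("getVersion").invoke(null)
    println(s"Version seen through the child loader: $seen")
  }
}
```

Both println lines should report the same version, which mirrors why the eager `getVersion()` call in `HiveExternalCatalog` is sufficient: the initialization happens once, before any isolated client loader is constructed.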