apache · zsxwing · Jul 9, 2020 · Jul 9, 2020 · Jul 9, 2020 · Jul 10, 2020
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
@@ -476,6 +476,7 @@ object SparkParallelTestGrouping {
     "org.apache.spark.ml.classification.LogisticRegressionSuite",
     "org.apache.spark.ml.classification.LinearSVCSuite",
     "org.apache.spark.sql.SQLQueryTestSuite",
+    "org.apache.spark.sql.hive.client.HadoopVersionInfoSuite",
     "org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperationSuite",
     "org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite",
     "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite",

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -61,6 +61,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   import HiveExternalCatalog._
   import CatalogTableType._
 
+  // SPARK-32256: Make sure `VersionInfo` is initialized before touching the isolated classloader.
+  // This is to ensure Hive can get the Hadoop version when using the isolated classloader.
+  org.apache.hadoop.util.VersionInfo.getVersion()
+
   /**
    * A Hive client used to interact with the metastore.
    */

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.client
+
+import java.io.File
+import java.net.URLClassLoader
+
+import org.apache.hadoop.conf.Configuration
+
+import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
+import org.apache.spark.util.Utils
+
+/**
+ * This test suite requires a clean JVM because it tests the static initialization of
+ * `org.apache.hadoop.util.VersionInfo`.
+ */
+class HadoopVersionInfoSuite extends SparkFunSuite {
+  override protected val enableAutoThreadAudit = false
+
+  test("SPARK-32256: Hadoop VersionInfo should be preloaded") {
+    val ivyPath =
+      Utils.createTempDir(namePrefix = s"${classOf[HadoopVersionInfoSuite].getSimpleName}-ivy")
+    try {
+      val hadoopConf = new Configuration()
+      hadoopConf.set("test", "success")
+      hadoopConf.set("datanucleus.schema.autoCreateAll", "true")
+      hadoopConf.set("hive.metastore.schema.verification", "false")
+
+      // Download jars for Hive 2.0
+      val client = IsolatedClientLoader.forVersion(
+        hiveMetastoreVersion = "2.0",
+        hadoopVersion = "2.7.4",
+        sparkConf = new SparkConf(),
+        hadoopConf = hadoopConf,
+        config = HiveClientBuilder.buildConf(Map.empty),
+        ivyPath = Some(ivyPath.getCanonicalPath),
+        sharesHadoopClasses = true)
+      val jars = client.classLoader.getParent.asInstanceOf[URLClassLoader].getURLs
+        .map(u => new File(u.toURI))
+        // Drop all Hadoop jars to use the existing Hadoop jars on the classpath
+        .filter(!_.getName.startsWith("org.apache.hadoop_hadoop-"))
+
+      val sparkConf = new SparkConf()
+      sparkConf.set(HiveUtils.HIVE_METASTORE_VERSION, "2.0")
+      sparkConf.set(
+        HiveUtils.HIVE_METASTORE_JARS,
+        jars.map(_.getCanonicalPath).mkString(File.pathSeparator))
+      HiveClientBuilder.buildConf(Map.empty).foreach { case (k, v) =>
+        hadoopConf.set(k, v)
+      }
+      new HiveExternalCatalog(sparkConf, hadoopConf).client.getState
+    } finally {
+      Utils.deleteRecursively(ivyPath)
+    }
+  }
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala
@@ -33,7 +33,7 @@ private[client] object HiveClientBuilder {
       Some(new File(sys.props("java.io.tmpdir"), "hive-ivy-cache").getAbsolutePath))
   }
 
-  private def buildConf(extraConf: Map[String, String]) = {
+  private[client] def buildConf(extraConf: Map[String, String]): Map[String, String] = {
     lazy val warehousePath = Utils.createTempDir()
     lazy val metastorePath = Utils.createTempDir()
     metastorePath.delete()