diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 0225db81925c..9938e5d769e1 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -65,7 +65,13 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} org.slf4j diff --git a/core/pom.xml b/core/pom.xml index 14b217d7fb22..7a56c4ca3c63 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -66,7 +66,13 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} org.apache.spark @@ -177,6 +183,14 @@ org.apache.commons commons-text + + commons-io + commons-io + + + commons-collections + commons-collections + com.google.code.findbugs jsr305 diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8363d570d732..93370f5dae72 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1182,10 +1182,12 @@ private[spark] object SparkSubmitUtils { def resolveDependencyPaths( artifacts: Array[AnyRef], cacheDirectory: File): String = { - artifacts.map { artifactInfo => - val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId + artifacts.map { ai => + val artifactInfo = ai.asInstanceOf[Artifact] + val artifact = artifactInfo.getModuleRevisionId + val testSuffix = if (artifactInfo.getType == "test-jar") "-tests" else "" cacheDirectory.getAbsolutePath + File.separator + - s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}.jar" + s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}${testSuffix}.jar" }.mkString(",") } diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 
f049ad1f5bb7..7e75048df63c 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -126,7 +126,7 @@ javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -226,7 +226,6 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar -stax-api/1.0-2//stax-api-1.0-2.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index a4dbeb112473..c5b324539221 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -3,14 +3,12 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar -accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -aopalliance/1.0//aopalliance-1.0.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar arrow-format/1.0.1//arrow-format-1.0.1.jar arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar @@ -27,15 +25,12 @@ breeze_2.12/1.0//breeze_2.12-1.0.jar cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar chill-java/0.9.5//chill-java-0.9.5.jar 
chill_2.12/0.9.5//chill_2.12-0.9.5.jar -commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.10//commons-codec-1.10.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.8.1//commons-compress-1.8.1.jar -commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar commons-crypto/1.0.0//commons-crypto-1.0.0.jar -commons-daemon/1.0.13//commons-daemon-1.0.13.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar @@ -55,30 +50,13 @@ datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.12.1.1//derby-10.12.1.1.jar -dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -ehcache/3.3.1//ehcache-3.3.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar -geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -guice-servlet/4.0//guice-servlet-4.0.jar -guice/4.0//guice-4.0.jar -hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar -hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar -hadoop-client/3.2.0//hadoop-client-3.2.0.jar -hadoop-common/3.2.0//hadoop-common-3.2.0.jar -hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar -hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar -hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar -hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar -hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar -hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar -hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar 
-hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar -hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar -hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar +hadoop-client-api/3.2.0//hadoop-client-api-3.2.0.jar +hadoop-client-runtime/3.2.0//hadoop-client-runtime-3.2.0.jar hive-beeline/2.3.7//hive-beeline-2.3.7.jar hive-cli/2.3.7//hive-cli-2.3.7.jar hive-common/2.3.7//hive-common-2.3.7.jar @@ -107,8 +85,6 @@ jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar -jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar -jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar @@ -121,13 +97,11 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar -javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.30//jersey-client-2.30.jar @@ -141,30 +115,14 @@ jline/2.14.6//jline-2.14.6.jar joda-time/2.10.5//joda-time-2.10.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar -json-smart/2.3//json-smart-2.3.jar json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar 
json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar -jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar -kerb-admin/1.0.1//kerb-admin-1.0.1.jar -kerb-client/1.0.1//kerb-client-1.0.1.jar -kerb-common/1.0.1//kerb-common-1.0.1.jar -kerb-core/1.0.1//kerb-core-1.0.1.jar -kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar -kerb-identity/1.0.1//kerb-identity-1.0.1.jar -kerb-server/1.0.1//kerb-server-1.0.1.jar -kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar -kerb-util/1.0.1//kerb-util-1.0.1.jar -kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar -kerby-config/1.0.1//kerby-config-1.0.1.jar -kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar -kerby-util/1.0.1//kerby-util-1.0.1.jar -kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar @@ -202,9 +160,7 @@ metrics-json/4.1.1//metrics-json-4.1.1.jar metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar -nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar -okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar @@ -224,7 +180,6 @@ parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9//py4j-0.10.9.jar pyrolite/4.30//pyrolite-4.30.jar -re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar scala-library/2.12.10//scala-library-2.12.10.jar @@ -242,15 +197,12 @@ spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar 
stax-api/1.0.1//stax-api-1.0.1.jar -stax2-api/3.1.4//stax2-api-3.1.4.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar -token-provider/1.0.1//token-provider-1.0.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar velocity/1.5//velocity-1.5.jar -woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index d9d9fb7f55c7..b1e306c49938 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -71,9 +71,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.avro avro-mapred diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95a99ac88412..06a6bef005e6 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -79,6 +79,10 @@ kafka-clients ${kafka.version} + + com.google.code.findbugs + jsr305 + org.apache.commons commons-pool2 diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 941946f30e96..1b0d6d322917 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -58,6 +58,11 @@ mockito-core test + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.deps.scope} + org.apache.spark spark-tags_${scala.binary.version} diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 76ee5bb7b2f8..5a49358a8424 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -91,9 +91,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + 
${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.avro avro-ipc diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 8689e0b8a9ea..a5642a5a68fe 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -58,10 +58,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + test + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} test diff --git a/pom.xml b/pom.xml index 75b6776cbe47..78b29b1a2582 100644 --- a/pom.xml +++ b/pom.xml @@ -243,6 +243,15 @@ compile test + + hadoop-client-api + hadoop-client-runtime + hadoop-client-minicluster + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + ${hadoop.deps.scope} + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + runtime + + + org.apache.hadoop + hadoop-client-minicluster + ${yarn.version} + test + + org.apache.hadoop hadoop-client @@ -1638,6 +1672,14 @@ org.apache.ant ant + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-auth + org.apache.zookeeper zookeeper @@ -2393,17 +2435,6 @@ - - enforce-no-duplicate-dependencies - - enforce - - - - - - - @@ -2863,6 +2894,7 @@ maven-shade-plugin false + false org.spark-project.spark:unused @@ -3124,6 +3156,9 @@ 2.7.4 2.7.1 2.4 + hadoop-client + hadoop-client + hadoop-client diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index bc80769be239..da715c6bdc59 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -40,6 +40,33 @@ true + + hadoop-2.7 + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + 
hadoop-yarn-server-tests + tests + test + + + @@ -69,23 +96,20 @@ org.apache.hadoop - hadoop-yarn-api - - - org.apache.hadoop - hadoop-yarn-common - - - org.apache.hadoop - hadoop-yarn-server-web-proxy + ${hadoop-client-api.artifact} + ${hadoop.version} org.apache.hadoop - hadoop-yarn-client + ${hadoop-client-runtime.artifact} + ${hadoop.version} + ${hadoop.deps.scope} org.apache.hadoop - hadoop-client + ${hadoop-client-minicluster.artifact} + ${hadoop.version} + test @@ -142,13 +166,6 @@ test - - org.apache.hadoop - hadoop-yarn-server-tests - tests - test - - org.mockito mockito-core diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 5f632fbb259f..9b99e8ff9265 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, IOException} import java.lang.reflect.{InvocationTargetException, Modifier} -import java.net.{URI, URL} +import java.net.{URI, URL, URLEncoder} import java.security.PrivilegedExceptionAction import java.util.concurrent.{TimeoutException, TimeUnit} @@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException -import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark._ @@ -308,7 +307,8 @@ private[spark] class ApplicationMaster( // The client-mode AM doesn't listen for incoming connections, so report an invalid port. 
registerAM(Utils.localHostName, -1, sparkConf, sparkConf.getOption("spark.driver.appUIAddress"), appAttemptId) - addAmIpFilter(Some(driverRef), ProxyUriUtils.getPath(appAttemptId.getApplicationId)) + val encodedAppId = URLEncoder.encode(appAttemptId.getApplicationId.toString, "UTF-8") + addAmIpFilter(Some(driverRef), s"/proxy/$encodedAppId") createAllocator(driverRef, sparkConf, clientRpcEnv, appAttemptId, cachedResourcesConf) reporterThread.join() } catch { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index 20f5339c46fe..a813b9913f23 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -80,6 +80,16 @@ abstract class BaseYarnClusterSuite yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage", "100.0") + // capacity-scheduler.xml is missing in hadoop-client-minicluster so this is a workaround + yarnConf.set("yarn.scheduler.capacity.root.queues", "default") + yarnConf.setInt("yarn.scheduler.capacity.root.default.capacity", 100) + yarnConf.setFloat("yarn.scheduler.capacity.root.default.user-limit-factor", 1) + yarnConf.setInt("yarn.scheduler.capacity.root.default.maximum-capacity", 100) + yarnConf.set("yarn.scheduler.capacity.root.default.state", "RUNNING") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_submit_applications", "*") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_administer_queue", "*") + yarnConf.setInt("yarn.scheduler.capacity.node-locality-delay", -1) + yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1) yarnCluster.init(yarnConf) yarnCluster.start() diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 6b79eb722fcd..af976fa1fa98 100644 --- a/sql/catalyst/pom.xml 
+++ b/sql/catalyst/pom.xml @@ -104,6 +104,10 @@ org.antlr antlr4-runtime + + javax.xml.bind + jaxb-api + commons-codec commons-codec diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 0453094cf8b7..4fca6264c059 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -162,6 +162,11 @@ org.datanucleus datanucleus-core + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.deps.scope} + org.apache.thrift libthrift diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 42a0ec0253b8..f9946fe8e061 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -118,11 +118,24 @@ private[hive] object IsolatedClientLoader extends Logging { hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { + val hadoopJarNames = if (hadoopVersion.startsWith("3")) { + Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", + s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") + } else { + Seq(s"org.apache.hadoop:hadoop-client:$hadoopVersion") + } val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1", - s"org.apache.hadoop:hadoop-client:$hadoopVersion") + Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames + + val extraExclusions = if (hadoopVersion.startsWith("3")) { + // hadoop-auth pulled in by older Hive versions could conflict with jars in Hadoop 3.2+, so + // exclude it here in favor of the one that comes with Hadoop 3.2+ + Seq("org.apache.hadoop:hadoop-auth") + } else { + Seq.empty + } val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( @@ -130,7 +143,7 @@ private[hive] object IsolatedClientLoader extends Logging { 
SparkSubmitUtils.buildIvySettings( Some(remoteRepos), ivyPath), - exclusions = version.exclusions) + exclusions = version.exclusions ++ extraExclusions) } val allFiles = classpath.split(",").map(new File(_)).toSet