diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 667371dacf5d..31bafc93a280 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -32,52 +32,11 @@ jobs:
# Note that the modules below are from sparktestsupport/modules.py.
modules:
- >-
- core, unsafe, kvstore, avro,
- network-common, network-shuffle, repl, launcher,
- examples, sketch, graphx
- - >-
- catalyst, hive-thriftserver
- - >-
- streaming, sql-kafka-0-10, streaming-kafka-0-10,
- mllib-local, mllib,
- yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
- - >-
- pyspark-sql, pyspark-mllib, pyspark-resource
- - >-
- pyspark-core, pyspark-streaming, pyspark-ml
- - >-
- sparkr
+ yarn
# Here, we split Hive and SQL tests into some of slow ones and the rest of them.
included-tags: [""]
excluded-tags: [""]
comment: [""]
- include:
- # Hive tests
- - modules: hive
- java: 1.8
- hadoop: hadoop3.2
- hive: hive2.3
- included-tags: org.apache.spark.tags.SlowHiveTest
- comment: "- slow tests"
- - modules: hive
- java: 1.8
- hadoop: hadoop3.2
- hive: hive2.3
- excluded-tags: org.apache.spark.tags.SlowHiveTest
- comment: "- other tests"
- # SQL tests
- - modules: sql
- java: 1.8
- hadoop: hadoop3.2
- hive: hive2.3
- included-tags: org.apache.spark.tags.ExtendedSQLTest
- comment: "- slow tests"
- - modules: sql
- java: 1.8
- hadoop: hadoop3.2
- hive: hive2.3
- excluded-tags: org.apache.spark.tags.ExtendedSQLTest
- comment: "- other tests"
env:
MODULES_TO_TEST: ${{ matrix.modules }}
EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
@@ -183,6 +142,8 @@ jobs:
# Run the tests.
- name: Run tests
run: |
+ which python
+ python --version
# Hive tests become flaky when running in parallel as it's too intensive.
if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
mkdir -p ~/.m2
@@ -201,121 +162,3 @@ jobs:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"
- # Static analysis, and documentation build
- lint:
- name: Linters, licenses, dependencies and documentation generation
- runs-on: ubuntu-latest
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- docs-maven-
- - name: Install JDK 1.8
- uses: actions/setup-java@v1
- with:
- java-version: 1.8
- - name: Install Python 3.6
- uses: actions/setup-python@v2
- with:
- python-version: 3.6
- architecture: x64
- - name: Install Python linter dependencies
- run: |
- # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
- # See also https://github.com/sphinx-doc/sphinx/issues/7551.
- pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx
- - name: Install R 4.0
- uses: r-lib/actions/setup-r@v1
- with:
- r-version: 4.0
- - name: Install R linter dependencies and SparkR
- run: |
- sudo apt-get install -y libcurl4-openssl-dev
- sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
- sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
- ./R/install-dev.sh
- - name: Install Ruby 2.7 for documentation generation
- uses: actions/setup-ruby@v1
- with:
- ruby-version: 2.7
- - name: Install dependencies for documentation generation
- run: |
- # pandoc is required to generate PySpark APIs as well in nbsphinx.
- sudo apt-get install -y libcurl4-openssl-dev pandoc
- # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
- # See also https://github.com/sphinx-doc/sphinx/issues/7551.
- pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx
- gem install jekyll jekyll-redirect-from rouge
- sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- - name: Scala linter
- run: ./dev/lint-scala
- - name: Java linter
- run: ./dev/lint-java
- - name: Python linter
- run: ./dev/lint-python
- - name: R linter
- run: ./dev/lint-r
- - name: License test
- run: ./dev/check-license
- - name: Dependencies test
- run: ./dev/test-dependencies.sh
- - name: Run documentation build
- run: |
- cd docs
- jekyll build
-
- java11:
- name: Java 11 build
- runs-on: ubuntu-latest
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: java11-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- java11-maven-
- - name: Install Java 11
- uses: actions/setup-java@v1
- with:
- java-version: 11
- - name: Build with Maven
- run: |
- export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
- export MAVEN_CLI_OPTS="--no-transfer-progress"
- mkdir -p ~/.m2
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
- rm -rf ~/.m2/repository/org/apache/spark
-
- scala-213:
- name: Scala 2.13 build
- runs-on: ubuntu-latest
- steps:
- - name: Checkout Spark repository
- uses: actions/checkout@v2
- - name: Cache Maven local repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: scala-213-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- scala-213-maven-
- - name: Install Java 11
- uses: actions/setup-java@v1
- with:
- java-version: 11
- - name: Build with Maven
- run: |
- export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
- export MAVEN_CLI_OPTS="--no-transfer-progress"
- mkdir -p ~/.m2
- ./dev/change-scala-version.sh 2.13
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 -Pscala-2.13 install
- rm -rf ~/.m2/repository/org/apache/spark
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 0225db81925c..50ad3340a1c4 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -63,10 +63,6 @@
-
- org.apache.hadoop
- hadoop-client
-
org.slf4j
slf4j-api
@@ -74,6 +70,29 @@
+
+
+ hadoop-2.7
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+
+
+
+
+ hadoop-3.2
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+
+
+
+
+
target/scala-${scala.binary.version}/classes
target/scala-${scala.binary.version}/test-classes
diff --git a/core/pom.xml b/core/pom.xml
index 14b217d7fb22..62d43d1e59a0 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -64,10 +64,6 @@
org.apache.xbean
xbean-asm7-shaded
-
- org.apache.hadoop
- hadoop-client
-
org.apache.spark
spark-launcher_${scala.binary.version}
@@ -177,6 +173,14 @@
org.apache.commons
commons-text
+
+ commons-io
+ commons-io
+
+
+ commons-collections
+ commons-collections
+
com.google.code.findbugs
jsr305
@@ -539,6 +543,26 @@
+
+ hadoop-2.7
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+
+
+
+
+ hadoop-3.2
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+
+
+
Windows
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 8363d570d732..93370f5dae72 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -1182,10 +1182,12 @@ private[spark] object SparkSubmitUtils {
def resolveDependencyPaths(
artifacts: Array[AnyRef],
cacheDirectory: File): String = {
- artifacts.map { artifactInfo =>
- val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId
+ artifacts.map { ai =>
+ val artifactInfo = ai.asInstanceOf[Artifact]
+ val artifact = artifactInfo.getModuleRevisionId
+ val testSuffix = if (artifactInfo.getType == "test-jar") "-tests" else ""
cacheDirectory.getAbsolutePath + File.separator +
- s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}.jar"
+ s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}${testSuffix}.jar"
}.mkString(",")
}
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2
index d07b04608328..a1d0362988eb 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2
@@ -37,7 +37,7 @@ commons-cli/1.2//commons-cli-1.2.jar
commons-codec/1.10//commons-codec-1.10.jar
commons-collections/3.2.2//commons-collections-3.2.2.jar
commons-compiler/3.0.16//commons-compiler-3.0.16.jar
-commons-compress/1.8.1//commons-compress-1.8.1.jar
+commons-compress/1.4.1//commons-compress-1.4.1.jar
commons-configuration/1.6//commons-configuration-1.6.jar
commons-crypto/1.0.0//commons-crypto-1.0.0.jar
commons-dbcp/1.4//commons-dbcp-1.4.jar
@@ -112,7 +112,7 @@ javassist/3.25.0-GA//javassist-3.25.0-GA.jar
javax.inject/1//javax.inject-1.jar
javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar
javolution/5.5.1//javolution-5.5.1.jar
-jaxb-api/2.2.2//jaxb-api-2.2.2.jar
+jaxb-api/2.2.11//jaxb-api-2.2.11.jar
jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar
jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar
jdo-api/3.0.1//jdo-api-3.0.1.jar
@@ -213,7 +213,6 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
-stax-api/1.0-2//stax-api-1.0-2.jar
stax-api/1.0.1//stax-api-1.0.1.jar
stream/2.9.6//stream-2.9.6.jar
stringtemplate/3.2.1//stringtemplate-3.2.1.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
index 979bb1419ce7..c6218cfc4b3e 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
@@ -35,7 +35,7 @@ commons-cli/1.2//commons-cli-1.2.jar
commons-codec/1.10//commons-codec-1.10.jar
commons-collections/3.2.2//commons-collections-3.2.2.jar
commons-compiler/3.0.16//commons-compiler-3.0.16.jar
-commons-compress/1.8.1//commons-compress-1.8.1.jar
+commons-compress/1.4.1//commons-compress-1.4.1.jar
commons-configuration/1.6//commons-configuration-1.6.jar
commons-crypto/1.0.0//commons-crypto-1.0.0.jar
commons-dbcp/1.4//commons-dbcp-1.4.jar
@@ -126,7 +126,7 @@ javax.inject/1//javax.inject-1.jar
javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar
javolution/5.5.1//javolution-5.5.1.jar
-jaxb-api/2.2.2//jaxb-api-2.2.2.jar
+jaxb-api/2.2.11//jaxb-api-2.2.11.jar
jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar
jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar
jdo-api/3.0.1//jdo-api-3.0.1.jar
@@ -226,7 +226,6 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
-stax-api/1.0-2//stax-api-1.0-2.jar
stax-api/1.0.1//stax-api-1.0.1.jar
stream/2.9.6//stream-2.9.6.jar
super-csv/2.2.0//super-csv-2.2.0.jar
diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
index ebaff6d1977c..62365e6626ef 100644
--- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -3,14 +3,12 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar
JTransforms/3.1//JTransforms-3.1.jar
RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
ST4/4.0.4//ST4-4.0.4.jar
-accessors-smart/1.2//accessors-smart-1.2.jar
activation/1.1.1//activation-1.1.1.jar
aircompressor/0.10//aircompressor-0.10.jar
algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar
aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar
-aopalliance/1.0//aopalliance-1.0.jar
arpack_combined_all/0.1//arpack_combined_all-0.1.jar
arrow-format/1.0.1//arrow-format-1.0.1.jar
arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar
@@ -27,15 +25,12 @@ breeze_2.12/1.0//breeze_2.12-1.0.jar
cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar
chill-java/0.9.5//chill-java-0.9.5.jar
chill_2.12/0.9.5//chill_2.12-0.9.5.jar
-commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar
commons-cli/1.2//commons-cli-1.2.jar
commons-codec/1.10//commons-codec-1.10.jar
commons-collections/3.2.2//commons-collections-3.2.2.jar
commons-compiler/3.0.16//commons-compiler-3.0.16.jar
commons-compress/1.8.1//commons-compress-1.8.1.jar
-commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar
commons-crypto/1.0.0//commons-crypto-1.0.0.jar
-commons-daemon/1.0.13//commons-daemon-1.0.13.jar
commons-dbcp/1.4//commons-dbcp-1.4.jar
commons-httpclient/3.1//commons-httpclient-3.1.jar
commons-io/2.5//commons-io-2.5.jar
@@ -55,30 +50,13 @@ datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar
datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar
datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar
derby/10.12.1.1//derby-10.12.1.1.jar
-dnsjava/2.1.7//dnsjava-2.1.7.jar
dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar
-ehcache/3.3.1//ehcache-3.3.1.jar
flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar
generex/1.0.2//generex-1.0.2.jar
-geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar
gson/2.2.4//gson-2.2.4.jar
guava/14.0.1//guava-14.0.1.jar
-guice-servlet/4.0//guice-servlet-4.0.jar
-guice/4.0//guice-4.0.jar
-hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar
-hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar
-hadoop-client/3.2.0//hadoop-client-3.2.0.jar
-hadoop-common/3.2.0//hadoop-common-3.2.0.jar
-hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar
-hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar
-hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar
-hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar
-hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar
-hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar
-hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar
-hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar
-hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar
-hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar
+hadoop-client-api/3.2.1//hadoop-client-api-3.2.1.jar
+hadoop-client-runtime/3.2.1//hadoop-client-runtime-3.2.1.jar
hive-beeline/2.3.7//hive-beeline-2.3.7.jar
hive-cli/2.3.7//hive-cli-2.3.7.jar
hive-common/2.3.7//hive-common-2.3.7.jar
@@ -107,8 +85,6 @@ jackson-core/2.10.0//jackson-core-2.10.0.jar
jackson-databind/2.10.0//jackson-databind-2.10.0.jar
jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar
jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar
-jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar
-jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar
jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar
jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar
jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar
@@ -121,13 +97,11 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar
jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
janino/3.0.16//janino-3.0.16.jar
javassist/3.25.0-GA//javassist-3.25.0-GA.jar
-javax.inject/1//javax.inject-1.jar
javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar
javolution/5.5.1//javolution-5.5.1.jar
jaxb-api/2.2.11//jaxb-api-2.2.11.jar
jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar
-jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar
jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar
jdo-api/3.0.1//jdo-api-3.0.1.jar
jersey-client/2.30//jersey-client-2.30.jar
@@ -141,30 +115,14 @@ jline/2.14.6//jline-2.14.6.jar
joda-time/2.10.5//joda-time-2.10.5.jar
jodd-core/3.5.2//jodd-core-3.5.2.jar
jpam/1.1//jpam-1.1.jar
-json-smart/2.3//json-smart-2.3.jar
json/1.8//json-1.8.jar
json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar
json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar
json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar
json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar
-jsp-api/2.1//jsp-api-2.1.jar
jsr305/3.0.0//jsr305-3.0.0.jar
jta/1.1//jta-1.1.jar
jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar
-kerb-admin/1.0.1//kerb-admin-1.0.1.jar
-kerb-client/1.0.1//kerb-client-1.0.1.jar
-kerb-common/1.0.1//kerb-common-1.0.1.jar
-kerb-core/1.0.1//kerb-core-1.0.1.jar
-kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar
-kerb-identity/1.0.1//kerb-identity-1.0.1.jar
-kerb-server/1.0.1//kerb-server-1.0.1.jar
-kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar
-kerb-util/1.0.1//kerb-util-1.0.1.jar
-kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar
-kerby-config/1.0.1//kerby-config-1.0.1.jar
-kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar
-kerby-util/1.0.1//kerby-util-1.0.1.jar
-kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar
kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar
kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar
kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar
@@ -202,10 +160,9 @@ metrics-json/4.1.1//metrics-json-4.1.1.jar
metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar
minlog/1.3.0//minlog-1.3.0.jar
netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar
-nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar
objenesis/2.6//objenesis-2.6.jar
okhttp/2.7.5//okhttp-2.7.5.jar
-okhttp/3.12.12//okhttp-3.12.12.jar
+okhttp/3.12.6//okhttp-3.12.6.jar
okio/1.14.0//okio-1.14.0.jar
opencsv/2.3//opencsv-2.3.jar
openshift-model/4.10.3//openshift-model-4.10.3.jar
@@ -224,7 +181,6 @@ parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
protobuf-java/2.5.0//protobuf-java-2.5.0.jar
py4j/0.10.9//py4j-0.10.9.jar
pyrolite/4.30//pyrolite-4.30.jar
-re2j/1.1//re2j-1.1.jar
scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
scala-compiler/2.12.10//scala-compiler-2.12.10.jar
scala-library/2.12.10//scala-library-2.12.10.jar
@@ -242,15 +198,12 @@ spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
stax-api/1.0.1//stax-api-1.0.1.jar
-stax2-api/3.1.4//stax2-api-3.1.4.jar
stream/2.9.6//stream-2.9.6.jar
super-csv/2.2.0//super-csv-2.2.0.jar
threeten-extra/1.5.0//threeten-extra-1.5.0.jar
-token-provider/1.0.1//token-provider-1.0.1.jar
transaction-api/1.1//transaction-api-1.1.jar
univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar
velocity/1.5//velocity-1.5.jar
-woodstox-core/5.0.3//woodstox-core-5.0.3.jar
xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar
xz/1.5//xz-1.5.jar
zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 48191e9bb024..ce7c5b40e613 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -715,64 +715,64 @@ def main():
setup_test_environ(test_environ)
should_run_java_style_checks = False
- if not should_only_test_modules:
- # license checks
- run_apache_rat_checks()
-
- # style checks
- if not changed_files or any(f.endswith(".scala")
- or f.endswith("scalastyle-config.xml")
- for f in changed_files):
- run_scala_style_checks(extra_profiles)
- if not changed_files or any(f.endswith(".java")
- or f.endswith("checkstyle.xml")
- or f.endswith("checkstyle-suppressions.xml")
- for f in changed_files):
- # Run SBT Checkstyle after the build to prevent a side-effect to the build.
- should_run_java_style_checks = True
- if not changed_files or any(f.endswith("lint-python")
- or f.endswith("tox.ini")
- or f.endswith(".py")
- for f in changed_files):
- run_python_style_checks()
- if not changed_files or any(f.endswith(".R")
- or f.endswith("lint-r")
- or f.endswith(".lintr")
- for f in changed_files):
- run_sparkr_style_checks()
+ # if not should_only_test_modules:
+ # # license checks
+ # run_apache_rat_checks()
+ #
+ # # style checks
+ # if not changed_files or any(f.endswith(".scala")
+ # or f.endswith("scalastyle-config.xml")
+ # for f in changed_files):
+ # run_scala_style_checks(extra_profiles)
+ # if not changed_files or any(f.endswith(".java")
+ # or f.endswith("checkstyle.xml")
+ # or f.endswith("checkstyle-suppressions.xml")
+ # for f in changed_files):
+ # # Run SBT Checkstyle after the build to prevent a side-effect to the build.
+ # should_run_java_style_checks = True
+ # if not changed_files or any(f.endswith("lint-python")
+ # or f.endswith("tox.ini")
+ # or f.endswith(".py")
+ # for f in changed_files):
+ # run_python_style_checks()
+ # if not changed_files or any(f.endswith(".R")
+ # or f.endswith("lint-r")
+ # or f.endswith(".lintr")
+ # for f in changed_files):
+ # run_sparkr_style_checks()
# determine if docs were changed and if we're inside the amplab environment
# note - the below commented out until *all* Jenkins workers can get `jekyll` installed
# if "DOCS" in changed_modules and test_env == "amplab_jenkins":
# build_spark_documentation()
- if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
- run_build_tests()
-
- # spark build
- build_apache_spark(build_tool, extra_profiles)
+ # if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
+ # run_build_tests()
+ #
+ # # spark build
+ # build_apache_spark(build_tool, extra_profiles)
# backwards compatibility checks
- if build_tool == "sbt":
- # Note: compatibility tests only supported in sbt for now
- detect_binary_inop_with_mima(extra_profiles)
- # Since we did not build assembly/package before running dev/mima, we need to
- # do it here because the tests still rely on it; see SPARK-13294 for details.
- build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
+ # if build_tool == "sbt":
+ # # Note: compatibility tests only supported in sbt for now
+ # detect_binary_inop_with_mima(extra_profiles)
+ # # Since we did not build assembly/package before running dev/mima, we need to
+ # # do it here because the tests still rely on it; see SPARK-13294 for details.
+ # build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
# run the test suites
run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
- modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
- if modules_with_python_tests:
- # We only run PySpark tests with coverage report in one specific job with
- # Spark master with SBT in Jenkins.
- is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
- run_python_tests(
- modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
- run_python_packaging_tests()
- if any(m.should_run_r_tests for m in test_modules):
- run_sparkr_tests()
+ # modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
+ # if modules_with_python_tests:
+ # # We only run PySpark tests with coverage report in one specific job with
+ # # Spark master with SBT in Jenkins.
+ # is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
+ # run_python_tests(
+ # modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
+ # run_python_packaging_tests()
+ # if any(m.should_run_r_tests for m in test_modules):
+ # run_sparkr_tests()
def _test():
diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb
index 17da22bf8a43..133b4945238a 100644
--- a/docs/_plugins/copy_api_dirs.rb
+++ b/docs/_plugins/copy_api_dirs.rb
@@ -26,8 +26,8 @@
curr_dir = pwd
cd("..")
- puts "Running 'build/sbt -Pkinesis-asl clean compile unidoc' from " + pwd + "; this may take a few minutes..."
- system("build/sbt -Pkinesis-asl clean compile unidoc") || raise("Unidoc generation failed")
+ puts "Running 'build/sbt -Pkinesis-asl -Phadoop-3.2 clean compile unidoc' from " + pwd + "; this may take a few minutes..."
+ system("build/sbt -Pkinesis-asl -Phadoop-3.2 clean compile unidoc") || raise("Unidoc generation failed")
puts "Moving back into docs dir."
cd("docs")
@@ -157,8 +157,8 @@
curr_dir = pwd
cd("..")
- puts "Running 'build/sbt clean package -Phive' from " + pwd + "; this may take a few minutes..."
- system("build/sbt clean package -Phive") || raise("SQL doc generation failed")
+ puts "Running 'build/sbt clean package -Phive -Phadoop-3.2' from " + pwd + "; this may take a few minutes..."
+ system("build/sbt clean package -Phive -Phadoop-3.2") || raise("SQL doc generation failed")
puts "Moving back into docs dir."
cd("docs")
diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml
index d9d9fb7f55c7..edf544028bcc 100644
--- a/external/kafka-0-10-assembly/pom.xml
+++ b/external/kafka-0-10-assembly/pom.xml
@@ -69,11 +69,6 @@
lz4-java
provided
-
- org.apache.hadoop
- hadoop-client
- provided
-
org.apache.avro
avro-mapred
@@ -117,6 +112,31 @@
+
+
+ hadoop-2.7
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+ provided
+
+
+
+
+ hadoop-3.2
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+ provided
+
+
+
+
+
target/scala-${scala.binary.version}/classes
target/scala-${scala.binary.version}/test-classes
diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml
index 95a99ac88412..06a6bef005e6 100644
--- a/external/kafka-0-10-sql/pom.xml
+++ b/external/kafka-0-10-sql/pom.xml
@@ -79,6 +79,10 @@
kafka-clients
${kafka.version}
+
+ com.google.code.findbugs
+ jsr305
+
org.apache.commons
commons-pool2
diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml
index 941946f30e96..36cfd3441366 100644
--- a/external/kafka-0-10-token-provider/pom.xml
+++ b/external/kafka-0-10-token-provider/pom.xml
@@ -76,6 +76,19 @@
+
+
+ hadoop-3.2
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+
+
+
+
+
target/scala-${scala.binary.version}/classes
target/scala-${scala.binary.version}/test-classes
diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml
index 76ee5bb7b2f8..33262538f392 100644
--- a/external/kinesis-asl-assembly/pom.xml
+++ b/external/kinesis-asl-assembly/pom.xml
@@ -89,11 +89,6 @@
log4j
provided
-
- org.apache.hadoop
- hadoop-client
- provided
-
org.apache.avro
avro-ipc
@@ -132,6 +127,31 @@
+
+
+ hadoop-2.7
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+ provided
+
+
+
+
+ hadoop-3.2
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+ provided
+
+
+
+
+
target/scala-${scala.binary.version}/classes
target/scala-${scala.binary.version}/test-classes
diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml
index 8689e0b8a9ea..3416532b89e0 100644
--- a/hadoop-cloud/pom.xml
+++ b/hadoop-cloud/pom.xml
@@ -56,12 +56,6 @@
test-jar
test
-
- org.apache.hadoop
- hadoop-client
- ${hadoop.version}
- provided
-
-
- org.apache.hadoop
- hadoop-client
- test
-
+
+
+
+ hadoop-2.7
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+
+
+
+
+ hadoop-3.2
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+
+
+
+
+
target/scala-${scala.binary.version}/classes
target/scala-${scala.binary.version}/test-classes
diff --git a/pom.xml b/pom.xml
index b13d5ab81856..5dbf19f33d9a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -120,7 +120,7 @@
spark
1.7.30
1.2.17
- 3.2.0
+ 3.2.1
2.5.0
${hadoop.version}
3.4.14
@@ -855,6 +855,11 @@
javax.ws.rs-api
2.0.1
+
+ javax.xml.bind
+ jaxb-api
+ 2.2.11
+
org.scalanlp
breeze_${scala.binary.version}
@@ -1063,88 +1068,6 @@
${curator.version}
test
-
- org.apache.hadoop
- hadoop-client
- ${hadoop.version}
- ${hadoop.deps.scope}
-
-
- org.fusesource.leveldbjni
- leveldbjni-all
-
-
- asm
- asm
-
-
- org.codehaus.jackson
- jackson-mapper-asl
-
-
- org.ow2.asm
- asm
-
-
- org.jboss.netty
- netty
-
-
- io.netty
- netty
-
-
-
- commons-beanutils
- commons-beanutils-core
-
-
- commons-logging
- commons-logging
-
-
- org.mockito
- mockito-all
-
-
- org.mortbay.jetty
- servlet-api-2.5
-
-
- javax.servlet
- servlet-api
-
-
- junit
- junit
-
-
- com.sun.jersey
- *
-
-
- com.sun.jersey.jersey-test-framework
- *
-
-
- com.sun.jersey.contribs
- *
-
-
- net.java.dev.jets3t
- jets3t
-
-
-
- javax.ws.rs
- jsr311-api
-
-
- org.eclipse.jetty
- jetty-webapp
-
-
-
org.apache.hadoop
hadoop-minikdc
@@ -1242,228 +1165,6 @@
1.1.1
${hadoop.deps.scope}
-
- org.apache.hadoop
- hadoop-yarn-api
- ${yarn.version}
- ${hadoop.deps.scope}
-
-
- javax.servlet
- servlet-api
-
-
- asm
- asm
-
-
- org.ow2.asm
- asm
-
-
- org.jboss.netty
- netty
-
-
- commons-logging
- commons-logging
-
-
- com.sun.jersey
- *
-
-
- com.sun.jersey.jersey-test-framework
- *
-
-
- com.sun.jersey.contribs
- *
-
-
- jdk.tools
- jdk.tools
-
-
-
-
- org.apache.hadoop
- hadoop-yarn-common
- ${yarn.version}
- ${hadoop.deps.scope}
-
-
- asm
- asm
-
-
- org.ow2.asm
- asm
-
-
- org.jboss.netty
- netty
-
-
- javax.servlet
- servlet-api
-
-
- commons-logging
- commons-logging
-
-
- com.sun.jersey
- *
-
-
- com.sun.jersey.jersey-test-framework
- *
-
-
- com.sun.jersey.contribs
- *
-
-
-
-
- org.apache.hadoop
- hadoop-yarn-server-tests
- ${yarn.version}
- tests
- test
-
-
- org.fusesource.leveldbjni
- leveldbjni-all
-
-
- asm
- asm
-
-
- org.ow2.asm
- asm
-
-
- org.jboss.netty
- netty
-
-
- javax.servlet
- servlet-api
-
-
- commons-logging
- commons-logging
-
-
- com.sun.jersey
- *
-
-
- com.sun.jersey.jersey-test-framework
- *
-
-
- com.sun.jersey.contribs
- *
-
-
-
-
- org.apache.hadoop
- hadoop-yarn-server-web-proxy
- ${yarn.version}
- ${hadoop.deps.scope}
-
-
- org.fusesource.leveldbjni
- leveldbjni-all
-
-
- asm
- asm
-
-
- org.ow2.asm
- asm
-
-
- org.jboss.netty
- netty
-
-
- javax.servlet
- servlet-api
-
-
- commons-logging
- commons-logging
-
-
- com.sun.jersey
- *
-
-
- com.sun.jersey.jersey-test-framework
- *
-
-
- com.sun.jersey.contribs
- *
-
-
-
- com.zaxxer
- HikariCP-java7
-
-
- com.microsoft.sqlserver
- mssql-jdbc
-
-
-
-
- org.apache.hadoop
- hadoop-yarn-client
- ${yarn.version}
- ${hadoop.deps.scope}
-
-
- asm
- asm
-
-
- org.ow2.asm
- asm
-
-
- org.jboss.netty
- netty
-
-
- javax.servlet
- servlet-api
-
-
- commons-logging
- commons-logging
-
-
- com.sun.jersey
- *
-
-
- com.sun.jersey.jersey-test-framework
- *
-
-
- com.sun.jersey.contribs
- *
-
-
-
org.apache.zookeeper
zookeeper
@@ -1642,6 +1343,14 @@
org.apache.ant
ant
+
+ org.apache.hadoop
+ hadoop-common
+
+
+ org.apache.hadoop
+ hadoop-auth
+
org.apache.zookeeper
zookeeper
@@ -3137,11 +2846,335 @@
2.7.1
2.4
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+ ${hadoop.deps.scope}
+
+
+ org.fusesource.leveldbjni
+ leveldbjni-all
+
+
+ asm
+ asm
+
+
+ org.codehaus.jackson
+ jackson-mapper-asl
+
+
+ org.ow2.asm
+ asm
+
+
+ org.jboss.netty
+ netty
+
+
+ io.netty
+ netty
+
+
+
+ commons-beanutils
+ commons-beanutils-core
+
+
+ commons-logging
+ commons-logging
+
+
+ org.mockito
+ mockito-all
+
+
+ org.mortbay.jetty
+ servlet-api-2.5
+
+
+ javax.servlet
+ servlet-api
+
+
+ junit
+ junit
+
+
+ com.sun.jersey
+ *
+
+
+ com.sun.jersey.jersey-test-framework
+ *
+
+
+ com.sun.jersey.contribs
+ *
+
+
+ net.java.dev.jets3t
+ jets3t
+
+
+
+ javax.ws.rs
+ jsr311-api
+
+
+ org.eclipse.jetty
+ jetty-webapp
+
+
+
+
+ org.apache.hadoop
+ hadoop-yarn-api
+ ${yarn.version}
+ ${hadoop.deps.scope}
+
+
+ javax.servlet
+ servlet-api
+
+
+ asm
+ asm
+
+
+ org.ow2.asm
+ asm
+
+
+ org.jboss.netty
+ netty
+
+
+ commons-logging
+ commons-logging
+
+
+ com.sun.jersey
+ *
+
+
+ com.sun.jersey.jersey-test-framework
+ *
+
+
+ com.sun.jersey.contribs
+ *
+
+
+ jdk.tools
+ jdk.tools
+
+
+
+
+ org.apache.hadoop
+ hadoop-yarn-common
+ ${yarn.version}
+ ${hadoop.deps.scope}
+
+
+ asm
+ asm
+
+
+ org.ow2.asm
+ asm
+
+
+ org.jboss.netty
+ netty
+
+
+ javax.servlet
+ servlet-api
+
+
+ commons-logging
+ commons-logging
+
+
+ com.sun.jersey
+ *
+
+
+ com.sun.jersey.jersey-test-framework
+ *
+
+
+ com.sun.jersey.contribs
+ *
+
+
+
+
+ org.apache.hadoop
+ hadoop-yarn-server-tests
+ ${yarn.version}
+ tests
+ test
+
+
+ org.fusesource.leveldbjni
+ leveldbjni-all
+
+
+ asm
+ asm
+
+
+ org.ow2.asm
+ asm
+
+
+ org.jboss.netty
+ netty
+
+
+ javax.servlet
+ servlet-api
+
+
+ commons-logging
+ commons-logging
+
+
+ com.sun.jersey
+ *
+
+
+ com.sun.jersey.jersey-test-framework
+ *
+
+
+ com.sun.jersey.contribs
+ *
+
+
+
+
+ org.apache.hadoop
+ hadoop-yarn-server-web-proxy
+ ${yarn.version}
+ ${hadoop.deps.scope}
+
+
+ org.fusesource.leveldbjni
+ leveldbjni-all
+
+
+ asm
+ asm
+
+
+ org.ow2.asm
+ asm
+
+
+ org.jboss.netty
+ netty
+
+
+ javax.servlet
+ servlet-api
+
+
+ commons-logging
+ commons-logging
+
+
+ com.sun.jersey
+ *
+
+
+ com.sun.jersey.jersey-test-framework
+ *
+
+
+ com.sun.jersey.contribs
+ *
+
+
+
+ com.zaxxer
+ HikariCP-java7
+
+
+ com.microsoft.sqlserver
+ mssql-jdbc
+
+
+
+
+ org.apache.hadoop
+ hadoop-yarn-client
+ ${yarn.version}
+ ${hadoop.deps.scope}
+
+
+ asm
+ asm
+
+
+ org.ow2.asm
+ asm
+
+
+ org.jboss.netty
+ netty
+
+
+ javax.servlet
+ servlet-api
+
+
+ commons-logging
+ commons-logging
+
+
+ com.sun.jersey
+ *
+
+
+ com.sun.jersey.jersey-test-framework
+ *
+
+
+ com.sun.jersey.contribs
+ *
+
+
+
+
-
hadoop-3.2
-
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+ ${hadoop.deps.scope}
+
+
+ org.apache.hadoop
+ hadoop-client-api
+ ${hadoop.version}
+ ${hadoop.deps.scope}
+
+
+ org.apache.hadoop
+ hadoop-client-minicluster
+ ${yarn.version}
+ test
+
+
diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml
index bc80769be239..ec74bde93831 100644
--- a/resource-managers/yarn/pom.xml
+++ b/resource-managers/yarn/pom.xml
@@ -40,6 +40,58 @@
true
+
+ hadoop-2.7
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-yarn-api
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-yarn-common
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-yarn-server-web-proxy
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-yarn-client
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-yarn-server-tests
+ ${hadoop.version}
+ tests
+ test
+
+
+
+
+ hadoop-3.2
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+
+
+ org.apache.hadoop
+ hadoop-client-minicluster
+ ${hadoop.version}
+
+
+
@@ -67,26 +119,6 @@
test-jar
test
-
- org.apache.hadoop
- hadoop-yarn-api
-
-
- org.apache.hadoop
- hadoop-yarn-common
-
-
- org.apache.hadoop
- hadoop-yarn-server-web-proxy
-
-
- org.apache.hadoop
- hadoop-yarn-client
-
-
- org.apache.hadoop
- hadoop-client
-
jakarta.servlet
@@ -142,13 +174,6 @@
test
-
- org.apache.hadoop
- hadoop-yarn-server-tests
- tests
- test
-
-
org.mockito
mockito-core
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 5f632fbb259f..9b99e8ff9265 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn
import java.io.{File, IOException}
import java.lang.reflect.{InvocationTargetException, Modifier}
-import java.net.{URI, URL}
+import java.net.{URI, URL, URLEncoder}
import java.security.PrivilegedExceptionAction
import java.util.concurrent.{TimeoutException, TimeUnit}
@@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api._
import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException
-import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils
import org.apache.hadoop.yarn.util.{ConverterUtils, Records}
import org.apache.spark._
@@ -308,7 +307,8 @@ private[spark] class ApplicationMaster(
// The client-mode AM doesn't listen for incoming connections, so report an invalid port.
registerAM(Utils.localHostName, -1, sparkConf,
sparkConf.getOption("spark.driver.appUIAddress"), appAttemptId)
- addAmIpFilter(Some(driverRef), ProxyUriUtils.getPath(appAttemptId.getApplicationId))
+ val encodedAppId = URLEncoder.encode(appAttemptId.getApplicationId.toString, "UTF-8")
+ addAmIpFilter(Some(driverRef), s"/proxy/$encodedAppId")
createAllocator(driverRef, sparkConf, clientRpcEnv, appAttemptId, cachedResourcesConf)
reporterThread.join()
} catch {
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
index 20f5339c46fe..a813b9913f23 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
@@ -80,6 +80,16 @@ abstract class BaseYarnClusterSuite
yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage",
"100.0")
+ // capacity-scheduler.xml is missing in hadoop-client-minicluster so this is a workaround
+ yarnConf.set("yarn.scheduler.capacity.root.queues", "default")
+ yarnConf.setInt("yarn.scheduler.capacity.root.default.capacity", 100)
+ yarnConf.setFloat("yarn.scheduler.capacity.root.default.user-limit-factor", 1)
+ yarnConf.setInt("yarn.scheduler.capacity.root.default.maximum-capacity", 100)
+ yarnConf.set("yarn.scheduler.capacity.root.default.state", "RUNNING")
+ yarnConf.set("yarn.scheduler.capacity.root.default.acl_submit_applications", "*")
+ yarnConf.set("yarn.scheduler.capacity.root.default.acl_administer_queue", "*")
+ yarnConf.setInt("yarn.scheduler.capacity.node-locality-delay", -1)
+
yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1)
yarnCluster.init(yarnConf)
yarnCluster.start()
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 14438bc14105..1cb68005293a 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -162,7 +162,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
}
test("run Python application in yarn-client mode") {
- testPySpark(true)
+ testPySpark(true, extraConf = Map("spark.executorEnv.PATH" -> sys.env("PATH")))
}
test("run Python application in yarn-cluster mode") {
@@ -175,9 +175,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
clientMode = false,
extraConf = Map(
"spark.yarn.appMasterEnv.PYSPARK_DRIVER_PYTHON"
- -> sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", "python"),
+ -> sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", "python3"),
"spark.yarn.appMasterEnv.PYSPARK_PYTHON"
- -> sys.env.getOrElse("PYSPARK_PYTHON", "python")),
+ -> sys.env.getOrElse("PYSPARK_PYTHON", "python3")),
extraEnv = Map(
"PYSPARK_DRIVER_PYTHON" -> "not python",
"PYSPARK_PYTHON" -> "not python"))
@@ -275,7 +275,10 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
s"$sparkHome/python")
val extraEnvVars = Map(
"PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),
- "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator)) ++ extraEnv
+ "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator),
+ "PYSPARK_DRIVER_PYTHON" -> "python3",
+ "PYSPARK_PYTHON" -> "python3"
+ ) ++ extraEnv
val moduleDir = {
val subdir = new File(tempDir, "pyModules")
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index 6b79eb722fcd..af976fa1fa98 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -104,6 +104,10 @@
org.antlr
antlr4-runtime
+
+ javax.xml.bind
+ jaxb-api
+
commons-codec
commons-codec
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 474c6066ed04..d3843257b593 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -191,6 +191,16 @@
+
+ hadoop-3.2
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ ${hadoop.version}
+
+
+
hive
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 42a0ec0253b8..fbbf9c2a1e06 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -118,11 +118,15 @@ private[hive] object IsolatedClientLoader extends Logging {
hadoopVersion: String,
ivyPath: Option[String],
remoteRepos: String): Seq[URL] = {
+ val hadoopJarName = if (hadoopVersion.startsWith("3")) {
+ s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion"
+ } else {
+ s"org.apache.hadoop:hadoop-client:$hadoopVersion"
+ }
val hiveArtifacts = version.extraDeps ++
Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde")
.map(a => s"org.apache.hive:$a:${version.fullVersion}") ++
- Seq("com.google.guava:guava:14.0.1",
- s"org.apache.hadoop:hadoop-client:$hadoopVersion")
+ Seq("com.google.guava:guava:14.0.1", hadoopJarName)
val classpath = quietly {
SparkSubmitUtils.resolveMavenCoordinates(