diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 667371dacf5d..31bafc93a280 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -32,52 +32,11 @@ jobs: # Note that the modules below are from sparktestsupport/modules.py. modules: - >- - core, unsafe, kvstore, avro, - network-common, network-shuffle, repl, launcher, - examples, sketch, graphx - - >- - catalyst, hive-thriftserver - - >- - streaming, sql-kafka-0-10, streaming-kafka-0-10, - mllib-local, mllib, - yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - - >- - pyspark-sql, pyspark-mllib, pyspark-resource - - >- - pyspark-core, pyspark-streaming, pyspark-ml - - >- - sparkr + yarn # Here, we split Hive and SQL tests into some of slow ones and the rest of them. included-tags: [""] excluded-tags: [""] comment: [""] - include: - # Hive tests - - modules: hive - java: 1.8 - hadoop: hadoop3.2 - hive: hive2.3 - included-tags: org.apache.spark.tags.SlowHiveTest - comment: "- slow tests" - - modules: hive - java: 1.8 - hadoop: hadoop3.2 - hive: hive2.3 - excluded-tags: org.apache.spark.tags.SlowHiveTest - comment: "- other tests" - # SQL tests - - modules: sql - java: 1.8 - hadoop: hadoop3.2 - hive: hive2.3 - included-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- slow tests" - - modules: sql - java: 1.8 - hadoop: hadoop3.2 - hive: hive2.3 - excluded-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- other tests" env: MODULES_TO_TEST: ${{ matrix.modules }} EXCLUDED_TAGS: ${{ matrix.excluded-tags }} @@ -183,6 +142,8 @@ jobs: # Run the tests. - name: Run tests run: | + which python + python --version # Hive tests become flaky when running in parallel as it's too intensive. if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi mkdir -p ~/.m2 @@ -201,121 +162,3 @@ jobs: name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/unit-tests.log" - # Static analysis, and documentation build - lint: - name: Linters, licenses, dependencies and documentation generation - runs-on: ubuntu-latest - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: docs-maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - docs-maven- - - name: Install JDK 1.8 - uses: actions/setup-java@v1 - with: - java-version: 1.8 - - name: Install Python 3.6 - uses: actions/setup-python@v2 - with: - python-version: 3.6 - architecture: x64 - - name: Install Python linter dependencies - run: | - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx - - name: Install R 4.0 - uses: r-lib/actions/setup-r@v1 - with: - r-version: 4.0 - - name: Install R linter dependencies and SparkR - run: | - sudo apt-get install -y libcurl4-openssl-dev - sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" - sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" - ./R/install-dev.sh - - name: Install Ruby 2.7 for documentation generation - uses: actions/setup-ruby@v1 - with: - ruby-version: 2.7 - - name: Install dependencies for documentation generation - run: | - # pandoc is required to generate PySpark APIs as well in nbsphinx. 
- sudo apt-get install -y libcurl4-openssl-dev pandoc - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx - gem install jekyll jekyll-redirect-from rouge - sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" - - name: Scala linter - run: ./dev/lint-scala - - name: Java linter - run: ./dev/lint-java - - name: Python linter - run: ./dev/lint-python - - name: R linter - run: ./dev/lint-r - - name: License test - run: ./dev/check-license - - name: Dependencies test - run: ./dev/test-dependencies.sh - - name: Run documentation build - run: | - cd docs - jekyll build - - java11: - name: Java 11 build - runs-on: ubuntu-latest - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: java11-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - java11-maven- - - name: Install Java 11 - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - mkdir -p ~/.m2 - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install - rm -rf ~/.m2/repository/org/apache/spark - - scala-213: - name: Scala 2.13 build - runs-on: ubuntu-latest - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: scala-213-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - scala-213-maven- - - name: Install Java 11 - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - mkdir -p ~/.m2 - ./dev/change-scala-version.sh 2.13 - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 -Pscala-2.13 install - rm -rf ~/.m2/repository/org/apache/spark diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 0225db81925c..50ad3340a1c4 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -63,10 +63,6 @@ - - org.apache.hadoop - hadoop-client - org.slf4j slf4j-api @@ -74,6 +70,29 @@ + + + hadoop-2.7 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + + + hadoop-3.2 + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/core/pom.xml b/core/pom.xml index 14b217d7fb22..62d43d1e59a0 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -64,10 +64,6 @@ org.apache.xbean xbean-asm7-shaded - - org.apache.hadoop - hadoop-client - org.apache.spark spark-launcher_${scala.binary.version} @@ -177,6 +173,14 @@ org.apache.commons commons-text + + commons-io + commons-io + + + commons-collections + commons-collections + com.google.code.findbugs jsr305 @@ -539,6 +543,26 @@ + + hadoop-2.7 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + + + hadoop-3.2 + + + 
org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + Windows diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8363d570d732..93370f5dae72 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1182,10 +1182,12 @@ private[spark] object SparkSubmitUtils { def resolveDependencyPaths( artifacts: Array[AnyRef], cacheDirectory: File): String = { - artifacts.map { artifactInfo => - val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId + artifacts.map { ai => + val artifactInfo = ai.asInstanceOf[Artifact] + val artifact = artifactInfo.getModuleRevisionId + val testSuffix = if (artifactInfo.getType == "test-jar") "-tests" else "" cacheDirectory.getAbsolutePath + File.separator + - s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}.jar" + s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}${testSuffix}.jar" }.mkString(",") } diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 index d07b04608328..a1d0362988eb 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 @@ -37,7 +37,7 @@ commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.10//commons-codec-1.10.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar -commons-compress/1.8.1//commons-compress-1.8.1.jar +commons-compress/1.4.1//commons-compress-1.4.1.jar commons-configuration/1.6//commons-configuration-1.6.jar commons-crypto/1.0.0//commons-crypto-1.0.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar @@ -112,7 +112,7 @@ javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -213,7 +213,6 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar -stax-api/1.0-2//stax-api-1.0-2.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar stringtemplate/3.2.1//stringtemplate-3.2.1.jar diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 979bb1419ce7..c6218cfc4b3e 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -35,7 +35,7 @@ commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.10//commons-codec-1.10.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar -commons-compress/1.8.1//commons-compress-1.8.1.jar +commons-compress/1.4.1//commons-compress-1.4.1.jar commons-configuration/1.6//commons-configuration-1.6.jar commons-crypto/1.0.0//commons-crypto-1.0.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar @@ -126,7 +126,7 @@ javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar 
jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -226,7 +226,6 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar -stax-api/1.0-2//stax-api-1.0-2.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index ebaff6d1977c..62365e6626ef 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -3,14 +3,12 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar -accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -aopalliance/1.0//aopalliance-1.0.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar arrow-format/1.0.1//arrow-format-1.0.1.jar arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar @@ -27,15 +25,12 @@ breeze_2.12/1.0//breeze_2.12-1.0.jar cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar -commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.10//commons-codec-1.10.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.8.1//commons-compress-1.8.1.jar -commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar commons-crypto/1.0.0//commons-crypto-1.0.0.jar -commons-daemon/1.0.13//commons-daemon-1.0.13.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar @@ -55,30 +50,13 @@ datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.12.1.1//derby-10.12.1.1.jar -dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -ehcache/3.3.1//ehcache-3.3.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar -geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -guice-servlet/4.0//guice-servlet-4.0.jar -guice/4.0//guice-4.0.jar -hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar -hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar -hadoop-client/3.2.0//hadoop-client-3.2.0.jar -hadoop-common/3.2.0//hadoop-common-3.2.0.jar -hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar -hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar -hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar -hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar -hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar -hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar -hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar -hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar 
-hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar -hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar +hadoop-client-api/3.2.1//hadoop-client-api-3.2.1.jar +hadoop-client-runtime/3.2.1//hadoop-client-runtime-3.2.1.jar hive-beeline/2.3.7//hive-beeline-2.3.7.jar hive-cli/2.3.7//hive-cli-2.3.7.jar hive-common/2.3.7//hive-common-2.3.7.jar @@ -107,8 +85,6 @@ jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar -jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar -jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar @@ -121,13 +97,11 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar -javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.30//jersey-client-2.30.jar @@ -141,30 +115,14 @@ jline/2.14.6//jline-2.14.6.jar joda-time/2.10.5//joda-time-2.10.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar -json-smart/2.3//json-smart-2.3.jar json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar -jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar -kerb-admin/1.0.1//kerb-admin-1.0.1.jar -kerb-client/1.0.1//kerb-client-1.0.1.jar -kerb-common/1.0.1//kerb-common-1.0.1.jar -kerb-core/1.0.1//kerb-core-1.0.1.jar -kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar -kerb-identity/1.0.1//kerb-identity-1.0.1.jar -kerb-server/1.0.1//kerb-server-1.0.1.jar -kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar -kerb-util/1.0.1//kerb-util-1.0.1.jar -kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar -kerby-config/1.0.1//kerby-config-1.0.1.jar -kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar -kerby-util/1.0.1//kerby-util-1.0.1.jar -kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar @@ -202,10 +160,9 @@ metrics-json/4.1.1//metrics-json-4.1.1.jar metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar -nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar okhttp/2.7.5//okhttp-2.7.5.jar -okhttp/3.12.12//okhttp-3.12.12.jar +okhttp/3.12.6//okhttp-3.12.6.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar openshift-model/4.10.3//openshift-model-4.10.3.jar @@ -224,7 +181,6 @@ parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar 
py4j/0.10.9//py4j-0.10.9.jar pyrolite/4.30//pyrolite-4.30.jar -re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar scala-library/2.12.10//scala-library-2.12.10.jar @@ -242,15 +198,12 @@ spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar stax-api/1.0.1//stax-api-1.0.1.jar -stax2-api/3.1.4//stax2-api-3.1.4.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar -token-provider/1.0.1//token-provider-1.0.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar velocity/1.5//velocity-1.5.jar -woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar diff --git a/dev/run-tests.py b/dev/run-tests.py index 48191e9bb024..ce7c5b40e613 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -715,64 +715,64 @@ def main(): setup_test_environ(test_environ) should_run_java_style_checks = False - if not should_only_test_modules: - # license checks - run_apache_rat_checks() - - # style checks - if not changed_files or any(f.endswith(".scala") - or f.endswith("scalastyle-config.xml") - for f in changed_files): - run_scala_style_checks(extra_profiles) - if not changed_files or any(f.endswith(".java") - or f.endswith("checkstyle.xml") - or f.endswith("checkstyle-suppressions.xml") - for f in changed_files): - # Run SBT Checkstyle after the build to prevent a side-effect to the build. - should_run_java_style_checks = True - if not changed_files or any(f.endswith("lint-python") - or f.endswith("tox.ini") - or f.endswith(".py") - for f in changed_files): - run_python_style_checks() - if not changed_files or any(f.endswith(".R") - or f.endswith("lint-r") - or f.endswith(".lintr") - for f in changed_files): - run_sparkr_style_checks() + # if not should_only_test_modules: + # # license checks + # run_apache_rat_checks() + # + # # style checks + # if not changed_files or any(f.endswith(".scala") + # or f.endswith("scalastyle-config.xml") + # for f in changed_files): + # run_scala_style_checks(extra_profiles) + # if not changed_files or any(f.endswith(".java") + # or f.endswith("checkstyle.xml") + # or f.endswith("checkstyle-suppressions.xml") + # for f in changed_files): + # # Run SBT Checkstyle after the build to prevent a side-effect to the build. 
+ # should_run_java_style_checks = True + # if not changed_files or any(f.endswith("lint-python") + # or f.endswith("tox.ini") + # or f.endswith(".py") + # for f in changed_files): + # run_python_style_checks() + # if not changed_files or any(f.endswith(".R") + # or f.endswith("lint-r") + # or f.endswith(".lintr") + # for f in changed_files): + # run_sparkr_style_checks() # determine if docs were changed and if we're inside the amplab environment # note - the below commented out until *all* Jenkins workers can get `jekyll` installed # if "DOCS" in changed_modules and test_env == "amplab_jenkins": # build_spark_documentation() - if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins": - run_build_tests() - - # spark build - build_apache_spark(build_tool, extra_profiles) + # if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins": + # run_build_tests() + # + # # spark build + # build_apache_spark(build_tool, extra_profiles) # backwards compatibility checks - if build_tool == "sbt": - # Note: compatibility tests only supported in sbt for now - detect_binary_inop_with_mima(extra_profiles) - # Since we did not build assembly/package before running dev/mima, we need to - # do it here because the tests still rely on it; see SPARK-13294 for details. - build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks) + # if build_tool == "sbt": + # # Note: compatibility tests only supported in sbt for now + # detect_binary_inop_with_mima(extra_profiles) + # # Since we did not build assembly/package before running dev/mima, we need to + # # do it here because the tests still rely on it; see SPARK-13294 for details. + # build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks) # run the test suites run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags) - modules_with_python_tests = [m for m in test_modules if m.python_test_goals] - if modules_with_python_tests: - # We only run PySpark tests with coverage report in one specific job with - # Spark master with SBT in Jenkins. - is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ - run_python_tests( - modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job) - run_python_packaging_tests() - if any(m.should_run_r_tests for m in test_modules): - run_sparkr_tests() + # modules_with_python_tests = [m for m in test_modules if m.python_test_goals] + # if modules_with_python_tests: + # # We only run PySpark tests with coverage report in one specific job with + # # Spark master with SBT in Jenkins. + # is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ + # run_python_tests( + # modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job) + # run_python_packaging_tests() + # if any(m.should_run_r_tests for m in test_modules): + # run_sparkr_tests() def _test(): diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index 17da22bf8a43..133b4945238a 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -26,8 +26,8 @@ curr_dir = pwd cd("..") - puts "Running 'build/sbt -Pkinesis-asl clean compile unidoc' from " + pwd + "; this may take a few minutes..." - system("build/sbt -Pkinesis-asl clean compile unidoc") || raise("Unidoc generation failed") + puts "Running 'build/sbt -Pkinesis-asl -Phadoop-3.2 clean compile unidoc' from " + pwd + "; this may take a few minutes..." 
+ system("build/sbt -Pkinesis-asl -Phadoop-3.2 clean compile unidoc") || raise("Unidoc generation failed") puts "Moving back into docs dir." cd("docs") @@ -157,8 +157,8 @@ curr_dir = pwd cd("..") - puts "Running 'build/sbt clean package -Phive' from " + pwd + "; this may take a few minutes..." - system("build/sbt clean package -Phive") || raise("SQL doc generation failed") + puts "Running 'build/sbt clean package -Phive -Phadoop-3.2' from " + pwd + "; this may take a few minutes..." + system("build/sbt clean package -Phive -Phadoop-3.2") || raise("SQL doc generation failed") puts "Moving back into docs dir." cd("docs") diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index d9d9fb7f55c7..edf544028bcc 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -69,11 +69,6 @@ lz4-java provided - - org.apache.hadoop - hadoop-client - provided - org.apache.avro avro-mapred @@ -117,6 +112,31 @@ + + + hadoop-2.7 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + provided + + + + + hadoop-3.2 + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + provided + + + + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95a99ac88412..06a6bef005e6 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -79,6 +79,10 @@ kafka-clients ${kafka.version} + + com.google.code.findbugs + jsr305 + org.apache.commons commons-pool2 diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 941946f30e96..36cfd3441366 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -76,6 +76,19 @@ + + + hadoop-3.2 + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 76ee5bb7b2f8..33262538f392 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -89,11 +89,6 @@ log4j provided - - org.apache.hadoop - hadoop-client - provided - org.apache.avro avro-ipc @@ -132,6 +127,31 @@ + + + hadoop-2.7 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + provided + + + + + hadoop-3.2 + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + provided + + + + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 8689e0b8a9ea..3416532b89e0 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -56,12 +56,6 @@ test-jar test - - org.apache.hadoop - hadoop-client - ${hadoop.version} - provided - - - org.apache.hadoop - hadoop-client - test - + + + + hadoop-2.7 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + + + hadoop-3.2 + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/pom.xml b/pom.xml index b13d5ab81856..5dbf19f33d9a 100644 --- a/pom.xml +++ b/pom.xml @@ -120,7 +120,7 @@ spark 1.7.30 1.2.17 - 3.2.0 + 3.2.1 2.5.0 ${hadoop.version} 3.4.14 @@ -855,6 +855,11 @@ javax.ws.rs-api 2.0.1 + + javax.xml.bind + jaxb-api + 2.2.11 + org.scalanlp 
breeze_${scala.binary.version} @@ -1063,88 +1068,6 @@ ${curator.version} test - - org.apache.hadoop - hadoop-client - ${hadoop.version} - ${hadoop.deps.scope} - - - org.fusesource.leveldbjni - leveldbjni-all - - - asm - asm - - - org.codehaus.jackson - jackson-mapper-asl - - - org.ow2.asm - asm - - - org.jboss.netty - netty - - - io.netty - netty - - - - commons-beanutils - commons-beanutils-core - - - commons-logging - commons-logging - - - org.mockito - mockito-all - - - org.mortbay.jetty - servlet-api-2.5 - - - javax.servlet - servlet-api - - - junit - junit - - - com.sun.jersey - * - - - com.sun.jersey.jersey-test-framework - * - - - com.sun.jersey.contribs - * - - - net.java.dev.jets3t - jets3t - - - - javax.ws.rs - jsr311-api - - - org.eclipse.jetty - jetty-webapp - - - org.apache.hadoop hadoop-minikdc @@ -1242,228 +1165,6 @@ 1.1.1 ${hadoop.deps.scope} - - org.apache.hadoop - hadoop-yarn-api - ${yarn.version} - ${hadoop.deps.scope} - - - javax.servlet - servlet-api - - - asm - asm - - - org.ow2.asm - asm - - - org.jboss.netty - netty - - - commons-logging - commons-logging - - - com.sun.jersey - * - - - com.sun.jersey.jersey-test-framework - * - - - com.sun.jersey.contribs - * - - - jdk.tools - jdk.tools - - - - - org.apache.hadoop - hadoop-yarn-common - ${yarn.version} - ${hadoop.deps.scope} - - - asm - asm - - - org.ow2.asm - asm - - - org.jboss.netty - netty - - - javax.servlet - servlet-api - - - commons-logging - commons-logging - - - com.sun.jersey - * - - - com.sun.jersey.jersey-test-framework - * - - - com.sun.jersey.contribs - * - - - - - org.apache.hadoop - hadoop-yarn-server-tests - ${yarn.version} - tests - test - - - org.fusesource.leveldbjni - leveldbjni-all - - - asm - asm - - - org.ow2.asm - asm - - - org.jboss.netty - netty - - - javax.servlet - servlet-api - - - commons-logging - commons-logging - - - com.sun.jersey - * - - - com.sun.jersey.jersey-test-framework - * - - - com.sun.jersey.contribs - * - - - - - org.apache.hadoop - hadoop-yarn-server-web-proxy - ${yarn.version} - ${hadoop.deps.scope} - - - org.fusesource.leveldbjni - leveldbjni-all - - - asm - asm - - - org.ow2.asm - asm - - - org.jboss.netty - netty - - - javax.servlet - servlet-api - - - commons-logging - commons-logging - - - com.sun.jersey - * - - - com.sun.jersey.jersey-test-framework - * - - - com.sun.jersey.contribs - * - - - - com.zaxxer - HikariCP-java7 - - - com.microsoft.sqlserver - mssql-jdbc - - - - - org.apache.hadoop - hadoop-yarn-client - ${yarn.version} - ${hadoop.deps.scope} - - - asm - asm - - - org.ow2.asm - asm - - - org.jboss.netty - netty - - - javax.servlet - servlet-api - - - commons-logging - commons-logging - - - com.sun.jersey - * - - - com.sun.jersey.jersey-test-framework - * - - - com.sun.jersey.contribs - * - - - org.apache.zookeeper zookeeper @@ -1642,6 +1343,14 @@ org.apache.ant ant + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-auth + org.apache.zookeeper zookeeper @@ -3137,11 +2846,335 @@ 2.7.1 2.4 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + ${hadoop.deps.scope} + + + org.fusesource.leveldbjni + leveldbjni-all + + + asm + asm + + + org.codehaus.jackson + jackson-mapper-asl + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + io.netty + netty + + + + commons-beanutils + commons-beanutils-core + + + commons-logging + commons-logging + + + org.mockito + mockito-all + + + org.mortbay.jetty + servlet-api-2.5 + + + javax.servlet + servlet-api + + + junit + junit + + + com.sun.jersey + * + + + 
com.sun.jersey.jersey-test-framework + * + + + com.sun.jersey.contribs + * + + + net.java.dev.jets3t + jets3t + + + + javax.ws.rs + jsr311-api + + + org.eclipse.jetty + jetty-webapp + + + + + org.apache.hadoop + hadoop-yarn-api + ${yarn.version} + ${hadoop.deps.scope} + + + javax.servlet + servlet-api + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + commons-logging + commons-logging + + + com.sun.jersey + * + + + com.sun.jersey.jersey-test-framework + * + + + com.sun.jersey.contribs + * + + + jdk.tools + jdk.tools + + + + + org.apache.hadoop + hadoop-yarn-common + ${yarn.version} + ${hadoop.deps.scope} + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + javax.servlet + servlet-api + + + commons-logging + commons-logging + + + com.sun.jersey + * + + + com.sun.jersey.jersey-test-framework + * + + + com.sun.jersey.contribs + * + + + + + org.apache.hadoop + hadoop-yarn-server-tests + ${yarn.version} + tests + test + + + org.fusesource.leveldbjni + leveldbjni-all + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + javax.servlet + servlet-api + + + commons-logging + commons-logging + + + com.sun.jersey + * + + + com.sun.jersey.jersey-test-framework + * + + + com.sun.jersey.contribs + * + + + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + ${yarn.version} + ${hadoop.deps.scope} + + + org.fusesource.leveldbjni + leveldbjni-all + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + javax.servlet + servlet-api + + + commons-logging + commons-logging + + + com.sun.jersey + * + + + com.sun.jersey.jersey-test-framework + * + + + com.sun.jersey.contribs + * + + + + com.zaxxer + HikariCP-java7 + + + com.microsoft.sqlserver + mssql-jdbc + + + + + org.apache.hadoop + hadoop-yarn-client + ${yarn.version} + ${hadoop.deps.scope} + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + javax.servlet + servlet-api + + + commons-logging + commons-logging + + + com.sun.jersey + * + + + com.sun.jersey.jersey-test-framework + * + + + com.sun.jersey.contribs + * + + + + - hadoop-3.2 - + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + ${hadoop.deps.scope} + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + ${hadoop.deps.scope} + + + org.apache.hadoop + hadoop-client-minicluster + ${yarn.version} + test + + diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index bc80769be239..ec74bde93831 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -40,6 +40,58 @@ true + + hadoop-2.7 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.apache.hadoop + hadoop-yarn-api + ${hadoop.version} + + + org.apache.hadoop + hadoop-yarn-common + ${hadoop.version} + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + ${hadoop.version} + + + org.apache.hadoop + hadoop-yarn-client + ${hadoop.version} + + + org.apache.hadoop + hadoop-yarn-server-tests + ${hadoop.version} + tests + test + + + + + hadoop-3.2 + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + org.apache.hadoop + hadoop-client-minicluster + ${hadoop.version} + + + @@ -67,26 +119,6 @@ test-jar test - - org.apache.hadoop - hadoop-yarn-api - - - org.apache.hadoop - hadoop-yarn-common - - - org.apache.hadoop - hadoop-yarn-server-web-proxy - - - org.apache.hadoop - hadoop-yarn-client - - - org.apache.hadoop - hadoop-client - jakarta.servlet @@ -142,13 +174,6 @@ test - - org.apache.hadoop - hadoop-yarn-server-tests - tests - test - - 
org.mockito mockito-core diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 5f632fbb259f..9b99e8ff9265 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, IOException} import java.lang.reflect.{InvocationTargetException, Modifier} -import java.net.{URI, URL} +import java.net.{URI, URL, URLEncoder} import java.security.PrivilegedExceptionAction import java.util.concurrent.{TimeoutException, TimeUnit} @@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException -import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark._ @@ -308,7 +307,8 @@ private[spark] class ApplicationMaster( // The client-mode AM doesn't listen for incoming connections, so report an invalid port. registerAM(Utils.localHostName, -1, sparkConf, sparkConf.getOption("spark.driver.appUIAddress"), appAttemptId) - addAmIpFilter(Some(driverRef), ProxyUriUtils.getPath(appAttemptId.getApplicationId)) + val encodedAppId = URLEncoder.encode(appAttemptId.getApplicationId.toString, "UTF-8") + addAmIpFilter(Some(driverRef), s"/proxy/$encodedAppId") createAllocator(driverRef, sparkConf, clientRpcEnv, appAttemptId, cachedResourcesConf) reporterThread.join() } catch { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index 20f5339c46fe..a813b9913f23 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -80,6 +80,16 @@ abstract class BaseYarnClusterSuite yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage", "100.0") + // capacity-scheduler.xml is missing in hadoop-client-minicluster so this is a workaround + yarnConf.set("yarn.scheduler.capacity.root.queues", "default") + yarnConf.setInt("yarn.scheduler.capacity.root.default.capacity", 100) + yarnConf.setFloat("yarn.scheduler.capacity.root.default.user-limit-factor", 1) + yarnConf.setInt("yarn.scheduler.capacity.root.default.maximum-capacity", 100) + yarnConf.set("yarn.scheduler.capacity.root.default.state", "RUNNING") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_submit_applications", "*") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_administer_queue", "*") + yarnConf.setInt("yarn.scheduler.capacity.node-locality-delay", -1) + yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1) yarnCluster.init(yarnConf) yarnCluster.start() diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 14438bc14105..1cb68005293a 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ 
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -162,7 +162,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite { } test("run Python application in yarn-client mode") { - testPySpark(true) + testPySpark(true, extraConf = Map("spark.executorEnv.PATH" -> sys.env("PATH"))) } test("run Python application in yarn-cluster mode") { @@ -175,9 +175,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite { clientMode = false, extraConf = Map( "spark.yarn.appMasterEnv.PYSPARK_DRIVER_PYTHON" - -> sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", "python"), + -> sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", "python3"), "spark.yarn.appMasterEnv.PYSPARK_PYTHON" - -> sys.env.getOrElse("PYSPARK_PYTHON", "python")), + -> sys.env.getOrElse("PYSPARK_PYTHON", "python3")), extraEnv = Map( "PYSPARK_DRIVER_PYTHON" -> "not python", "PYSPARK_PYTHON" -> "not python")) @@ -275,7 +275,10 @@ class YarnClusterSuite extends BaseYarnClusterSuite { s"$sparkHome/python") val extraEnvVars = Map( "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator), - "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator)) ++ extraEnv + "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator), + "PYSPARK_DRIVER_PYTHON" -> "python3", + "PYSPARK_PYTHON" -> "python3" + ) ++ extraEnv val moduleDir = { val subdir = new File(tempDir, "pyModules") diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 6b79eb722fcd..af976fa1fa98 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -104,6 +104,10 @@ org.antlr antlr4-runtime + + javax.xml.bind + jaxb-api + commons-codec commons-codec diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 474c6066ed04..d3843257b593 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -191,6 +191,16 @@ + + hadoop-3.2 + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + + + hive diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 42a0ec0253b8..fbbf9c2a1e06 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -118,11 +118,15 @@ private[hive] object IsolatedClientLoader extends Logging { hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { + val hadoopJarName = if (hadoopVersion.startsWith("3")) { + s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion" + } else { + s"org.apache.hadoop:hadoop-client:$hadoopVersion" + } val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1", - s"org.apache.hadoop:hadoop-client:$hadoopVersion") + Seq("com.google.guava:guava:14.0.1", hadoopJarName) val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates(
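Notes on selected hunks above (illustrative sketches, not part of the patch):

The SparkSubmit.scala change teaches resolveDependencyPaths to append a "-tests" suffix when an Ivy artifact has type "test-jar", so the computed cache path matches the file name Ivy actually writes for test jars. A minimal sketch of the naming rule, using a plain case class in place of Ivy's Artifact/ModuleRevisionId API:

    // Simplified model of the file-name rule; Coordinate is a stand-in for Ivy's ModuleRevisionId.
    case class Coordinate(org: String, name: String, rev: String)

    def cachedJarName(c: Coordinate, artifactType: String): String = {
      val testSuffix = if (artifactType == "test-jar") "-tests" else ""
      s"${c.org}_${c.name}-${c.rev}$testSuffix.jar"
    }

    // cachedJarName(Coordinate("org.apache.spark", "spark-tags_2.12", "3.1.0"), "test-jar")
    //   == "org.apache.spark_spark-tags_2.12-3.1.0-tests.jar"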
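With the shaded Hadoop 3.2 client, org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils is no longer on the classpath, so the ApplicationMaster.scala hunk builds the AM IP filter's proxy base path directly from the application id. A sketch of the replacement, assuming only java.net.URLEncoder:

    import java.net.URLEncoder

    // Stand-in for ProxyUriUtils.getPath(appId): the proxy base is "/proxy/<encoded app id>".
    def proxyBasePath(applicationId: String): String =
      s"/proxy/${URLEncoder.encode(applicationId, "UTF-8")}"

    // proxyBasePath("application_1600000000000_0001") == "/proxy/application_1600000000000_0001"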
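IsolatedClientLoader.scala now chooses which Hadoop artifact to resolve for the isolated Hive metastore client based on the Hadoop version string: the shaded hadoop-client-runtime for 3.x, the classic hadoop-client otherwise. The selection logic as a standalone sketch:

    // Mirrors the hadoopJarName branch added in the hunk above (sketch only).
    def hadoopClientCoordinate(hadoopVersion: String): String =
      if (hadoopVersion.startsWith("3")) {
        s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion"
      } else {
        s"org.apache.hadoop:hadoop-client:$hadoopVersion"
      }

    // hadoopClientCoordinate("3.2.1") == "org.apache.hadoop:hadoop-client-runtime:3.2.1"
    // hadoopClientCoordinate("2.7.4") == "org.apache.hadoop:hadoop-client:2.7.4"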
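The BaseYarnClusterSuite.scala hunk works around hadoop-client-minicluster not bundling capacity-scheduler.xml by setting the CapacityScheduler's usual defaults for the root.default queue directly on the YarnConfiguration. The same settings expressed as a small helper (a sketch; the property names and values are taken verbatim from the hunk):

    import org.apache.hadoop.yarn.conf.YarnConfiguration

    // capacity-scheduler.xml is missing from hadoop-client-minicluster, so seed the
    // MiniYARNCluster configuration with the scheduler defaults it would normally read from it.
    def applyCapacitySchedulerDefaults(yarnConf: YarnConfiguration): Unit = {
      val defaults = Seq(
        "yarn.scheduler.capacity.root.queues" -> "default",
        "yarn.scheduler.capacity.root.default.capacity" -> "100",
        "yarn.scheduler.capacity.root.default.user-limit-factor" -> "1",
        "yarn.scheduler.capacity.root.default.maximum-capacity" -> "100",
        "yarn.scheduler.capacity.root.default.state" -> "RUNNING",
        "yarn.scheduler.capacity.root.default.acl_submit_applications" -> "*",
        "yarn.scheduler.capacity.root.default.acl_administer_queue" -> "*",
        "yarn.scheduler.capacity.node-locality-delay" -> "-1")
      defaults.foreach { case (key, value) => yarnConf.set(key, value) }
    }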
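Taken together, the pom.xml changes move every direct hadoop-client / hadoop-yarn-* dependency behind a profile: hadoop-2.7 keeps the unshaded hadoop-client (and the individual hadoop-yarn-* modules in resource-managers/yarn), while hadoop-3.2 depends only on the shaded hadoop-client-api / hadoop-client-runtime, plus hadoop-client-minicluster for the YARN tests. That is also why the spark-deps-hadoop-3.2-hive-2.3 manifest above drops the long tail of transitive Hadoop jars (guice, kerb*, woodstox-core, re2j, ...) and gains just hadoop-client-api and hadoop-client-runtime. A purely illustrative summary of the mapping (the actual selection is done by Maven profiles, not by code):

    // Hypothetical lookup table summarizing which Hadoop artifacts each profile brings in.
    val hadoopArtifactsByProfile: Map[String, Seq[String]] = Map(
      "hadoop-2.7" -> Seq("hadoop-client"),
      "hadoop-3.2" -> Seq("hadoop-client-api", "hadoop-client-runtime", "hadoop-client-minicluster"))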