From 37b9be4dc22d065e82085f6e07a01614f8a99c6a Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Tue, 25 Jul 2023 11:35:13 +0800
Subject: [PATCH 1/8] init

---
 dev/run-tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/run-tests.py b/dev/run-tests.py
index c0c281b549e3..66ee362ed2c0 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -655,7 +655,7 @@ def main():
             opts.parallelism,
             with_coverage=os.environ.get("PYSPARK_CODECOV", "false") == "true",
         )
-        run_python_packaging_tests()
+        # run_python_packaging_tests()
     if any(m.should_run_r_tests for m in test_modules) and not os.environ.get("SKIP_R"):
         run_sparkr_tests()

From 6e1e79a67abd40c78976faed28b71476b33c2ab0 Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Tue, 25 Jul 2023 16:54:10 +0800
Subject: [PATCH 2/8] del conda install

---
 .github/workflows/build_and_test.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 7107af66129e..e850d0a3a8a4 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -413,15 +413,10 @@ jobs:
       run: |
         python3.9 -m pip list
         pypy3 -m pip list
-    - name: Install Conda for pip packaging test
-      run: |
-        curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
-        bash miniconda.sh -b -p $HOME/miniconda
     # Run the tests.
     - name: Run tests
       env: ${{ fromJSON(inputs.envs) }}
       run: |
-        export PATH=$PATH:$HOME/miniconda/bin
         ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
     - name: Upload coverage to Codecov
       if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'

From 7ee9003a8784e51505814588a30631e650a098e2 Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Tue, 25 Jul 2023 20:46:53 +0800
Subject: [PATCH 3/8] add packaging test back

---
 .github/workflows/build_and_test.yml | 84 ++++++++++++++++++++++++++++
 dev/run-tests.py                     |  3 ++-
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index e850d0a3a8a4..ae7f855eacb0 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -205,6 +205,7 @@ jobs:
       HIVE_PROFILE: ${{ matrix.hive }}
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
+      SKIP_PACKAGING: true
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v3
@@ -268,6 +269,7 @@ jobs:
         # Hive "other tests" test needs larger metaspace size based on experiment.
if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi export SERIAL_SBT_TESTS=1 + export SKIP_PACKAGING=True ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - name: Upload test results to report if: always() @@ -366,6 +368,7 @@ jobs: SPARK_LOCAL_IP: localhost SKIP_UNIDOC: true SKIP_MIMA: true + SKIP_PACKAGING: true METASPACE_SIZE: 1g steps: - name: Checkout Spark repository @@ -417,6 +420,7 @@ jobs: - name: Run tests env: ${{ fromJSON(inputs.envs) }} run: | + export SKIP_PACKAGING=True ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" - name: Upload coverage to Codecov if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true' @@ -438,6 +442,82 @@ jobs: name: unit-tests-log-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3 path: "**/target/unit-tests.log" + pyspark-packaging: + needs: [precondition, infra-image] + # always run if pyspark == 'true', even infra-image is skip (such as non-master job) + if: (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark == 'true' + name: "Build modules: pyspark-packaging" + runs-on: ubuntu-22.04 + container: + image: ${{ needs.precondition.outputs.image_url }} + env: + HADOOP_PROFILE: ${{ inputs.hadoop }} + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} + SPARK_LOCAL_IP: localhost + SKIP_UNIDOC: true + SKIP_MIMA: true + METASPACE_SIZE: 1g + steps: + - name: Checkout Spark repository + uses: actions/checkout@v3 + # In order to fetch changed files + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Add GITHUB_WORKSPACE to git trust safe.directory + run: | + git config --global --add safe.directory ${GITHUB_WORKSPACE} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + # Cache local repositories. Note that GitHub Actions cache has a 2G limit. + - name: Cache Scala, SBT and Maven + uses: actions/cache@v3 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v3 + with: + path: ~/.cache/coursier + key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + pyspark-coursier- + - name: Install Java 8 + uses: actions/setup-java@v3 + with: + distribution: temurin + java-version: 8 + - name: Install Conda for pip packaging test + run: | + curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh + bash miniconda.sh -b -p $HOME/miniconda + # Run the tests. 
+    - name: Run tests
+      env: ${{ fromJSON(inputs.envs) }}
+      run: |
+        export PATH=$PATH:$HOME/miniconda/bin
+        export SKIP_MIMA=True
+        ./dev/run-tests --parallelism 1
+    - name: Upload test results to report
+      if: always()
+      uses: actions/upload-artifact@v3
+      with:
+        name: test-results-pyspark-packaging--8-${{ inputs.hadoop }}-hive2.3
+        path: "**/target/test-reports/*.xml"
+
   sparkr:
     needs: [precondition, infra-image]
     # always run if sparkr == 'true', even infra-image is skip (such as non-master job)
@@ -452,6 +532,7 @@ jobs:
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
       SKIP_MIMA: true
+      SKIP_PACKAGING: true
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v3
@@ -501,6 +582,7 @@ jobs:
         # R issues at docker environment
         export TZ=UTC
         export _R_CHECK_SYSTEM_CLOCK_=FALSE
+        export SKIP_PACKAGING=True
         ./dev/run-tests --parallelism 1 --modules sparkr
     - name: Upload test results to report
       if: always()
@@ -906,6 +988,7 @@ jobs:
       SPARK_LOCAL_IP: localhost
       ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0
       SKIP_MIMA: true
+      SKIP_PACKAGING: true
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v3
@@ -945,6 +1028,7 @@ jobs:
       java-version: 8
     - name: Run tests
       run: |
+        export SKIP_PACKAGING=True
         ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
     - name: Upload test results to report
       if: always()
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 66ee362ed2c0..fc7b9b77fb66 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -655,7 +655,8 @@ def main():
             opts.parallelism,
             with_coverage=os.environ.get("PYSPARK_CODECOV", "false") == "true",
         )
-        # run_python_packaging_tests()
+        if not os.environ.get("SKIP_PACKAGING"):
+            run_python_packaging_tests()
     if any(m.should_run_r_tests for m in test_modules) and not os.environ.get("SKIP_R"):
         run_sparkr_tests()

From e4bebafd907ac475bd7de59195185916f3ab547e Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Tue, 25 Jul 2023 20:58:24 +0800
Subject: [PATCH 4/8] del exports

---
 .github/workflows/build_and_test.yml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index ae7f855eacb0..f97112e644fc 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -269,7 +269,6 @@ jobs:
         # Hive "other tests" test needs larger metaspace size based on experiment.
if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi export SERIAL_SBT_TESTS=1 - export SKIP_PACKAGING=True ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - name: Upload test results to report if: always() @@ -420,7 +419,6 @@ jobs: - name: Run tests env: ${{ fromJSON(inputs.envs) }} run: | - export SKIP_PACKAGING=True ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" - name: Upload coverage to Codecov if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true' @@ -509,7 +507,6 @@ jobs: env: ${{ fromJSON(inputs.envs) }} run: | export PATH=$PATH:$HOME/miniconda/bin - export SKIP_MIMA=True ./dev/run-tests --parallelism 1 - name: Upload test results to report if: always() @@ -1028,7 +1025,6 @@ jobs: java-version: 8 - name: Run tests run: | - export SKIP_PACKAGING=True ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest - name: Upload test results to report if: always() From d9dcf5ac3262399b5e9a729973fb33d1fda8d2f0 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 25 Jul 2023 21:05:29 +0800 Subject: [PATCH 5/8] refine --- .github/workflows/build_and_test.yml | 2 +- dev/run-tests.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f97112e644fc..48d7c387201f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -507,7 +507,7 @@ jobs: env: ${{ fromJSON(inputs.envs) }} run: | export PATH=$PATH:$HOME/miniconda/bin - ./dev/run-tests --parallelism 1 + ./dev/run-pip-tests - name: Upload test results to report if: always() uses: actions/upload-artifact@v3 diff --git a/dev/run-tests.py b/dev/run-tests.py index fc7b9b77fb66..c59dd4658a4f 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -395,7 +395,7 @@ def run_python_tests(test_modules, parallelism, with_coverage=False): def run_python_packaging_tests(): - if not os.environ.get("SPARK_JENKINS"): + if not os.environ.get("SPARK_JENKINS") and not os.environ.get("SKIP_PACKAGING"): set_title_and_block("Running PySpark packaging tests", "BLOCK_PYSPARK_PIP_TESTS") command = [os.path.join(SPARK_HOME, "dev", "run-pip-tests")] run_cmd(command) @@ -655,7 +655,6 @@ def main(): opts.parallelism, with_coverage=os.environ.get("PYSPARK_CODECOV", "false") == "true", ) - if not os.environ.get("SKIP_PACKAGING"): run_python_packaging_tests() if any(m.should_run_r_tests for m in test_modules) and not os.environ.get("SKIP_R"): run_sparkr_tests() From 134a4ff95a1319ac62f98294426605b6b8e54e73 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 26 Jul 2023 10:49:16 +0800 Subject: [PATCH 6/8] simplify --- .github/workflows/build_and_test.yml | 86 +++------------------------- dev/run-tests.py | 2 +- 2 files changed, 10 insertions(+), 78 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 48d7c387201f..e5330d6bfca2 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -345,6 +345,8 @@ jobs: java: - ${{ inputs.java }} modules: + - >- + pyspark-errors - >- pyspark-sql, pyspark-mllib, pyspark-resource, pyspark-testing - >- @@ -354,7 +356,7 @@ jobs: - >- pyspark-pandas-slow - >- - pyspark-connect, pyspark-errors + pyspark-connect - >- pyspark-pandas-connect - >- @@ -415,10 +417,16 @@ jobs: run: | 
         python3.9 -m pip list
         pypy3 -m pip list
+    - name: Install Conda for pip packaging test
+      if: ${{ matrix.modules == 'pyspark-errors' }}
+      run: |
+        curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
+        bash miniconda.sh -b -p $HOME/miniconda
     # Run the tests.
     - name: Run tests
       env: ${{ fromJSON(inputs.envs) }}
       run: |
+        if [[ "$MODULES_TO_TEST" == "pyspark-errors" ]]; then export PATH=$PATH:$HOME/miniconda/bin && export SKIP_PACKAGING=false; fi
         ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
     - name: Upload coverage to Codecov
       if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
@@ -440,81 +448,6 @@ jobs:
       name: unit-tests-log-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
       path: "**/target/unit-tests.log"
 
-  pyspark-packaging:
-    needs: [precondition, infra-image]
-    # always run if pyspark == 'true', even infra-image is skip (such as non-master job)
-    if: (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark == 'true'
-    name: "Build modules: pyspark-packaging"
-    runs-on: ubuntu-22.04
-    container:
-      image: ${{ needs.precondition.outputs.image_url }}
-    env:
-      HADOOP_PROFILE: ${{ inputs.hadoop }}
-      HIVE_PROFILE: hive2.3
-      GITHUB_PREV_SHA: ${{ github.event.before }}
-      SPARK_LOCAL_IP: localhost
-      SKIP_UNIDOC: true
-      SKIP_MIMA: true
-      METASPACE_SIZE: 1g
-    steps:
-    - name: Checkout Spark repository
-      uses: actions/checkout@v3
-      # In order to fetch changed files
-      with:
-        fetch-depth: 0
-        repository: apache/spark
-        ref: ${{ inputs.branch }}
-    - name: Add GITHUB_WORKSPACE to git trust safe.directory
-      run: |
-        git config --global --add safe.directory ${GITHUB_WORKSPACE}
-    - name: Sync the current branch with the latest in Apache Spark
-      if: github.repository != 'apache/spark'
-      run: |
-        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
-        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
-        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
-        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
-    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
-    - name: Cache Scala, SBT and Maven
-      uses: actions/cache@v3
-      with:
-        path: |
-          build/apache-maven-*
-          build/scala-*
-          build/*.jar
-          ~/.sbt
-        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
-        restore-keys: |
-          build-
-    - name: Cache Coursier local repository
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/coursier
-        key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
-        restore-keys: |
-          pyspark-coursier-
-    - name: Install Java 8
-      uses: actions/setup-java@v3
-      with:
-        distribution: temurin
-        java-version: 8
-    - name: Install Conda for pip packaging test
-      run: |
-        curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
-        bash miniconda.sh -b -p $HOME/miniconda
-    # Run the tests.
-    - name: Run tests
-      env: ${{ fromJSON(inputs.envs) }}
-      run: |
-        export PATH=$PATH:$HOME/miniconda/bin
-        ./dev/run-pip-tests
-    - name: Upload test results to report
-      if: always()
-      uses: actions/upload-artifact@v3
-      with:
-        name: test-results-pyspark-packaging--8-${{ inputs.hadoop }}-hive2.3
-        path: "**/target/test-reports/*.xml"
-
   sparkr:
     needs: [precondition, infra-image]
     # always run if sparkr == 'true', even infra-image is skip (such as non-master job)
@@ -579,7 +512,6 @@ jobs:
         # R issues at docker environment
         export TZ=UTC
         export _R_CHECK_SYSTEM_CLOCK_=FALSE
-        export SKIP_PACKAGING=True
         ./dev/run-tests --parallelism 1 --modules sparkr
     - name: Upload test results to report
       if: always()
diff --git a/dev/run-tests.py b/dev/run-tests.py
index c59dd4658a4f..9bf3095edb71 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -395,7 +395,7 @@ def run_python_tests(test_modules, parallelism, with_coverage=False):
 
 
 def run_python_packaging_tests():
-    if not os.environ.get("SPARK_JENKINS") and not os.environ.get("SKIP_PACKAGING"):
+    if not os.environ.get("SPARK_JENKINS") and os.environ.get("SKIP_PACKAGING", "false") != "true":
         set_title_and_block("Running PySpark packaging tests", "BLOCK_PYSPARK_PIP_TESTS")
         command = [os.path.join(SPARK_HOME, "dev", "run-pip-tests")]
         run_cmd(command)

From 89d365fc17d03d49e979d060f42c4a93f617229e Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Wed, 26 Jul 2023 11:34:23 +0800
Subject: [PATCH 7/8] fix bash

---
 .github/workflows/build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index e5330d6bfca2..be40e8457ca9 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -426,7 +426,7 @@ jobs:
     - name: Run tests
       env: ${{ fromJSON(inputs.envs) }}
       run: |
-        if [[ "$MODULES_TO_TEST" == "pyspark-errors" ]]; then export PATH=$PATH:$HOME/miniconda/bin && export SKIP_PACKAGING=false; fi
+        if [ "$MODULES_TO_TEST" == "pyspark-errors" ]; then export PATH=$PATH:$HOME/miniconda/bin && export SKIP_PACKAGING=false; fi
         ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
     - name: Upload coverage to Codecov
       if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'

From 04c271e71d824984d68317650bcb1f88798de112 Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Wed, 26 Jul 2023 12:49:27 +0800
Subject: [PATCH 8/8] fix bash again

fix bash again
---
 .github/workflows/build_and_test.yml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index be40e8457ca9..02b3814a018b 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -425,8 +425,13 @@ jobs:
     # Run the tests.
     - name: Run tests
       env: ${{ fromJSON(inputs.envs) }}
+      shell: 'script -q -e -c "bash {0}"'
       run: |
-        if [ "$MODULES_TO_TEST" == "pyspark-errors" ]; then export PATH=$PATH:$HOME/miniconda/bin && export SKIP_PACKAGING=false; fi
+        if [[ "$MODULES_TO_TEST" == "pyspark-errors" ]]; then
+          export PATH=$PATH:$HOME/miniconda/bin
+          export SKIP_PACKAGING=false
+          echo "Python Packaging Tests Enabled!"
+        fi
         ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
     - name: Upload coverage to Codecov
       if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
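
Note on the SKIP_PACKAGING check that the series converges on (a standalone
sketch for readers, not part of the patches themselves): every value read
from os.environ is a string, so the truthiness test used in patches 3-5,
"not os.environ.get(\"SKIP_PACKAGING\")", treats SKIP_PACKAGING=false the
same as SKIP_PACKAGING=true -- both are non-empty strings. Patch 6 therefore
compares against the literal string "true", which is what lets the
pyspark-errors job opt back in by exporting SKIP_PACKAGING=false:

    import os

    # Simulate the pyspark-errors job, which exports SKIP_PACKAGING=false
    # to re-enable the pip packaging tests.
    os.environ["SKIP_PACKAGING"] = "false"

    # Truthiness check (patches 3-5): "false" is a non-empty string, so
    # this wrongly concludes that the packaging tests should be skipped.
    naive_skip = bool(os.environ.get("SKIP_PACKAGING"))
    print(naive_skip)  # True -- tests would be skipped by mistake

    # Explicit comparison (patch 6): only the literal "true" skips.
    explicit_skip = os.environ.get("SKIP_PACKAGING", "false") == "true"
    print(explicit_skip)  # False -- tests run, as intended

This is also why the final workflow keeps SKIP_PACKAGING: true in the
job-level env blocks as the default and overrides it only for the
pyspark-errors matrix entry, instead of exporting it in every run step.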