From 518986348d30ce27b6e8c4efb56abe53834d7b5b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Sun, 17 Mar 2024 22:14:28 +0800 Subject: [PATCH 01/71] [WIP] Fix scheduled jobs for branch-3.4 & branch-3.5 --- .github/workflows/build_and_test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e505be7d4d98..45537d9c07a0 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -438,6 +438,14 @@ jobs: curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh bash miniconda.sh -b -p $HOME/miniconda rm miniconda.sh + - name: Install Python test dependencies for branch-3.4 + if: inputs.branch == 'branch-3.4' + run: | + python3.9 -m pip install 'numpy==1.24.4' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + - name: Install Python test dependencies for branch-3.5 + if: inputs.branch == 'branch-3.5' + run: | + python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' # Run the tests. - name: Run tests env: ${{ fromJSON(inputs.envs) }} From f436ca421b2301994210b2775ec31ef913fb7854 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 08:52:13 +0800 Subject: [PATCH 02/71] test branch-3.5 scheduled --- .github/workflows/build_and_test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 45537d9c07a0..215910aecb96 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -25,13 +25,13 @@ on: java: required: false type: string - default: 17 + default: 8 branch: description: Branch to run the build against required: false type: string # Change 'master' to 'branch-4.0' in branch-4.0 branch after cutting it. - default: master + default: branch-3.5 hadoop: description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. required: false @@ -41,7 +41,7 @@ on: description: Additional environment variables to set when running the tests. Should be in JSON format. required: false type: string - default: '{}' + default: '{"SCALA_PROFILE": "scala2.13", "PYTHON_TO_TEST": "", "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0"}' jobs: description: >- Jobs to run, and should be in JSON format. The values should be matched with the job's key defined From 7d3fe21c84fae06860a0245cc12e923d60b38426 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 09:07:43 +0800 Subject: [PATCH 03/71] Revert "test branch-3.5 scheduled" This reverts commit f436ca421b2301994210b2775ec31ef913fb7854. --- .github/workflows/build_and_test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 215910aecb96..45537d9c07a0 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -25,13 +25,13 @@ on: java: required: false type: string - default: 8 + default: 17 branch: description: Branch to run the build against required: false type: string # Change 'master' to 'branch-4.0' in branch-4.0 branch after cutting it. - default: branch-3.5 + default: master hadoop: description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. required: false @@ -41,7 +41,7 @@ on: description: Additional environment variables to set when running the tests. 
Should be in JSON format. required: false type: string - default: '{"SCALA_PROFILE": "scala2.13", "PYTHON_TO_TEST": "", "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0"}' + default: '{}' jobs: description: >- Jobs to run, and should be in JSON format. The values should be matched with the job's key defined From c5615d6c8779ee696f0d114833a6cf86befa2c0f Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 09:11:24 +0800 Subject: [PATCH 04/71] test --- .github/workflows/build_and_test.yml | 2 ++ .github/workflows/build_branch35.yml | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 45537d9c07a0..9adc7eebc54f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -20,6 +20,8 @@ name: Build and test on: + schedule: + - cron: '0 15 * * *' workflow_call: inputs: java: diff --git a/.github/workflows/build_branch35.yml b/.github/workflows/build_branch35.yml index 9e6fe13c020e..1199329bfe09 100644 --- a/.github/workflows/build_branch35.yml +++ b/.github/workflows/build_branch35.yml @@ -19,9 +19,6 @@ name: "Build (branch-3.5, Scala 2.13, Hadoop 3, JDK 8)" -on: - schedule: - - cron: '0 11 * * *' jobs: run-build: From 3729bf6e27981cced7844d28507a0ce0eb1e2881 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 09:18:02 +0800 Subject: [PATCH 05/71] fix --- .github/workflows/build_branch35.yml | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/.github/workflows/build_branch35.yml b/.github/workflows/build_branch35.yml index 1199329bfe09..fec64f70f24d 100644 --- a/.github/workflows/build_branch35.yml +++ b/.github/workflows/build_branch35.yml @@ -19,6 +19,42 @@ name: "Build (branch-3.5, Scala 2.13, Hadoop 3, JDK 8)" +on: + workflow_call: + inputs: + java: + required: false + type: string + default: 8 + branch: + description: Branch to run the build against + required: false + type: string + default: branch-3.5 + hadoop: + description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. + required: false + type: string + default: hadoop3 + envs: + description: Additional environment variables to set when running the tests. Should be in JSON format. + required: false + type: string + default: '{"SCALA_PROFILE": "scala2.13", "PYTHON_TO_TEST": "", "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0"}' + jobs: + description: >- + Jobs to run, and should be in JSON format. The values should be matched with the job's key defined + in this file, e.g., build. See precondition job below. 
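+        For example, passing '{"build": "true"}' would run only the build job.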
+ required: false + type: string + default: '{ + "build": "true", + "pyspark": "true", + "sparkr": "true", + "tpcds-1g": "true", + "docker-integration-tests": "true", + "lint" : "true" + }' jobs: run-build: From 1483ba7285db7fb1af7a76097f4d71f545b713dc Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 09:31:12 +0800 Subject: [PATCH 06/71] fix --- .github/workflows/build_main.yml | 2 +- .github/workflows/notify_test_workflow.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 9ef52f326375..36481bb4c1b1 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -29,4 +29,4 @@ jobs: permissions: packages: write name: Run - uses: ./.github/workflows/build_and_test.yml + uses: ./.github/workflows/build_branch35.yml diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml index 93b627763445..3c732fca8af8 100644 --- a/.github/workflows/notify_test_workflow.yml +++ b/.github/workflows/notify_test_workflow.yml @@ -58,6 +58,7 @@ jobs: ref: context.payload.pull_request.head.ref, } + console.log('Branch: ' + context.payload.pull_request.head.ref) console.log('Ref: ' + context.payload.pull_request.head.ref) console.log('SHA: ' + context.payload.pull_request.head.sha) From ad36f717ac5e0dcc0a5041d90337292020f1e5be Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 09:33:11 +0800 Subject: [PATCH 07/71] fix --- .github/workflows/notify_test_workflow.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml index 3c732fca8af8..93b627763445 100644 --- a/.github/workflows/notify_test_workflow.yml +++ b/.github/workflows/notify_test_workflow.yml @@ -58,7 +58,6 @@ jobs: ref: context.payload.pull_request.head.ref, } - console.log('Branch: ' + context.payload.pull_request.head.ref) console.log('Ref: ' + context.payload.pull_request.head.ref) console.log('SHA: ' + context.payload.pull_request.head.sha) From 22f8c20f304d22198477b051cd9eb991155badaf Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 09:37:17 +0800 Subject: [PATCH 08/71] fix --- .github/workflows/build_and_test.yml | 8 ++++---- .github/workflows/build_main.yml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9adc7eebc54f..d849a36d2b40 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -27,13 +27,13 @@ on: java: required: false type: string - default: 17 + default: 8 branch: description: Branch to run the build against required: false type: string # Change 'master' to 'branch-4.0' in branch-4.0 branch after cutting it. - default: master + default: branch-3.5 hadoop: description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. required: false @@ -43,14 +43,14 @@ on: description: Additional environment variables to set when running the tests. Should be in JSON format. required: false type: string - default: '{}' + default: '{"SCALA_PROFILE": "scala2.13", "PYTHON_TO_TEST": "", "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0"}' jobs: description: >- Jobs to run, and should be in JSON format. The values should be matched with the job's key defined in this file, e.g., build. See precondition job below. 
required: false type: string - default: '' + default: '{"build": "true","pyspark": "true","sparkr": "true","tpcds-1g": "true","docker-integration-tests": "true","lint" : "true"}' jobs: precondition: name: Check changes diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 36481bb4c1b1..9ef52f326375 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -29,4 +29,4 @@ jobs: permissions: packages: write name: Run - uses: ./.github/workflows/build_branch35.yml + uses: ./.github/workflows/build_and_test.yml From 157436af22948e164d8c1129bee2a5d6384e2fa0 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 09:49:56 +0800 Subject: [PATCH 09/71] fix --- .github/workflows/build_and_test.yml | 25 ++++++------------- .github/workflows/build_branch35.yml | 37 ++-------------------------- 2 files changed, 10 insertions(+), 52 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d849a36d2b40..f8785eb35918 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -27,13 +27,13 @@ on: java: required: false type: string - default: 8 + default: 17 branch: description: Branch to run the build against required: false type: string # Change 'master' to 'branch-4.0' in branch-4.0 branch after cutting it. - default: branch-3.5 + default: master hadoop: description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. required: false @@ -43,14 +43,14 @@ on: description: Additional environment variables to set when running the tests. Should be in JSON format. required: false type: string - default: '{"SCALA_PROFILE": "scala2.13", "PYTHON_TO_TEST": "", "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0"}' + default: '{}' jobs: description: >- Jobs to run, and should be in JSON format. The values should be matched with the job's key defined in this file, e.g., build. See precondition job below. required: false type: string - default: '{"build": "true","pyspark": "true","sparkr": "true","tpcds-1g": "true","docker-integration-tests": "true","lint" : "true"}' + default: '' jobs: precondition: name: Check changes @@ -376,7 +376,8 @@ jobs: SKIP_MIMA: true SKIP_PACKAGING: true METASPACE_SIZE: 1g - BRANCH: ${{ inputs.branch }} + BRANCH: branch-3.5 + java: 8 steps: - name: Checkout Spark repository uses: actions/checkout@v4 @@ -384,17 +385,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: ${{ inputs.branch }} - - name: Add GITHUB_WORKSPACE to git trust safe.directory - run: | - git config --global --add safe.directory ${GITHUB_WORKSPACE} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + ref: branch-3.5 # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
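       # (When the limit is exceeded, older, least recently used caches are evicted; the
       # restore-keys entries below then restore the newest cache sharing a key prefix.)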
- name: Cache Scala, SBT and Maven uses: actions/cache@v4 @@ -424,7 +415,7 @@ jobs: uses: actions/setup-java@v4 with: distribution: zulu - java-version: ${{ matrix.java }} + java-version: 8 - name: List Python packages (${{ env.PYTHON_TO_TEST }}) env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' diff --git a/.github/workflows/build_branch35.yml b/.github/workflows/build_branch35.yml index fec64f70f24d..9e6fe13c020e 100644 --- a/.github/workflows/build_branch35.yml +++ b/.github/workflows/build_branch35.yml @@ -20,41 +20,8 @@ name: "Build (branch-3.5, Scala 2.13, Hadoop 3, JDK 8)" on: - workflow_call: - inputs: - java: - required: false - type: string - default: 8 - branch: - description: Branch to run the build against - required: false - type: string - default: branch-3.5 - hadoop: - description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it. - required: false - type: string - default: hadoop3 - envs: - description: Additional environment variables to set when running the tests. Should be in JSON format. - required: false - type: string - default: '{"SCALA_PROFILE": "scala2.13", "PYTHON_TO_TEST": "", "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0"}' - jobs: - description: >- - Jobs to run, and should be in JSON format. The values should be matched with the job's key defined - in this file, e.g., build. See precondition job below. - required: false - type: string - default: '{ - "build": "true", - "pyspark": "true", - "sparkr": "true", - "tpcds-1g": "true", - "docker-integration-tests": "true", - "lint" : "true" - }' + schedule: + - cron: '0 11 * * *' jobs: run-build: From 411076934fe273962cb7851e74f44f4481fd78a8 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 09:55:34 +0800 Subject: [PATCH 10/71] fix --- .github/workflows/build_and_test.yml | 61 ---------------------------- 1 file changed, 61 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f8785eb35918..22c3633c6a9c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -789,67 +789,6 @@ jobs: path: site.tar.bz2 retention-days: 1 - maven-build: - needs: precondition - if: fromJson(needs.precondition.outputs.required).maven-build == 'true' - name: Java ${{ matrix.java }} build with Maven (${{ matrix.os }}) - strategy: - fail-fast: false - matrix: - include: - - java: 17 - os: ubuntu-latest - - java: 21 - os: ubuntu-latest - - java: 21 - os: macos-14 - runs-on: ${{ matrix.os }} - timeout-minutes: 300 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Maven local repository - uses: 
actions/cache@v4 - with: - path: ~/.m2/repository - key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - java${{ matrix.java }}-maven- - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ matrix.java }} - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - export JAVA_VERSION=${{ matrix.java }} - # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install - rm -rf ~/.m2/repository/org/apache/spark - # Any TPC-DS related updates on this job need to be applied to tpcds-1g-gen job of benchmark.yml as well tpcds-1g: needs: precondition From 669db8f4e953b1e18134b59ca639b11fc6391cf7 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:12:33 +0800 Subject: [PATCH 11/71] fix it --- .github/workflows/build_and_test.yml | 420 +-------------------------- 1 file changed, 9 insertions(+), 411 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 22c3633c6a9c..459b84689b18 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -20,8 +20,6 @@ name: Build and test on: - schedule: - - cron: '0 15 * * *' workflow_call: inputs: java: @@ -119,171 +117,6 @@ jobs: IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME" echo "image_url=$IMG_URL" >> $GITHUB_OUTPUT - # Build: build Spark and run the tests for specified modules. - build: - name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }}" - needs: precondition - if: fromJson(needs.precondition.outputs.required).build == 'true' - runs-on: ubuntu-latest - timeout-minutes: 300 - strategy: - fail-fast: false - matrix: - java: - - ${{ inputs.java }} - hadoop: - - ${{ inputs.hadoop }} - hive: - - hive2.3 - # Note that the modules below are from sparktestsupport/modules.py. - modules: - - >- - core, unsafe, kvstore, avro, utils, - network-common, network-shuffle, repl, launcher, - examples, sketch, variant - - >- - api, catalyst, hive-thriftserver - - >- - mllib-local, mllib, graphx - - >- - streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl, - kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf - - >- - yarn, connect - # Here, we split Hive and SQL tests into some of slow ones and the rest of them. 
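-      # (The split is implemented by the included/excluded tag pairs below: each
-      # "slow tests" entry includes only the suites carrying the tag, and the paired
-      # "other tests" entry excludes the same tag, so every suite runs exactly once.)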
- included-tags: [""] - excluded-tags: [""] - comment: [""] - include: - # Hive tests - - modules: hive - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - included-tags: org.apache.spark.tags.SlowHiveTest - comment: "- slow tests" - - modules: hive - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - excluded-tags: org.apache.spark.tags.SlowHiveTest - comment: "- other tests" - # SQL tests - - modules: sql - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - included-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- extended tests" - - modules: sql - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - included-tags: org.apache.spark.tags.SlowSQLTest - comment: "- slow tests" - - modules: sql - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - excluded-tags: org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.SlowSQLTest - comment: "- other tests" - env: - MODULES_TO_TEST: ${{ matrix.modules }} - EXCLUDED_TAGS: ${{ matrix.excluded-tags }} - INCLUDED_TAGS: ${{ matrix.included-tags }} - HADOOP_PROFILE: ${{ matrix.hadoop }} - HIVE_PROFILE: ${{ matrix.hive }} - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - SKIP_UNIDOC: true - SKIP_MIMA: true - SKIP_PACKAGING: true - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - # In order to fetch changed files - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - # Cache local repositories. Note that GitHub Actions cache has a 10G limit. - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v4 - with: - path: ~/.cache/coursier - key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-coursier- - - name: Free up disk space - run: | - if [ -f ./dev/free_disk_space ]; then - ./dev/free_disk_space - fi - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ matrix.java }} - - name: Install Python 3.9 - uses: actions/setup-python@v5 - # We should install one Python that is higher than 3+ for SQL and Yarn because: - # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. - # - Yarn has a Python specific test too, for example, YarnClusterSuite. 
- if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') - with: - python-version: '3.9' - architecture: x64 - - name: Install Python packages (Python 3.9) - if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') - run: | - python3.9 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1' - python3.9 -m pip list - # Run the tests. - - name: Run tests - env: ${{ fromJSON(inputs.envs) }} - shell: 'script -q -e -c "bash {0}"' - run: | - # Fix for TTY related issues when launching the Ammonite REPL in tests. - export TERM=vt100 - # Hive "other tests" test needs larger metaspace size based on experiment. - if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi - # SPARK-46283: should delete the following env replacement after SPARK 3.x EOL - if [[ "$MODULES_TO_TEST" == *"streaming-kinesis-asl"* ]] && [[ "${{ inputs.branch }}" =~ ^branch-3 ]]; then - MODULES_TO_TEST=${MODULES_TO_TEST//streaming-kinesis-asl, /} - fi - export SERIAL_SBT_TESTS=1 - ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: ${{ !success() }} - uses: actions/upload-artifact@v4 - with: - name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} - path: "**/target/unit-tests.log" - infra-image: name: "Base image build" needs: precondition @@ -343,7 +176,9 @@ jobs: fail-fast: false matrix: java: - - ${{ inputs.java }} + - 8 + branch: + - branch-3.5 modules: - >- pyspark-sql, pyspark-resource, pyspark-testing @@ -367,7 +202,7 @@ jobs: pyspark-pandas-connect-part3 env: MODULES_TO_TEST: ${{ matrix.modules }} - PYTHON_TO_TEST: 'python3.9' + PYTHON_TO_TEST: '' HADOOP_PROFILE: ${{ inputs.hadoop }} HIVE_PROFILE: hive2.3 GITHUB_PREV_SHA: ${{ github.event.before }} @@ -376,8 +211,8 @@ jobs: SKIP_MIMA: true SKIP_PACKAGING: true METASPACE_SIZE: 1g - BRANCH: branch-3.5 - java: 8 + "SCALA_PROFILE": "scala2.13", + "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0" steps: - name: Checkout Spark repository uses: actions/checkout@v4 @@ -386,6 +221,9 @@ jobs: fetch-depth: 0 repository: apache/spark ref: branch-3.5 + - name: Add GITHUB_WORKSPACE to git trust safe.directory + run: | + git config --global --add safe.directory ${GITHUB_WORKSPACE} # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
- name: Cache Scala, SBT and Maven uses: actions/cache@v4 @@ -477,85 +315,6 @@ jobs: name: unit-tests-log-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }} path: "**/target/unit-tests.log" - sparkr: - needs: [precondition, infra-image] - # always run if sparkr == 'true', even infra-image is skip (such as non-master job) - if: (!cancelled()) && fromJson(needs.precondition.outputs.required).sparkr == 'true' - name: "Build modules: sparkr" - runs-on: ubuntu-latest - timeout-minutes: 300 - container: - image: ${{ needs.precondition.outputs.image_url }} - env: - HADOOP_PROFILE: ${{ inputs.hadoop }} - HIVE_PROFILE: hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - SKIP_UNIDOC: true - SKIP_MIMA: true - SKIP_PACKAGING: true - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - # In order to fetch changed files - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Add GITHUB_WORKSPACE to git trust safe.directory - run: | - git config --global --add safe.directory ${GITHUB_WORKSPACE} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - # Cache local repositories. Note that GitHub Actions cache has a 10G limit. - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v4 - with: - path: ~/.cache/coursier - key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - sparkr-coursier- - - name: Free up disk space - run: | - if [ -f ./dev/free_disk_space_container ]; then - ./dev/free_disk_space_container - fi - - name: Install Java ${{ inputs.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ inputs.java }} - - name: Run tests - env: ${{ fromJSON(inputs.envs) }} - run: | - # The followings are also used by `r-lib/actions/setup-r` to avoid - # R issues at docker environment - export TZ=UTC - export _R_CHECK_SYSTEM_CLOCK_=FALSE - ./dev/run-tests --parallelism 1 --modules sparkr - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-sparkr--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 - path: "**/target/test-reports/*.xml" - buf: needs: [precondition] if: (!cancelled()) && fromJson(needs.precondition.outputs.required).buf == 'true' @@ -889,164 +648,3 @@ jobs: with: name: unit-tests-log-tpcds--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/unit-tests.log" - - docker-integration-tests: - needs: precondition - if: fromJson(needs.precondition.outputs.required).docker-integration-tests == 'true' - name: Run Docker integration tests - runs-on: ubuntu-latest - timeout-minutes: 300 - env: - HADOOP_PROFILE: ${{ inputs.hadoop }} - HIVE_PROFILE: 
hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-free:23.3 - SKIP_UNIDOC: true - SKIP_MIMA: true - SKIP_PACKAGING: true - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v4 - with: - path: ~/.cache/coursier - key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - docker-integration-coursier- - - name: Install Java ${{ inputs.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ inputs.java }} - - name: Run tests - env: ${{ fromJSON(inputs.envs) }} - run: | - ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: ${{ !success() }} - uses: actions/upload-artifact@v4 - with: - name: unit-tests-log-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 - path: "**/target/unit-tests.log" - - k8s-integration-tests: - needs: precondition - if: fromJson(needs.precondition.outputs.required).k8s-integration-tests == 'true' - name: Run Spark on Kubernetes Integration test - runs-on: ubuntu-latest - timeout-minutes: 300 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v4 - with: - path: ~/.cache/coursier - key: 
k8s-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - k8s-integration-coursier- - - name: Install Java ${{ inputs.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ inputs.java }} - - name: start minikube - run: | - # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/ - curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 - sudo install minikube-linux-amd64 /usr/local/bin/minikube - rm minikube-linux-amd64 - # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic - minikube start --cpus 2 --memory 6144 - - name: Print K8S pods and nodes info - run: | - kubectl get pods -A - kubectl describe node - - name: Run Spark on K8S integration test - run: | - # Prepare PV test - PVC_TMP_DIR=$(mktemp -d) - export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR - export PVC_TESTS_VM_PATH=$PVC_TMP_DIR - minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 & - kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.2/installer/volcano-development.yaml || true - eval $(minikube docker-env) - build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" - - name: Upload Spark on K8S integration tests log files - if: ${{ !success() }} - uses: actions/upload-artifact@v4 - with: - name: spark-on-kubernetes-it-log - path: "**/target/integration-tests.log" - - ui: - needs: [precondition] - if: fromJson(needs.precondition.outputs.required).ui == 'true' - name: Run Spark UI tests - runs-on: ubuntu-latest - timeout-minutes: 300 - steps: - - uses: actions/checkout@v4 - - name: Use Node.js - uses: actions/setup-node@v4 - with: - node-version: 20 - cache: 'npm' - cache-dependency-path: ui-test/package-lock.json - - run: | - cd ui-test - npm install --save-dev - node --experimental-vm-modules node_modules/.bin/jest From 02efe1d611271f3b9ef1f32d2aeb5773038ffd09 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:14:57 +0800 Subject: [PATCH 12/71] fix it --- .github/workflows/build_and_test.yml | 405 +++++++++++++++++++++++++++ 1 file changed, 405 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 459b84689b18..5df0245516e9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -117,6 +117,171 @@ jobs: IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME" echo "image_url=$IMG_URL" >> $GITHUB_OUTPUT + # Build: build Spark and run the tests for specified modules. + build: + name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }}" + needs: precondition + if: fromJson(needs.precondition.outputs.required).build == 'true' + runs-on: ubuntu-latest + timeout-minutes: 300 + strategy: + fail-fast: false + matrix: + java: + - ${{ inputs.java }} + hadoop: + - ${{ inputs.hadoop }} + hive: + - hive2.3 + # Note that the modules below are from sparktestsupport/modules.py. 
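+      # (dev/sparktestsupport/modules.py maps each name below to its source paths and
+      # test goals, which dev/run-tests uses to decide what to build and test.)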
+ modules: + - >- + core, unsafe, kvstore, avro, utils, + network-common, network-shuffle, repl, launcher, + examples, sketch, variant + - >- + api, catalyst, hive-thriftserver + - >- + mllib-local, mllib, graphx + - >- + streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl, + kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf + - >- + yarn, connect + # Here, we split Hive and SQL tests into some of slow ones and the rest of them. + included-tags: [""] + excluded-tags: [""] + comment: [""] + include: + # Hive tests + - modules: hive + java: ${{ inputs.java }} + hadoop: ${{ inputs.hadoop }} + hive: hive2.3 + included-tags: org.apache.spark.tags.SlowHiveTest + comment: "- slow tests" + - modules: hive + java: ${{ inputs.java }} + hadoop: ${{ inputs.hadoop }} + hive: hive2.3 + excluded-tags: org.apache.spark.tags.SlowHiveTest + comment: "- other tests" + # SQL tests + - modules: sql + java: ${{ inputs.java }} + hadoop: ${{ inputs.hadoop }} + hive: hive2.3 + included-tags: org.apache.spark.tags.ExtendedSQLTest + comment: "- extended tests" + - modules: sql + java: ${{ inputs.java }} + hadoop: ${{ inputs.hadoop }} + hive: hive2.3 + included-tags: org.apache.spark.tags.SlowSQLTest + comment: "- slow tests" + - modules: sql + java: ${{ inputs.java }} + hadoop: ${{ inputs.hadoop }} + hive: hive2.3 + excluded-tags: org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.SlowSQLTest + comment: "- other tests" + env: + MODULES_TO_TEST: ${{ matrix.modules }} + EXCLUDED_TAGS: ${{ matrix.excluded-tags }} + INCLUDED_TAGS: ${{ matrix.included-tags }} + HADOOP_PROFILE: ${{ matrix.hadoop }} + HIVE_PROFILE: ${{ matrix.hive }} + GITHUB_PREV_SHA: ${{ github.event.before }} + SPARK_LOCAL_IP: localhost + SKIP_UNIDOC: true + SKIP_MIMA: true + SKIP_PACKAGING: true + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + # In order to fetch changed files + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
+ - name: Cache Scala, SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v4 + with: + path: ~/.cache/coursier + key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-coursier- + - name: Free up disk space + run: | + if [ -f ./dev/free_disk_space ]; then + ./dev/free_disk_space + fi + - name: Install Java ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ matrix.java }} + - name: Install Python 3.9 + uses: actions/setup-python@v5 + # We should install one Python that is higher than 3+ for SQL and Yarn because: + # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. + # - Yarn has a Python specific test too, for example, YarnClusterSuite. + if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') + with: + python-version: '3.9' + architecture: x64 + - name: Install Python packages (Python 3.9) + if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') + run: | + python3.9 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1' + python3.9 -m pip list + # Run the tests. + - name: Run tests + env: ${{ fromJSON(inputs.envs) }} + shell: 'script -q -e -c "bash {0}"' + run: | + # Fix for TTY related issues when launching the Ammonite REPL in tests. + export TERM=vt100 + # Hive "other tests" test needs larger metaspace size based on experiment. 
+ if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi + # SPARK-46283: should delete the following env replacement after SPARK 3.x EOL + if [[ "$MODULES_TO_TEST" == *"streaming-kinesis-asl"* ]] && [[ "${{ inputs.branch }}" =~ ^branch-3 ]]; then + MODULES_TO_TEST=${MODULES_TO_TEST//streaming-kinesis-asl, /} + fi + export SERIAL_SBT_TESTS=1 + ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + if: ${{ !success() }} + uses: actions/upload-artifact@v4 + with: + name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} + path: "**/target/unit-tests.log" + infra-image: name: "Base image build" needs: precondition @@ -315,6 +480,85 @@ jobs: name: unit-tests-log-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }} path: "**/target/unit-tests.log" + sparkr: + needs: [precondition, infra-image] + # always run if sparkr == 'true', even infra-image is skip (such as non-master job) + if: (!cancelled()) && fromJson(needs.precondition.outputs.required).sparkr == 'true' + name: "Build modules: sparkr" + runs-on: ubuntu-latest + timeout-minutes: 300 + container: + image: ${{ needs.precondition.outputs.image_url }} + env: + HADOOP_PROFILE: ${{ inputs.hadoop }} + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} + SPARK_LOCAL_IP: localhost + SKIP_UNIDOC: true + SKIP_MIMA: true + SKIP_PACKAGING: true + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + # In order to fetch changed files + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Add GITHUB_WORKSPACE to git trust safe.directory + run: | + git config --global --add safe.directory ${GITHUB_WORKSPACE} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
+ - name: Cache Scala, SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v4 + with: + path: ~/.cache/coursier + key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + sparkr-coursier- + - name: Free up disk space + run: | + if [ -f ./dev/free_disk_space_container ]; then + ./dev/free_disk_space_container + fi + - name: Install Java ${{ inputs.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ inputs.java }} + - name: Run tests + env: ${{ fromJSON(inputs.envs) }} + run: | + # The followings are also used by `r-lib/actions/setup-r` to avoid + # R issues at docker environment + export TZ=UTC + export _R_CHECK_SYSTEM_CLOCK_=FALSE + ./dev/run-tests --parallelism 1 --modules sparkr + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-sparkr--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 + path: "**/target/test-reports/*.xml" + buf: needs: [precondition] if: (!cancelled()) && fromJson(needs.precondition.outputs.required).buf == 'true' @@ -648,3 +892,164 @@ jobs: with: name: unit-tests-log-tpcds--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/unit-tests.log" + + docker-integration-tests: + needs: precondition + if: fromJson(needs.precondition.outputs.required).docker-integration-tests == 'true' + name: Run Docker integration tests + runs-on: ubuntu-latest + timeout-minutes: 300 + env: + HADOOP_PROFILE: ${{ inputs.hadoop }} + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} + SPARK_LOCAL_IP: localhost + ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-free:23.3 + SKIP_UNIDOC: true + SKIP_MIMA: true + SKIP_PACKAGING: true + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + - name: Cache Scala, SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v4 + with: + path: ~/.cache/coursier + key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + docker-integration-coursier- + - name: Install Java ${{ inputs.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ inputs.java }} + - name: Run tests + env: ${{ fromJSON(inputs.envs) }} + run: | + ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags 
org.apache.spark.tags.DockerTest + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + if: ${{ !success() }} + uses: actions/upload-artifact@v4 + with: + name: unit-tests-log-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 + path: "**/target/unit-tests.log" + + k8s-integration-tests: + needs: precondition + if: fromJson(needs.precondition.outputs.required).k8s-integration-tests == 'true' + name: Run Spark on Kubernetes Integration test + runs-on: ubuntu-latest + timeout-minutes: 300 + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + - name: Cache Scala, SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v4 + with: + path: ~/.cache/coursier + key: k8s-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + k8s-integration-coursier- + - name: Install Java ${{ inputs.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ inputs.java }} + - name: start minikube + run: | + # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/ + curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 + sudo install minikube-linux-amd64 /usr/local/bin/minikube + rm minikube-linux-amd64 + # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic + minikube start --cpus 2 --memory 6144 + - name: Print K8S pods and nodes info + run: | + kubectl get pods -A + kubectl describe node + - name: Run Spark on K8S integration test + run: | + # Prepare PV test + PVC_TMP_DIR=$(mktemp -d) + export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR + export PVC_TESTS_VM_PATH=$PVC_TMP_DIR + minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 & + kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.2/installer/volcano-development.yaml || true + eval $(minikube docker-env) + build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" + - name: Upload Spark on K8S integration tests log files + if: ${{ !success() }} + uses: actions/upload-artifact@v4 + with: + name: spark-on-kubernetes-it-log + path: "**/target/integration-tests.log" + + ui: + needs: 
[precondition] + if: fromJson(needs.precondition.outputs.required).ui == 'true' + name: Run Spark UI tests + runs-on: ubuntu-latest + timeout-minutes: 300 + steps: + - uses: actions/checkout@v4 + - name: Use Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: 'npm' + cache-dependency-path: ui-test/package-lock.json + - run: | + cd ui-test + npm install --save-dev + node --experimental-vm-modules node_modules/.bin/jest From 323d0b3d036873fd385916b1ae31fe8bac38664f Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:23:35 +0800 Subject: [PATCH 13/71] fix --- .github/workflows/build_and_test.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5df0245516e9..9162dfc8046a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -341,9 +341,7 @@ jobs: fail-fast: false matrix: java: - - 8 - branch: - - branch-3.5 + - ${{ inputs.java }} modules: - >- pyspark-sql, pyspark-resource, pyspark-testing From 156ff37227bd9ff95b203f329e3a7ebfa24c820f Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:25:15 +0800 Subject: [PATCH 14/71] Trigger build From b029989f4c2933e1f9d9a273cbccceae38f8f831 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:27:05 +0800 Subject: [PATCH 15/71] fix it --- .github/workflows/build_and_test.yml | 61 ++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9162dfc8046a..9e2dcbc12801 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -790,6 +790,67 @@ jobs: path: site.tar.bz2 retention-days: 1 + maven-build: + needs: precondition + if: fromJson(needs.precondition.outputs.required).maven-build == 'true' + name: Java ${{ matrix.java }} build with Maven (${{ matrix.os }}) + strategy: + fail-fast: false + matrix: + include: + - java: 17 + os: ubuntu-latest + - java: 21 + os: ubuntu-latest + - java: 21 + os: macos-14 + runs-on: ${{ matrix.os }} + timeout-minutes: 300 + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + - name: Cache Scala, SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Maven local repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + java${{ matrix.java }}-maven- + - name: Install Java ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ matrix.java }} + - name: Build with Maven + run: | + export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g 
-Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_CLI_OPTS="--no-transfer-progress" + export JAVA_VERSION=${{ matrix.java }} + # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install + rm -rf ~/.m2/repository/org/apache/spark + # Any TPC-DS related updates on this job need to be applied to tpcds-1g-gen job of benchmark.yml as well tpcds-1g: needs: precondition From 99587658df225419e5b567e438efc7da9cb02469 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:28:25 +0800 Subject: [PATCH 16/71] fix it --- .github/workflows/build_and_test.yml | 86 ++++++++++++++-------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9e2dcbc12801..ca2951c69435 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -807,49 +807,49 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 300 steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Maven local repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - java${{ matrix.java }}-maven- - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ matrix.java }} - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - export JAVA_VERSION=${{ matrix.java }} - # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. 
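+          # ('install' copies the built artifacts into ~/.m2/repository; the rm -rf below
+          # then removes Spark's own artifacts so they are not persisted by the
+          # "Cache Maven local repository" step above.)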
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install - rm -rf ~/.m2/repository/org/apache/spark + - name: Checkout Spark repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + - name: Cache Scala, SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Maven local repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + java${{ matrix.java }}-maven- + - name: Install Java ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ matrix.java }} + - name: Build with Maven + run: | + export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_CLI_OPTS="--no-transfer-progress" + export JAVA_VERSION=${{ matrix.java }} + # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. 
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install + rm -rf ~/.m2/repository/org/apache/spark # Any TPC-DS related updates on this job need to be applied to tpcds-1g-gen job of benchmark.yml as well tpcds-1g: From 2bc62ec5771eabec67033fb34f9cc56bbd5c88d6 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:30:57 +0800 Subject: [PATCH 17/71] fix it --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ca2951c69435..b3dee0449997 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -374,8 +374,8 @@ jobs: SKIP_MIMA: true SKIP_PACKAGING: true METASPACE_SIZE: 1g - "SCALA_PROFILE": "scala2.13", - "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0" + SCALA_PROFILE: scala2.13, + ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0 steps: - name: Checkout Spark repository uses: actions/checkout@v4 From f8ac6b519fdbbdd640fd8c32b61ba0c3ec175d27 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:32:15 +0800 Subject: [PATCH 18/71] fix it --- .github/workflows/build_and_test.yml | 301 --------------------------- 1 file changed, 301 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b3dee0449997..4eb19049d987 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -478,85 +478,6 @@ jobs: name: unit-tests-log-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }} path: "**/target/unit-tests.log" - sparkr: - needs: [precondition, infra-image] - # always run if sparkr == 'true', even infra-image is skip (such as non-master job) - if: (!cancelled()) && fromJson(needs.precondition.outputs.required).sparkr == 'true' - name: "Build modules: sparkr" - runs-on: ubuntu-latest - timeout-minutes: 300 - container: - image: ${{ needs.precondition.outputs.image_url }} - env: - HADOOP_PROFILE: ${{ inputs.hadoop }} - HIVE_PROFILE: hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - SKIP_UNIDOC: true - SKIP_MIMA: true - SKIP_PACKAGING: true - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - # In order to fetch changed files - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Add GITHUB_WORKSPACE to git trust safe.directory - run: | - git config --global --add safe.directory ${GITHUB_WORKSPACE} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
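 # (The steps deleted below follow the cache idiom used throughout this workflow:
 # an exact key derived from hashFiles() over the build definition, plus a coarser
 # restore-keys prefix as a fallback. A minimal sketch of the idiom, with
 # placeholder path and key names:
 #   - uses: actions/cache@v4
 #     with:
 #       path: ~/.cache/example            # hypothetical path
 #       key: example-${{ hashFiles('**/pom.xml') }}
 #       restore-keys: |
 #         example-
 # A near-miss run then restores the closest previous cache instead of starting cold.)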
- - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v4 - with: - path: ~/.cache/coursier - key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - sparkr-coursier- - - name: Free up disk space - run: | - if [ -f ./dev/free_disk_space_container ]; then - ./dev/free_disk_space_container - fi - - name: Install Java ${{ inputs.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ inputs.java }} - - name: Run tests - env: ${{ fromJSON(inputs.envs) }} - run: | - # The followings are also used by `r-lib/actions/setup-r` to avoid - # R issues at docker environment - export TZ=UTC - export _R_CHECK_SYSTEM_CLOCK_=FALSE - ./dev/run-tests --parallelism 1 --modules sparkr - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-sparkr--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 - path: "**/target/test-reports/*.xml" - buf: needs: [precondition] if: (!cancelled()) && fromJson(needs.precondition.outputs.required).buf == 'true' @@ -790,67 +711,6 @@ jobs: path: site.tar.bz2 retention-days: 1 - maven-build: - needs: precondition - if: fromJson(needs.precondition.outputs.required).maven-build == 'true' - name: Java ${{ matrix.java }} build with Maven (${{ matrix.os }}) - strategy: - fail-fast: false - matrix: - include: - - java: 17 - os: ubuntu-latest - - java: 21 - os: ubuntu-latest - - java: 21 - os: macos-14 - runs-on: ${{ matrix.os }} - timeout-minutes: 300 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Maven local repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - java${{ matrix.java }}-maven- - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ matrix.java }} - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - export JAVA_VERSION=${{ matrix.java }} - # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. 
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install - rm -rf ~/.m2/repository/org/apache/spark - # Any TPC-DS related updates on this job need to be applied to tpcds-1g-gen job of benchmark.yml as well tpcds-1g: needs: precondition @@ -951,164 +811,3 @@ jobs: with: name: unit-tests-log-tpcds--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/unit-tests.log" - - docker-integration-tests: - needs: precondition - if: fromJson(needs.precondition.outputs.required).docker-integration-tests == 'true' - name: Run Docker integration tests - runs-on: ubuntu-latest - timeout-minutes: 300 - env: - HADOOP_PROFILE: ${{ inputs.hadoop }} - HIVE_PROFILE: hive2.3 - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-free:23.3 - SKIP_UNIDOC: true - SKIP_MIMA: true - SKIP_PACKAGING: true - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v4 - with: - path: ~/.cache/coursier - key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - docker-integration-coursier- - - name: Install Java ${{ inputs.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ inputs.java }} - - name: Run tests - env: ${{ fromJSON(inputs.envs) }} - run: | - ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: ${{ !success() }} - uses: actions/upload-artifact@v4 - with: - name: unit-tests-log-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 - path: "**/target/unit-tests.log" - - k8s-integration-tests: - needs: precondition - if: fromJson(needs.precondition.outputs.required).k8s-integration-tests == 'true' - name: Run Spark on Kubernetes Integration test - runs-on: ubuntu-latest - timeout-minutes: 300 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch 
https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v4 - with: - path: ~/.cache/coursier - key: k8s-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - k8s-integration-coursier- - - name: Install Java ${{ inputs.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ inputs.java }} - - name: start minikube - run: | - # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/ - curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 - sudo install minikube-linux-amd64 /usr/local/bin/minikube - rm minikube-linux-amd64 - # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic - minikube start --cpus 2 --memory 6144 - - name: Print K8S pods and nodes info - run: | - kubectl get pods -A - kubectl describe node - - name: Run Spark on K8S integration test - run: | - # Prepare PV test - PVC_TMP_DIR=$(mktemp -d) - export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR - export PVC_TESTS_VM_PATH=$PVC_TMP_DIR - minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 & - kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.2/installer/volcano-development.yaml || true - eval $(minikube docker-env) - build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" - - name: Upload Spark on K8S integration tests log files - if: ${{ !success() }} - uses: actions/upload-artifact@v4 - with: - name: spark-on-kubernetes-it-log - path: "**/target/integration-tests.log" - - ui: - needs: [precondition] - if: fromJson(needs.precondition.outputs.required).ui == 'true' - name: Run Spark UI tests - runs-on: ubuntu-latest - timeout-minutes: 300 - steps: - - uses: actions/checkout@v4 - - name: Use Node.js - uses: actions/setup-node@v4 - with: - node-version: 20 - cache: 'npm' - cache-dependency-path: ui-test/package-lock.json - - run: | - cd ui-test - npm install --save-dev - node --experimental-vm-modules node_modules/.bin/jest From 7ddaa7bcb7ba9288edaaafcdb7ad65c66c8c5b0b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:36:26 +0800 Subject: [PATCH 19/71] fix it --- .github/workflows/build_and_test.yml | 172 +-------------------------- 1 file changed, 5 insertions(+), 167 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4eb19049d987..df5c0bfe42da 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -117,171 +117,6 @@ jobs: IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME" echo "image_url=$IMG_URL" >> $GITHUB_OUTPUT - # Build: 
build Spark and run the tests for specified modules. - build: - name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }}" - needs: precondition - if: fromJson(needs.precondition.outputs.required).build == 'true' - runs-on: ubuntu-latest - timeout-minutes: 300 - strategy: - fail-fast: false - matrix: - java: - - ${{ inputs.java }} - hadoop: - - ${{ inputs.hadoop }} - hive: - - hive2.3 - # Note that the modules below are from sparktestsupport/modules.py. - modules: - - >- - core, unsafe, kvstore, avro, utils, - network-common, network-shuffle, repl, launcher, - examples, sketch, variant - - >- - api, catalyst, hive-thriftserver - - >- - mllib-local, mllib, graphx - - >- - streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl, - kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf - - >- - yarn, connect - # Here, we split Hive and SQL tests into some of slow ones and the rest of them. - included-tags: [""] - excluded-tags: [""] - comment: [""] - include: - # Hive tests - - modules: hive - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - included-tags: org.apache.spark.tags.SlowHiveTest - comment: "- slow tests" - - modules: hive - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - excluded-tags: org.apache.spark.tags.SlowHiveTest - comment: "- other tests" - # SQL tests - - modules: sql - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - included-tags: org.apache.spark.tags.ExtendedSQLTest - comment: "- extended tests" - - modules: sql - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - included-tags: org.apache.spark.tags.SlowSQLTest - comment: "- slow tests" - - modules: sql - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - excluded-tags: org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.SlowSQLTest - comment: "- other tests" - env: - MODULES_TO_TEST: ${{ matrix.modules }} - EXCLUDED_TAGS: ${{ matrix.excluded-tags }} - INCLUDED_TAGS: ${{ matrix.included-tags }} - HADOOP_PROFILE: ${{ matrix.hadoop }} - HIVE_PROFILE: ${{ matrix.hive }} - GITHUB_PREV_SHA: ${{ github.event.before }} - SPARK_LOCAL_IP: localhost - SKIP_UNIDOC: true - SKIP_MIMA: true - SKIP_PACKAGING: true - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - # In order to fetch changed files - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
- - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Coursier local repository - uses: actions/cache@v4 - with: - path: ~/.cache/coursier - key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-coursier- - - name: Free up disk space - run: | - if [ -f ./dev/free_disk_space ]; then - ./dev/free_disk_space - fi - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ matrix.java }} - - name: Install Python 3.9 - uses: actions/setup-python@v5 - # We should install one Python that is higher than 3+ for SQL and Yarn because: - # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. - # - Yarn has a Python specific test too, for example, YarnClusterSuite. - if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') - with: - python-version: '3.9' - architecture: x64 - - name: Install Python packages (Python 3.9) - if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') - run: | - python3.9 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1' - python3.9 -m pip list - # Run the tests. - - name: Run tests - env: ${{ fromJSON(inputs.envs) }} - shell: 'script -q -e -c "bash {0}"' - run: | - # Fix for TTY related issues when launching the Ammonite REPL in tests. - export TERM=vt100 - # Hive "other tests" test needs larger metaspace size based on experiment. 
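          # (Why the bump in the line below: metaspace is where the JVM keeps class
          # metadata, and the Hive suites load far more classes than the other modules.
          # How the variable reaches the JVM is build-script plumbing not shown here,
          # so treat the mapping as an assumption; the shape is simply:
          #   export METASPACE_SIZE=2g   # presumably surfaced to SBT/Maven as a max-metaspace setting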
- if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi - # SPARK-46283: should delete the following env replacement after SPARK 3.x EOL - if [[ "$MODULES_TO_TEST" == *"streaming-kinesis-asl"* ]] && [[ "${{ inputs.branch }}" =~ ^branch-3 ]]; then - MODULES_TO_TEST=${MODULES_TO_TEST//streaming-kinesis-asl, /} - fi - export SERIAL_SBT_TESTS=1 - ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: ${{ !success() }} - uses: actions/upload-artifact@v4 - with: - name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} - path: "**/target/unit-tests.log" - infra-image: name: "Base image build" needs: precondition @@ -341,7 +176,9 @@ jobs: fail-fast: false matrix: java: - - ${{ inputs.java }} + - 8 + branch: + - branch-3.5 modules: - >- pyspark-sql, pyspark-resource, pyspark-testing @@ -374,6 +211,7 @@ jobs: SKIP_MIMA: true SKIP_PACKAGING: true METASPACE_SIZE: 1g + BRANCH: ${{ inputs.branch }} SCALA_PROFILE: scala2.13, ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0 steps: @@ -416,7 +254,7 @@ jobs: uses: actions/setup-java@v4 with: distribution: zulu - java-version: 8 + java-version: ${{ matrix.java }} - name: List Python packages (${{ env.PYTHON_TO_TEST }}) env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' From 8b7b6dc3f385b9662aa6b2048d088cb47d858a12 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 11:52:19 +0800 Subject: [PATCH 20/71] test --- .github/workflows/build_and_test.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index df5c0bfe42da..315034964308 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -221,10 +221,17 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: branch-3.5 + ref: ${{ inputs.branch }} - name: Add GITHUB_WORKSPACE to git trust safe.directory run: | git config --global --add safe.directory ${GITHUB_WORKSPACE} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
- name: Cache Scala, SBT and Maven uses: actions/cache@v4 From c5db8d0d807bafbd90e3cae48958ba9ecafbd25e Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 12:44:53 +0800 Subject: [PATCH 21/71] fix SCALA_PROFILE --- .github/workflows/build_and_test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 315034964308..97452954a1e3 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -212,8 +212,7 @@ jobs: SKIP_PACKAGING: true METASPACE_SIZE: 1g BRANCH: ${{ inputs.branch }} - SCALA_PROFILE: scala2.13, - ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0 + SCALA_PROFILE: "scala2.13", steps: - name: Checkout Spark repository uses: actions/checkout@v4 From f7a3c76bb4f1afc28670125c0f14a41ff4594936 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 13:12:31 +0800 Subject: [PATCH 22/71] fix --- .github/workflows/build_and_test.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 97452954a1e3..a2ba44579cad 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -211,21 +211,25 @@ jobs: SKIP_MIMA: true SKIP_PACKAGING: true METASPACE_SIZE: 1g - BRANCH: ${{ inputs.branch }} - SCALA_PROFILE: "scala2.13", + BRANCH: branch-3.5 steps: - name: Checkout Spark repository uses: actions/checkout@v4 + env: ${{ fromJSON(inputs.envs) }} # In order to fetch changed files with: fetch-depth: 0 repository: apache/spark - ref: ${{ inputs.branch }} + ref: branch-3.5 - name: Add GITHUB_WORKSPACE to git trust safe.directory + env: ${{ fromJSON(inputs.envs) }} run: | git config --global --add safe.directory ${GITHUB_WORKSPACE} - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' + env: ${{ fromJSON(inputs.envs) }} + with: + ref: branch-3.5 run: | echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} @@ -281,13 +285,17 @@ jobs: run: | python3.9 -m pip install 'numpy==1.24.4' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' - name: Install Python test dependencies for branch-3.5 - if: inputs.branch == 'branch-3.5' + env: ${{ fromJSON(inputs.envs) }} run: | python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' # Run the tests. 
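      # (Context for the step that follows: in GitHub Actions, `with:` supplies inputs
      # only to a `uses:` action; a plain `run:` step cannot take `with:`, which is
      # why the java/branch/hadoop keys added here are unwound in the commits that
      # follow. The working pattern, already used elsewhere in this file, is a
      # minimal sketch like:
      #   - name: Run tests
      #     env: ${{ fromJSON(inputs.envs) }}   # expands the JSON input into step env vars
      #     run: ./dev/run-tests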
- name: Run tests env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' + with: + java: 8 + branch: branch-3.5 + hadoop: hadoop3 run: | if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then export PATH=$PATH:$HOME/miniconda/bin From f60cb2e758ffe7236321085cabd32ce41960a1fd Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 13:16:45 +0800 Subject: [PATCH 23/71] fix --- .github/workflows/build_and_test.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a2ba44579cad..d413f15cb7ac 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -211,7 +211,7 @@ jobs: SKIP_MIMA: true SKIP_PACKAGING: true METASPACE_SIZE: 1g - BRANCH: branch-3.5 + BRANCH: ${{ inputs.branch }} steps: - name: Checkout Spark repository uses: actions/checkout@v4 @@ -221,6 +221,7 @@ jobs: fetch-depth: 0 repository: apache/spark ref: branch-3.5 + branch: branch-3.5 - name: Add GITHUB_WORKSPACE to git trust safe.directory env: ${{ fromJSON(inputs.envs) }} run: | @@ -292,10 +293,6 @@ jobs: - name: Run tests env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' - with: - java: 8 - branch: branch-3.5 - hadoop: hadoop3 run: | if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then export PATH=$PATH:$HOME/miniconda/bin From eb11a6439eefaabdce54c92213d1e1d08c04233c Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 13:19:52 +0800 Subject: [PATCH 24/71] Revert "fix" This reverts commit f60cb2e758ffe7236321085cabd32ce41960a1fd. --- .github/workflows/build_and_test.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d413f15cb7ac..a2ba44579cad 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -211,7 +211,7 @@ jobs: SKIP_MIMA: true SKIP_PACKAGING: true METASPACE_SIZE: 1g - BRANCH: ${{ inputs.branch }} + BRANCH: branch-3.5 steps: - name: Checkout Spark repository uses: actions/checkout@v4 @@ -221,7 +221,6 @@ jobs: fetch-depth: 0 repository: apache/spark ref: branch-3.5 - branch: branch-3.5 - name: Add GITHUB_WORKSPACE to git trust safe.directory env: ${{ fromJSON(inputs.envs) }} run: | @@ -293,6 +292,10 @@ jobs: - name: Run tests env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' + with: + java: 8 + branch: branch-3.5 + hadoop: hadoop3 run: | if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then export PATH=$PATH:$HOME/miniconda/bin From 0d29f7d5075aaed94c7e451fb53a14c229e274b8 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 13:20:04 +0800 Subject: [PATCH 25/71] Revert "fix" This reverts commit f7a3c76bb4f1afc28670125c0f14a41ff4594936. 
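(A pattern worth naming at this point in the series: each dead end is unwound with
plain `git revert`, which records an inverse commit instead of rewriting the branch,
so the whole investigation stays replayable. A sketch of the command that produces a
commit like this one, using the hash reverted here:

    git revert --no-edit f7a3c76b    # creates the Revert "fix" commit, message included

The reverted experiment stays in the log for anyone auditing the scheduled-job fixes.)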
--- .github/workflows/build_and_test.yml | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a2ba44579cad..97452954a1e3 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -211,25 +211,21 @@ jobs: SKIP_MIMA: true SKIP_PACKAGING: true METASPACE_SIZE: 1g - BRANCH: branch-3.5 + BRANCH: ${{ inputs.branch }} + SCALA_PROFILE: "scala2.13", steps: - name: Checkout Spark repository uses: actions/checkout@v4 - env: ${{ fromJSON(inputs.envs) }} # In order to fetch changed files with: fetch-depth: 0 repository: apache/spark - ref: branch-3.5 + ref: ${{ inputs.branch }} - name: Add GITHUB_WORKSPACE to git trust safe.directory - env: ${{ fromJSON(inputs.envs) }} run: | git config --global --add safe.directory ${GITHUB_WORKSPACE} - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' - env: ${{ fromJSON(inputs.envs) }} - with: - ref: branch-3.5 run: | echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} @@ -285,17 +281,13 @@ jobs: run: | python3.9 -m pip install 'numpy==1.24.4' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' - name: Install Python test dependencies for branch-3.5 - env: ${{ fromJSON(inputs.envs) }} + if: inputs.branch == 'branch-3.5' run: | python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' # Run the tests. - name: Run tests env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' - with: - java: 8 - branch: branch-3.5 - hadoop: hadoop3 run: | if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then export PATH=$PATH:$HOME/miniconda/bin From 57fc86f40e554111a1e461315f0e8ffa0ce9142b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 13:20:31 +0800 Subject: [PATCH 26/71] Revert "fix SCALA_PROFILE" This reverts commit c5db8d0d807bafbd90e3cae48958ba9ecafbd25e. 
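(The value being put back by this revert shows a quiet YAML trap: in
`SCALA_PROFILE: scala2.13,` the trailing comma is not syntax but data - block-style
YAML scalars are not comma-terminated - while the quoted variant
`SCALA_PROFILE: "scala2.13",` is typically rejected outright, since nothing may
follow the closing quote. A minimal sketch of the safe forms:

    env:
      SCALA_PROFILE: scala2.13      # value is exactly scala2.13
      SCALA_PROFILE_Q: "scala2.13"  # same value, quoted

which is presumably why a later commit in this series exports SCALA_PROFILE inside
the run script instead of the job env.)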
---
 .github/workflows/build_and_test.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 97452954a1e3..315034964308 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -212,7 +212,8 @@ jobs:
       SKIP_PACKAGING: true
       METASPACE_SIZE: 1g
       BRANCH: ${{ inputs.branch }}
-      SCALA_PROFILE: "scala2.13",
+      SCALA_PROFILE: scala2.13,
+      ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0

From b11d4035a1748a169efcd8d00a7f7c2c3cd5cb6b Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Mon, 18 Mar 2024 13:37:44 +0800
Subject: [PATCH 27/71] fix try

---
 .github/workflows/build_and_test.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 315034964308..1b266a1f5822 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -221,7 +221,7 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: ${{ inputs.branch }}
+        ref: ${{ matrix.branch }}
      - name: Add GITHUB_WORKSPACE to git trust safe.directory
        run: |
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
@@ -278,11 +278,11 @@
          bash miniconda.sh -b -p $HOME/miniconda
          rm miniconda.sh
      - name: Install Python test dependencies for branch-3.4
-       if: inputs.branch == 'branch-3.4'
+       if: matrix.branch == 'branch-3.4'
        run: |
          python3.9 -m pip install 'numpy==1.24.4' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
      - name: Install Python test dependencies for branch-3.5
-       if: inputs.branch == 'branch-3.5'
+       if: matrix.branch == 'branch-3.5'
        run: |
          python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
      # Run the tests.
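      # (The `inputs.branch` -> `matrix.branch` swap in the hunk above is the crux of
      # this fix: `inputs.*` is populated only when the workflow is invoked through
      # workflow_call, and comes back empty on a direct trigger such as a schedule,
      # while a pinned matrix axis is present on every trigger. A minimal sketch of
      # the pattern, with placeholder values:
      #   strategy:
      #     matrix:
      #       branch: [branch-3.5]      # pinned axis, populated on every trigger
      #   steps:
      #     - if: matrix.branch == 'branch-3.5'
      #       run: echo "guard holds under workflow_call and direct triggers alike"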
@@ -290,6 +290,7 @@ jobs: env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' run: | + export SCALA_PROFILE="scala2.13" if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then export PATH=$PATH:$HOME/miniconda/bin export SKIP_PACKAGING=false From b61fad547aba4b5c7dd9bdf45f5f26ad4d4d0866 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 13:42:32 +0800 Subject: [PATCH 28/71] fix --- .github/workflows/build_and_test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1b266a1f5822..356b0d1720f6 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -212,8 +212,6 @@ jobs: SKIP_PACKAGING: true METASPACE_SIZE: 1g BRANCH: ${{ inputs.branch }} - SCALA_PROFILE: scala2.13, - ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0 steps: - name: Checkout Spark repository uses: actions/checkout@v4 From 3cfabac2249c8c6b57c1a2b1f0cdd81afba3a836 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 14:38:14 +0800 Subject: [PATCH 29/71] test --- .github/workflows/build_and_test.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 356b0d1720f6..fa68b2e68946 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -223,13 +223,6 @@ jobs: - name: Add GITHUB_WORKSPACE to git trust safe.directory run: | git config --global --add safe.directory ${GITHUB_WORKSPACE} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty # Cache local repositories. Note that GitHub Actions cache has a 10G limit. - name: Cache Scala, SBT and Maven uses: actions/cache@v4 From 133b3fcbfa992354adab7a18e221892c6396b07a Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 14:50:55 +0800 Subject: [PATCH 30/71] fix linter --- .github/workflows/build_and_test.yml | 29 ++-------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index fa68b2e68946..4a21d3933424 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -380,17 +380,10 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: ${{ inputs.branch }} + ref: branch-3.5 - name: Add GITHUB_WORKSPACE to git trust safe.directory run: | git config --global --add safe.directory ${GITHUB_WORKSPACE} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
- name: Cache Scala, SBT and Maven uses: actions/cache@v4 @@ -426,7 +419,7 @@ jobs: uses: actions/setup-java@v4 with: distribution: zulu - java-version: ${{ inputs.java }} + java-version: 8 - name: License test run: ./dev/check-license - name: Dependencies test @@ -447,29 +440,15 @@ jobs: python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Install Python linter dependencies for branch-3.5 - if: inputs.branch == 'branch-3.5' run: | # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638 # Should delete this section after SPARK 3.5 EOL. python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - - name: Install Python dependencies for python linter and documentation generation - if: inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' - run: | - # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 - # See 'ipython_genutils' in SPARK-38517 - # See 'docutils<0.18.0' in SPARK-39421 - python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ - ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \ - 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \ - 'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ - 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' - python3.9 -m pip list - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python # Should delete this section after SPARK 3.5 EOL. - name: Install dependencies for Python code generation check for branch-3.5 - if: inputs.branch == 'branch-3.5' run: | # See more in "Installation" https://docs.buf.build/installation#tarball curl -LO https://github.com/bufbuild/buf/releases/download/v1.28.1/buf-Linux-x86_64.tar.gz @@ -479,11 +458,9 @@ jobs: python3.9 -m pip install 'protobuf==4.25.1' 'mypy-protobuf==3.3.0' # Should delete this section after SPARK 3.5 EOL. - name: Python code generation check for branch-3.5 - if: inputs.branch == 'branch-3.5' run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi # Should delete this section after SPARK 3.5 EOL. - name: Install JavaScript linter dependencies for branch-3.4, branch-3.5 - if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' run: | apt update apt-get install -y nodejs npm @@ -491,7 +468,6 @@ jobs: run: ./dev/lint-js # Should delete this section after SPARK 3.5 EOL. 
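      # (The buf installation above is this series' pattern for pinned CLI tools in
      # the lint container: fetch a versioned release tarball and unpack under $HOME
      # rather than trusting a package manager's floating version. As a generic
      # sketch with a placeholder URL, tool name, and version:
      #   curl -LO https://example.com/tool-v1.2.3.tar.gz          # hypothetical release
      #   mkdir -p $HOME/tool
      #   tar -xzf tool-v1.2.3.tar.gz -C $HOME/tool --strip-components 1
      #   export PATH=$PATH:$HOME/tool/bin
      # which keeps lint toolchains reproducible across scheduled runs.)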
- name: Install R linter dependencies for branch-3.4, branch-3.5 - if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' run: | apt update apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \ @@ -503,7 +479,6 @@ jobs: run: ./R/install-dev.sh # Should delete this section after SPARK 3.5 EOL. - name: Install dependencies for documentation generation for branch-3.4, branch-3.5 - if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' run: | # pandoc is required to generate PySpark APIs as well in nbsphinx. apt-get update -y From 65757ca4650373955d881b37ccb56d12e321afbc Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 15:55:04 +0800 Subject: [PATCH 31/71] hack dev/run-tests --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4a21d3933424..eb6923fc7024 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -288,10 +288,10 @@ jobs: echo "Python Packaging Tests Enabled!" fi if [ ! -z "$PYTHON_TO_TEST" ]; then - ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --python-executables "$PYTHON_TO_TEST" + ./dev/run-tests --parallelism 1 --python-executables "$PYTHON_TO_TEST" else # For branch-3.5 and below, it uses the default Python versions. - ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" + ./dev/run-tests --parallelism 1 fi - name: Upload coverage to Codecov if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true' From 20c245d1e540abfd939ef32db4227e149b3e9bb8 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 15:59:04 +0800 Subject: [PATCH 32/71] hack build_and_test.yml --- .github/workflows/build_and_test.yml | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index eb6923fc7024..c98d7b31e628 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -180,26 +180,8 @@ jobs: branch: - branch-3.5 modules: - - >- - pyspark-sql, pyspark-resource, pyspark-testing - - >- - pyspark-core, pyspark-errors, pyspark-streaming - - >- - pyspark-mllib, pyspark-ml, pyspark-ml-connect - - >- - pyspark-pandas - >- pyspark-pandas-slow - - >- - pyspark-connect - - >- - pyspark-pandas-connect-part0 - - >- - pyspark-pandas-connect-part1 - - >- - pyspark-pandas-connect-part2 - - >- - pyspark-pandas-connect-part3 env: MODULES_TO_TEST: ${{ matrix.modules }} PYTHON_TO_TEST: '' From 38cc01702c1a75c97df8c577fc21172c1dd3b592 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 17:06:55 +0800 Subject: [PATCH 33/71] fix --- .github/workflows/build_and_test.yml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c98d7b31e628..4a21d3933424 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -180,8 +180,26 @@ jobs: branch: - branch-3.5 modules: + - >- + pyspark-sql, pyspark-resource, pyspark-testing + - >- + pyspark-core, pyspark-errors, pyspark-streaming + - >- + pyspark-mllib, pyspark-ml, pyspark-ml-connect + - >- + pyspark-pandas - >- pyspark-pandas-slow + - >- + pyspark-connect + - >- + pyspark-pandas-connect-part0 + - >- + pyspark-pandas-connect-part1 + - >- + pyspark-pandas-connect-part2 + - >- + pyspark-pandas-connect-part3 env: MODULES_TO_TEST: 
${{ matrix.modules }} PYTHON_TO_TEST: '' @@ -270,10 +288,10 @@ jobs: echo "Python Packaging Tests Enabled!" fi if [ ! -z "$PYTHON_TO_TEST" ]; then - ./dev/run-tests --parallelism 1 --python-executables "$PYTHON_TO_TEST" + ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --python-executables "$PYTHON_TO_TEST" else # For branch-3.5 and below, it uses the default Python versions. - ./dev/run-tests --parallelism 1 + ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" fi - name: Upload coverage to Codecov if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true' From 06c783ebcced09883f6e8d4917ae90a9ec98e264 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 17:10:11 +0800 Subject: [PATCH 34/71] fix --- .github/workflows/build_and_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4a21d3933424..a0fb8ac604c1 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -282,6 +282,7 @@ jobs: shell: 'script -q -e -c "bash {0}"' run: | export SCALA_PROFILE="scala2.13" + export GITHUB_PREV_SHA="" if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then export PATH=$PATH:$HOME/miniconda/bin export SKIP_PACKAGING=false From 3b0c74470b7eafb4c4b320c3779de8d31b825bb5 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 17:26:55 +0800 Subject: [PATCH 35/71] hack run_test --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a0fb8ac604c1..ae83c42753e9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -282,7 +282,7 @@ jobs: shell: 'script -q -e -c "bash {0}"' run: | export SCALA_PROFILE="scala2.13" - export GITHUB_PREV_SHA="" + unset GITHUB_ACTIONS if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then export PATH=$PATH:$HOME/miniconda/bin export SKIP_PACKAGING=false From 8581c2c561b0236ecfa85f3d478f30088ce735ef Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 19:01:53 +0800 Subject: [PATCH 36/71] pin pandas 2.0.3 --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ae83c42753e9..fcb41429df98 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -271,11 +271,11 @@ jobs: - name: Install Python test dependencies for branch-3.4 if: matrix.branch == 'branch-3.4' run: | - python3.9 -m pip install 'numpy==1.24.4' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' - name: Install Python test dependencies for branch-3.5 if: matrix.branch == 'branch-3.5' run: | - python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' # Run the tests. 
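        # (Note the token 'pandas<=2.0.3''pyarrow==12.0.1' in the branch-3.4 line of
        # the hunk above: with no space between them, the shell concatenates the two
        # quoted strings into the single argument pandas<=2.0.3pyarrow==12.0.1, which
        # pip rejects as an invalid requirement. The intended invocation, as a sketch:
        #   python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'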
      - name: Run tests
        env: ${{ fromJSON(inputs.envs) }}
        shell: 'script -q -e -c "bash {0}"'

From b892aab3ba3b34da57c095b941f5493178db013b Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Mon, 18 Mar 2024 19:26:24 +0800
Subject: [PATCH 37/71] fix python linter

---
 .github/workflows/build_and_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index fcb41429df98..82f65f1eeba9 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -438,13 +438,13 @@ jobs:
       run: |
         # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
         # Should delete this section after SPARK 3.4 EOL.
-        python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
+        python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
         python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
      - name: Install Python linter dependencies for branch-3.5
        run: |
          # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638
          # Should delete this section after SPARK 3.5 EOL.
-         python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
+         python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
          python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
      - name: Python linter
        run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python

From 3a151f333c156b1674d4e6589f57c1a2939f071a Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Mon, 18 Mar 2024 21:22:09 +0800
Subject: [PATCH 38/71] fix python linter

---
 .github/workflows/build_and_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 82f65f1eeba9..45bc3f8e8dea 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -438,13 +438,13 @@ jobs:
       run: |
         # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
         # Should delete this section after SPARK 3.4 EOL.
- python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Install Python linter dependencies for branch-3.5 run: | # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638 # Should delete this section after SPARK 3.5 EOL. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python From 57701c6437c3359a3c0ce88e149f225e1ea87ddc Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 18 Mar 2024 21:29:27 +0800 Subject: [PATCH 39/71] fix --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 45bc3f8e8dea..c083afbbca59 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -271,11 +271,11 @@ jobs: - name: Install Python test dependencies for branch-3.4 if: matrix.branch == 'branch-3.4' run: | - python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' - name: Install Python test dependencies for branch-3.5 if: matrix.branch == 'branch-3.5' run: | - python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' # Run the tests. 
- name: Run tests env: ${{ fromJSON(inputs.envs) }} From 7b293e1f10a89d919f0fb39ad03e609ec6673a96 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 01:26:03 +0800 Subject: [PATCH 40/71] fix --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c083afbbca59..6c8de28dd5c1 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -438,13 +438,13 @@ jobs: run: | # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578 # Should delete this section after SPARK 3.4 EOL. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Install Python linter dependencies for branch-3.5 run: | # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638 # Should delete this section after SPARK 3.5 EOL. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python From 8ef4aa358370f9c87726a4cb5ff5116d0493eb87 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 01:43:09 +0800 Subject: [PATCH 41/71] fix --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 6c8de28dd5c1..40211335e193 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -438,13 +438,13 @@ jobs: run: | # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578 # Should delete this section after SPARK 3.4 EOL. 
- python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'importlib_metadata==7.0.2' 'importlib_resources==6.3.1' 'typing_extensions==4.10.0' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Install Python linter dependencies for branch-3.5 run: | # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638 # Should delete this section after SPARK 3.5 EOL. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'importlib_metadata==7.0.2' 'importlib_resources==6.3.1' 'typing_extensions==4.10.0' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python From 0e09b9a9d0e18bfcb5e51be30b35be733f4b91e1 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 08:47:53 +0800 Subject: [PATCH 42/71] Revert "fix" This reverts commit 8ef4aa358370f9c87726a4cb5ff5116d0493eb87. --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 40211335e193..6c8de28dd5c1 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -438,13 +438,13 @@ jobs: run: | # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578 # Should delete this section after SPARK 3.4 EOL. 
- python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'importlib_metadata==7.0.2' 'importlib_resources==6.3.1' 'typing_extensions==4.10.0' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Install Python linter dependencies for branch-3.5 run: | # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638 # Should delete this section after SPARK 3.5 EOL. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'importlib_metadata==7.0.2' 'importlib_resources==6.3.1' 'typing_extensions==4.10.0' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python From ffd633a6ad00c33cb2b1889ec82b92e051683581 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 08:48:04 +0800 Subject: [PATCH 43/71] Revert "fix" This reverts commit 57701c6437c3359a3c0ce88e149f225e1ea87ddc. --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 6c8de28dd5c1..d7008eae63ac 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -271,11 +271,11 @@ jobs: - name: Install Python test dependencies for branch-3.4 if: matrix.branch == 'branch-3.4' run: | - python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' + python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' - name: Install Python test dependencies for branch-3.5 if: matrix.branch == 'branch-3.5' run: | - python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' + python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' # Run the tests. 
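      # (The preceding few commits pin linter and test packages one at a time and then
      # revert the attempts. A hedged alternative - not what this series does - is a
      # per-branch constraints file that every pip call resolves against:
      #   # constraints-branch-3.5.txt (hypothetical file)
      #   numpy==1.25.1
      #   pyarrow==12.0.1
      #   pandas<=2.0.3
      # and then:
      #   python3.9 -m pip install -c constraints-branch-3.5.txt flake8 mypy pytest
      # so the lint and test jobs install from one pin set.)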
- name: Run tests env: ${{ fromJSON(inputs.envs) }} From f6119b9844cc34d79cb323031512d3c5841c0aba Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 08:53:14 +0800 Subject: [PATCH 44/71] fix docs build --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d7008eae63ac..a33b83ff4cb4 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -489,7 +489,7 @@ jobs: Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 - python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' + python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme sphinx-copybutton nbsphinx numpydoc 'jinja2<3.0.0' markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 From 0afff676106645c8d1a90933ac629aadc619faa5 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 09:22:04 +0800 Subject: [PATCH 45/71] pin Sphinx==3.0.4 --- .github/workflows/build_and_test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a33b83ff4cb4..5deaa16fe0f7 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -489,10 +489,9 @@ jobs: Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 - python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme sphinx-copybutton nbsphinx numpydoc 'jinja2<3.0.0' markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' + python3.9 -m pip install 'Sphinx==3.0.4' mkdocs 'docutils==0.17.1' 'pydata-sphinx-theme==0.8.0' sphinx-copybutton 'nbsphinx==0.9.2' 'numpydoc==1.4.0' 'jinja2==2.11.3' markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' - python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 - name: Install dependencies for documentation generation run: | gem install bundler -v 2.4.22 From c2bb18a835218086c3cd5111ec796ee6ec3fd341 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 10:38:57 +0800 Subject: [PATCH 
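A note on the pip pinning pattern that patches 44 and 45 keep adjusting: every requirement is single-quoted (e.g. 'jinja2<3.0.0', 'sphinx<3.1.0') because <, >, and * are shell metacharacters; unquoted, the shell would treat < as input redirection or expand the wildcard before pip ever saw the specifier. A minimal sketch of the idiom, with illustrative step name and package pins (not taken from these patches):

      - name: Install pinned docs toolchain    # hypothetical step name
        run: |
          # Quotes keep '<', '>' and '*' away from the shell so pip
          # receives the raw PEP 508 version specifiers.
          python3.9 -m pip install 'sphinx==3.0.4' 'jinja2<3.0.0' 'scikit-learn==1.1.*'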
From c2bb18a835218086c3cd5111ec796ee6ec3fd341 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 10:38:57 +0800
Subject: [PATCH 46/71] fix scikit-learn

---
 .github/workflows/build_and_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 5deaa16fe0f7..e268ae3ffec6 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -271,11 +271,11 @@ jobs:
       - name: Install Python test dependencies for branch-3.4
         if: matrix.branch == 'branch-3.4'
         run: |
-          python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
+          python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
       - name: Install Python test dependencies for branch-3.5
         if: matrix.branch == 'branch-3.5'
         run: |
-          python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
+          python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
       # Run the tests.
       - name: Run tests
         env: ${{ fromJSON(inputs.envs) }}

From 1b5e20efefa87ba6599f443a5fadb403791e86cd Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 10:45:56 +0800
Subject: [PATCH 47/71] fix markupsafe

---
 .github/workflows/build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index e268ae3ffec6..98c020037b90 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -489,7 +489,7 @@ jobs:
           Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')"
           Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')"
           # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
-          python3.9 -m pip install 'Sphinx==3.0.4' mkdocs 'docutils==0.17.1' 'pydata-sphinx-theme==0.8.0' sphinx-copybutton 'nbsphinx==0.9.2' 'numpydoc==1.4.0' 'jinja2==2.11.3' markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
+          python3.9 -m pip install 'Sphinx==3.0.4' mkdocs 'docutils==0.17.1' 'pydata-sphinx-theme==0.8.0' sphinx-copybutton 'nbsphinx==0.9.2' 'numpydoc==1.4.0' 'jinja2==2.11.3' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
           python3.9 -m pip install ipython_genutils # See SPARK-38517
           python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
       - name: Install dependencies for documentation generation
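For context on patch 47: markupsafe 2.1 removed the soft_unicode alias that Jinja2 2.x still imports, so an environment pinning 'jinja2==2.11.3' must also cap markupsafe at 2.0.1 or the docs build fails at import time. One way to keep such co-pins in a single place is a pip constraints file; the following is only a sketch under the assumption of a hypothetical docs/constraints.txt, which these patches do not add:

    # docs/constraints.txt (hypothetical file)
    jinja2==2.11.3
    markupsafe==2.0.1
    alabaster==0.7.12

    # Constraints cap versions without forcing installation of the listed packages:
    python3.9 -m pip install -c docs/constraints.txt sphinx mkdocs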
"devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 - python3.9 -m pip install 'Sphinx==3.0.4' mkdocs 'docutils==0.17.1' 'pydata-sphinx-theme==0.8.0' sphinx-copybutton 'nbsphinx==0.9.2' 'numpydoc==1.4.0' 'jinja2==2.11.3' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' + python3.9 -m pip install 'Sphinx==3.0.4' mkdocs 'docutils==0.17.1' 'pydata-sphinx-theme==0.8.0' 'alabaster==0.7.12' sphinx-copybutton 'nbsphinx==0.9.2' 'numpydoc==1.4.0' 'jinja2==2.11.3' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' - name: Install dependencies for documentation generation @@ -501,6 +501,7 @@ jobs: run: ./dev/lint-r - name: Run documentation build run: | + python3.9 -m pip list # Build docs first with SKIP_API to ensure they are buildable without requiring any # language docs to be built beforehand. cd docs; SKIP_API=1 bundle exec jekyll build; cd .. From 3934806db3d0f611bda97345c7c838ea0167390d Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 16:42:44 +0800 Subject: [PATCH 49/71] final branch-3.5 test --- .github/workflows/build_and_test.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index bd0edfb65a8d..38bb81456aa2 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -269,13 +269,9 @@ jobs: bash miniconda.sh -b -p $HOME/miniconda rm miniconda.sh - name: Install Python test dependencies for branch-3.4 - if: matrix.branch == 'branch-3.4' + if: matrix.branch == 'branch-3.4' || matrix.branch == 'branch-3.5' run: | python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' - - name: Install Python test dependencies for branch-3.5 - if: matrix.branch == 'branch-3.5' - run: | - python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' # Run the tests. - name: Run tests env: ${{ fromJSON(inputs.envs) }} @@ -438,13 +434,13 @@ jobs: run: | # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578 # Should delete this section after SPARK 3.4 EOL. 
- python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.24.4' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Install Python linter dependencies for branch-3.5 run: | # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638 # Should delete this section after SPARK 3.5 EOL. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.24.4' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python @@ -489,7 +485,7 @@ jobs: Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 - python3.9 -m pip install 'Sphinx==3.0.4' mkdocs 'docutils==0.17.1' 'pydata-sphinx-theme==0.8.0' 'alabaster==0.7.12' sphinx-copybutton 'nbsphinx==0.9.2' 'numpydoc==1.4.0' 'jinja2==2.11.3' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' + python3.9 -m pip install 'sphinx==3.0.4' mkdocs 'docutils==0.17.1' 'pydata-sphinx-theme==0.8.0' 'alabaster==0.7.12' sphinx-copybutton 'nbsphinx==0.9.2' 'numpydoc==1.4.0' 'jinja2==2.11.3' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' - name: Install dependencies for documentation generation From c17993415e8637c511f41e552ad03a925cfa946e Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 19 Mar 2024 16:45:05 +0800 Subject: [PATCH 50/71] final branch-3.4 test --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 38bb81456aa2..f37ec9ae3f98 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -178,7 +178,7 @@ jobs: java: - 8 branch: - - branch-3.5 + - branch-3.4 modules: - >- 
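Patch 49 collapses the two per-branch install steps by OR-ing their guards. In GitHub Actions an if: condition is evaluated as an expression (the ${{ }} wrapper is implicit), so matrix.branch == 'branch-3.4' || matrix.branch == 'branch-3.5' runs the step for either maintenance branch. A self-contained sketch of the mechanism, with illustrative job and step names:

    jobs:
      demo:                       # hypothetical job
        runs-on: ubuntu-latest
        strategy:
          matrix:
            branch: [branch-3.4, branch-3.5]
        steps:
          - name: Deps for either maintenance branch
            # Step runs once per matrix leg; the guard admits both legs here.
            if: matrix.branch == 'branch-3.4' || matrix.branch == 'branch-3.5'
            run: echo "installing test deps for ${{ matrix.branch }}"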
From c17993415e8637c511f41e552ad03a925cfa946e Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 16:45:05 +0800
Subject: [PATCH 50/71] final branch-3.4 test

---
 .github/workflows/build_and_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 38bb81456aa2..f37ec9ae3f98 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -178,7 +178,7 @@ jobs:
         java:
           - 8
         branch:
-          - branch-3.5
+          - branch-3.4
         modules:
           - >-
             pyspark-sql, pyspark-resource, pyspark-testing
@@ -377,7 +377,7 @@ jobs:
         with:
           fetch-depth: 0
           repository: apache/spark
-          ref: branch-3.5
+          ref: branch-3.4
       - name: Add GITHUB_WORKSPACE to git trust safe.directory
         run: |
           git config --global --add safe.directory ${GITHUB_WORKSPACE}

From 6683762b773c3721d36a6a90e813fae3aa1020f7 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 18:43:39 +0800
Subject: [PATCH 51/71] Revert "final branch-3.4 test"

This reverts commit c17993415e8637c511f41e552ad03a925cfa946e.
---
 .github/workflows/build_and_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index f37ec9ae3f98..38bb81456aa2 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -178,7 +178,7 @@ jobs:
         java:
           - 8
         branch:
-          - branch-3.4
+          - branch-3.5
         modules:
           - >-
             pyspark-sql, pyspark-resource, pyspark-testing
@@ -377,7 +377,7 @@ jobs:
         with:
           fetch-depth: 0
           repository: apache/spark
-          ref: branch-3.4
+          ref: branch-3.5
       - name: Add GITHUB_WORKSPACE to git trust safe.directory
         run: |
           git config --global --add safe.directory ${GITHUB_WORKSPACE}

From 94acde54ed55446ac991b2448840659ff89dbaac Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 18:49:56 +0800
Subject: [PATCH 52/71] Final branch-3.5 Jobs test

---
 .github/workflows/build_and_test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 38bb81456aa2..4161d07421a3 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -271,7 +271,7 @@ jobs:
       - name: Install Python test dependencies for branch-3.4
         if: matrix.branch == 'branch-3.4' || matrix.branch == 'branch-3.5'
         run: |
-          python3.9 -m pip install 'numpy==1.24.4' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
+          python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3''pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
       # Run the tests.
       - name: Run tests
         env: ${{ fromJSON(inputs.envs) }}
@@ -434,13 +434,13 @@ jobs:
         run: |
           # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
           # Should delete this section after SPARK 3.4 EOL.
-          python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.24.4' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
+          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
           python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
       - name: Install Python linter dependencies for branch-3.5
         run: |
           # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638
           # Should delete this section after SPARK 3.5 EOL.
-          python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.24.4' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
+          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
           python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
       - name: Python linter
         run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python

From 11fc324fae6c2cce86085404bb8a461bf296e941 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 18:51:31 +0800
Subject: [PATCH 53/71] Final branch-3.4 Jobs test

This reverts commit 6683762b773c3721d36a6a90e813fae3aa1020f7.
---
 .github/workflows/build_and_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 4161d07421a3..c29ded060828 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -178,7 +178,7 @@ jobs:
         java:
           - 8
         branch:
-          - branch-3.5
+          - branch-3.4
         modules:
           - >-
             pyspark-sql, pyspark-resource, pyspark-testing
@@ -377,7 +377,7 @@ jobs:
         with:
           fetch-depth: 0
           repository: apache/spark
-          ref: branch-3.5
+          ref: branch-3.4
       - name: Add GITHUB_WORKSPACE to git trust safe.directory
         run: |
           git config --global --add safe.directory ${GITHUB_WORKSPACE}
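Two different guards alternate through these patches and are easy to conflate: matrix.branch comes from a job's own strategy.matrix, while inputs.branch is the workflow_call input supplied by the calling workflow (for example build_branch35.yml). They are scoped differently and are not interchangeable; a trimmed sketch with illustrative job names:

    on:
      workflow_call:
        inputs:
          branch:
            type: string
            default: branch-3.5
    jobs:
      lint:
        runs-on: ubuntu-latest
        steps:
          - name: Branch-specific linter deps
            if: inputs.branch == 'branch-3.5'    # input from the calling workflow
            run: echo "install lint deps"
      pyspark:
        runs-on: ubuntu-latest
        strategy:
          matrix:
            branch: [branch-3.5]
        steps:
          - name: Branch-specific test deps
            if: matrix.branch == 'branch-3.5'    # value from this job's own matrix
            run: echo "install test deps"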
      - name: Run tests
        env: ${{ fromJSON(inputs.envs) }}
@@ -377,7 +377,7 @@ jobs:
        with:
          fetch-depth: 0
          repository: apache/spark
-          ref: branch-3.4
+          ref: branch-3.5
       - name: Add GITHUB_WORKSPACE to git trust safe.directory
         run: |
           git config --global --add safe.directory ${GITHUB_WORKSPACE}

From 6137f9b8b75fbd9dd1776d134dd38c708ebcac02 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 19:46:13 +0800
Subject: [PATCH 55/71] Final branch-3.4 Jobs test

---
 .github/workflows/build_and_test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index ec595a5df486..e42f4e4af293 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -178,7 +178,7 @@ jobs:
         java:
           - 8
         branch:
-          - branch-3.5
+          - branch-3.4
         modules:
           - >-
             pyspark-sql, pyspark-resource, pyspark-testing
@@ -377,7 +377,7 @@ jobs:
         with:
           fetch-depth: 0
           repository: apache/spark
-          ref: branch-3.5
+          ref: branch-3.4
       - name: Add GITHUB_WORKSPACE to git trust safe.directory
         run: |
           git config --global --add safe.directory ${GITHUB_WORKSPACE}
@@ -430,13 +430,13 @@ jobs:
       - name: Spark connect jvm client mima check
         run: ./dev/connect-jvm-client-mima-check
       - name: Install Python linter dependencies for branch-3.4
-        if: inputs.branch == 'branch-3.4'
         run: |
           # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
           # Should delete this section after SPARK 3.4 EOL.
           python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
           python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
       - name: Install Python linter dependencies for branch-3.5
+        if: inputs.branch == 'branch-3.5'
         run: |
           # SPARK-45212: Copy from https://github.com/apache/spark/blob/555c8def51e5951c7bf5165a332795e9e330ec9d/.github/workflows/build_and_test.yml#L631-L638
           # Should delete this section after SPARK 3.5 EOL.

From 826947f098b1549007516f83c48bf5eb27be606b Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 22:53:28 +0800
Subject: [PATCH 56/71] Final branch-3.4 Jobs test

---
 .github/workflows/build_and_test.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index e42f4e4af293..b844bd88dbb9 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -269,7 +269,11 @@ jobs:
           bash miniconda.sh -b -p $HOME/miniconda
           rm miniconda.sh
       - name: Install Python test dependencies for branch-3.4, branch-3.5
-        if: matrix.branch == 'branch-3.4'
+        run: |
+          python3.9 -m pip install 'numpy==1.25.1' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib<3.3.0' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
+      - name: Install Python test dependencies for branch-3.4, branch-3.5
+        if: matrix.branch == 'branch-3.5'
         run: |
           python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
       # Run the tests.
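A caveat worth flagging in patches 55 and 56: the if: guard ends up only on the branch-3.5 steps, leaving their branch-3.4 counterparts unconditional, and in Actions a step with no if: runs on every invocation as long as earlier steps succeeded. If the paired steps are meant to stay mutually exclusive, each needs its own guard; a sketch of the symmetric form (placeholder run commands):

    - name: Install Python linter dependencies for branch-3.4
      if: inputs.branch == 'branch-3.4'    # the guard patch 55 drops
      run: echo "install branch-3.4 linter deps"
    - name: Install Python linter dependencies for branch-3.5
      if: inputs.branch == 'branch-3.5'
      run: echo "install branch-3.5 linter deps"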
From 1fcc0b165014934e637eb4906113be567700ad1d Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Tue, 19 Mar 2024 23:04:03 +0800
Subject: [PATCH 57/71] Final branch-3.4 Jobs test

---
 .github/workflows/build_and_test.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index b844bd88dbb9..029cea68b2de 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -268,11 +268,11 @@ jobs:
           curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
           bash miniconda.sh -b -p $HOME/miniconda
           rm miniconda.sh
-      - name: Install Python test dependencies for branch-3.4, branch-3.5
+      - name: Install Python test dependencies for branch-3.4
         if: matrix.branch == 'branch-3.4'
         run: |
-          python3.9 -m pip install 'numpy==1.25.1' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib<3.3.0' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
+          python3.9 -m pip install 'numpy==1.25.1' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
-      - name: Install Python test dependencies for branch-3.4, branch-3.5
+      - name: Install Python test dependencies for branch-3.5
         if: matrix.branch == 'branch-3.5'
         run: |
           python3.9 -m pip install 'numpy==1.25.1' 'pandas<=2.0.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
@@ -450,6 +450,7 @@ jobs:
         run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
       # Should delete this section after SPARK 3.5 EOL.
       - name: Install dependencies for Python code generation check for branch-3.5
+        if: inputs.branch == 'branch-3.5'
         run: |
           # See more in "Installation" https://docs.buf.build/installation#tarball
           curl -LO https://github.com/bufbuild/buf/releases/download/v1.28.1/buf-Linux-x86_64.tar.gz
           mkdir -p $HOME/buf
           tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1
           python3.9 -m pip install 'protobuf==4.25.1' 'mypy-protobuf==3.3.0'
       # Should delete this section after SPARK 3.5 EOL.
       - name: Python code generation check for branch-3.5
+        if: inputs.branch == 'branch-3.5'
         run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi
       # Should delete this section after SPARK 3.5 EOL.
       - name: Install JavaScript linter dependencies for branch-3.4, branch-3.5

From 99a75c94369e96b09bae15a272ce63b7089fb5e3 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 07:42:09 +0800
Subject: [PATCH 58/71] Final branch-3.4 Jobs test

---
 .github/workflows/build_and_test.yml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 029cea68b2de..a7ce851bf36e 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -271,7 +271,7 @@ jobs:
       - name: Install Python test dependencies for branch-3.4
         if: matrix.branch == 'branch-3.4'
         run: |
-          python3.9 -m pip install 'numpy==1.25.1' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
+          python3.9 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
       - name: Install Python test dependencies for branch-3.5
         if: matrix.branch == 'branch-3.5'
         run: |
@@ -493,7 +493,13 @@ jobs:
           # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
           python3.9 -m pip install 'sphinx==3.0.4' mkdocs 'docutils==0.17.1' 'pydata-sphinx-theme==0.8.0' 'alabaster==0.7.12' sphinx-copybutton 'nbsphinx==0.9.2' 'numpydoc==1.4.0' 'jinja2==2.11.3' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
           python3.9 -m pip install ipython_genutils # See SPARK-38517
-          python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
+      - name: Install dependencies for documentation generation for branch-3.4
+        run: |
+          python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==1.5.3' 'plotly>=4.8'
+      - name: Install dependencies for documentation generation for branch-3.5
+        if: inputs.branch == 'branch-3.5'
+        run: |
+          python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas=2.0.3' 'plotly>=4.8'
       - name: Install dependencies for documentation generation
         run: |
           gem install bundler -v 2.4.22
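One detail in patch 58 deserves a flag: the branch-3.5 docs step pins 'pandas=2.0.3' with a single '='. PEP 508 requirement specifiers accept ==, ~=, <=, and the other comparison operators, but not a bare =, so pip rejects the string as an invalid requirement (and suggests ==). The intended pin was presumably 'pandas==2.0.3':

    # Rejected by pip: '=' is not a valid version operator.
    python3.9 -m pip install 'pandas=2.0.3'
    # Valid pin:
    python3.9 -m pip install 'pandas==2.0.3'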
+ echo "2" ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" fi - name: Upload coverage to Codecov From 18c211cc1e22138ddecf81903ac541a5ee025832 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 20 Mar 2024 09:48:39 +0800 Subject: [PATCH 60/71] Final branch-3.4 Jobs test --- .github/workflows/build_and_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 2a1cd93d43a2..73460c9df4b7 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -271,6 +271,7 @@ jobs: - name: Install Python test dependencies for branch-3.4 if: matrix.branch == 'branch-3.4' run: | + pypy3 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' python3.9 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' - name: Install Python test dependencies for branch-3.5 if: matrix.branch == 'branch-3.5' From cc29d4edf84181104b9f227cc8940dea539cc64f Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 20 Mar 2024 10:08:05 +0800 Subject: [PATCH 61/71] set PYTHON_TO_TEST: 'python3.9' --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 73460c9df4b7..8d0a3f322904 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -202,7 +202,7 @@ jobs: pyspark-pandas-connect-part3 env: MODULES_TO_TEST: ${{ matrix.modules }} - PYTHON_TO_TEST: '' + PYTHON_TO_TEST: 'python3.9' HADOOP_PROFILE: ${{ inputs.hadoop }} HIVE_PROFILE: hive2.3 GITHUB_PREV_SHA: ${{ github.event.before }} @@ -271,7 +271,7 @@ jobs: - name: Install Python test dependencies for branch-3.4 if: matrix.branch == 'branch-3.4' run: | - pypy3 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' + # pypy3 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' python3.9 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' - name: Install Python test dependencies for branch-3.5 if: matrix.branch == 'branch-3.5' From 48bbee30ce017513b31f8dd314f698c48e372dda Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 20 Mar 2024 10:53:39 +0800 Subject: [PATCH 62/71] test --- .github/workflows/build_and_test.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8d0a3f322904..3671cf752e7c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -64,7 +64,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: ${{ inputs.branch }} + ref: branch-3.4 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -113,7 +113,7 @@ jobs: run: | # Convert to lowercase to meet Docker repo name requirement REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - IMG_NAME="apache-spark-ci-image:${{ inputs.branch }}-${{ github.run_id }}" + IMG_NAME="apache-spark-ci-image:branch-3.4-${{ github.run_id }}" 
IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME" echo "image_url=$IMG_URL" >> $GITHUB_OUTPUT @@ -140,7 +140,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: ${{ inputs.branch }} + ref: branch-3.4 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -161,7 +161,7 @@ jobs: tags: | ${{ needs.precondition.outputs.image_url }} # Use the infra image cache to speed up - cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:${{ inputs.branch }} + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:branch-3.4 pyspark: needs: [precondition, infra-image] @@ -202,7 +202,7 @@ jobs: pyspark-pandas-connect-part3 env: MODULES_TO_TEST: ${{ matrix.modules }} - PYTHON_TO_TEST: 'python3.9' + PYTHON_TO_TEST: '' HADOOP_PROFILE: ${{ inputs.hadoop }} HIVE_PROFILE: hive2.3 GITHUB_PREV_SHA: ${{ github.event.before }} @@ -271,7 +271,6 @@ jobs: - name: Install Python test dependencies for branch-3.4 if: matrix.branch == 'branch-3.4' run: | - # pypy3 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' python3.9 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*' - name: Install Python test dependencies for branch-3.5 if: matrix.branch == 'branch-3.5' From 8a45b478daaa611959bbaa2a4026252f3d3c2f30 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 20 Mar 2024 11:17:20 +0800 Subject: [PATCH 63/71] test --- .github/workflows/build_and_test.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3671cf752e7c..767aa6c86785 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -65,13 +65,6 @@ jobs: fetch-depth: 0 repository: apache/spark ref: branch-3.4 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - name: Check all modules id: set-outputs run: | @@ -141,13 +134,6 @@ jobs: fetch-depth: 0 repository: apache/spark ref: branch-3.4 - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx From a53f8ee7385cf54ad93109d2f07b0ad88a3655bd Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 20 Mar 2024 11:20:08 +0800 Subject: [PATCH 64/71] test --- .github/workflows/build_and_test.yml | 38 ++++------------------------ 1 file changed, 5 insertions(+), 33 deletions(-) diff --git 
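For context on the infra-image changes in patches 62 and 63: docker/build-push-action can seed a build from a cache image stored in a registry, which is what the hardcoded cache-from line points at, and the computed IMG_URL is exported through $GITHUB_OUTPUT for later jobs. A minimal sketch of the build step (tag values and step name illustrative, not a definitive copy of the workflow):

    - name: Build and push CI image        # illustrative step
      uses: docker/build-push-action@v5
      with:
        context: ./dev/infra/
        push: true
        tags: ghcr.io/example/apache-spark-ci-image:branch-3.4-${{ github.run_id }}
        # Reuse layers from a previously published registry cache image.
        cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:branch-3.4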
From a53f8ee7385cf54ad93109d2f07b0ad88a3655bd Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 11:20:08 +0800
Subject: [PATCH 64/71] test

---
 .github/workflows/build_and_test.yml | 38 ++++------------------------
 1 file changed, 5 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 767aa6c86785..dc2d61ae4edd 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -68,39 +68,11 @@ jobs:
      - name: Check all modules
        id: set-outputs
        run: |
-          if [ -z "${{ inputs.jobs }}" ]; then
-            pyspark=true; sparkr=true; tpcds=true; docker=true;
-            pyspark_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
-            pyspark=`./dev/is-changed.py -m $pyspark_modules`
-            sparkr=`./dev/is-changed.py -m sparkr`
-            tpcds=`./dev/is-changed.py -m sql`
-            docker=`./dev/is-changed.py -m docker-integration-tests`
-            # 'build' and 'maven-build' are always true for now.
-            # It does not save significant time and most of PRs trigger the build.
-            precondition="
-              {
-                \"build\": \"true\",
-                \"pyspark\": \"$pyspark\",
-                \"sparkr\": \"$sparkr\",
-                \"tpcds-1g\": \"$tpcds\",
-                \"docker-integration-tests\": \"$docker\",
-                \"maven-build\": \"true\",
-                \"lint\" : \"true\",
-                \"k8s-integration-tests\" : \"true\",
-                \"buf\" : \"true\",
-                \"ui\" : \"true\",
-              }"
-            echo $precondition # For debugging
-            # Remove `\n` to avoid "Invalid format" error
-            precondition="${precondition//$'\n'/}}"
-            echo "required=$precondition" >> $GITHUB_OUTPUT
-          else
-            # This is usually set by scheduled jobs.
-            precondition='${{ inputs.jobs }}'
-            echo $precondition # For debugging
-            precondition="${precondition//$'\n'/}"
-            echo "required=$precondition" >> $GITHUB_OUTPUT
-          fi
+          # This is usually set by scheduled jobs.
+          precondition='${{ inputs.jobs }}'
+          echo $precondition # For debugging
+          precondition="${precondition//$'\n'/}"
+          echo "required=$precondition" >> $GITHUB_OUTPUT
      - name: Generate infra image URL
        id: infra-image-outputs
        run: |

From f5a20d401999b92f8a90c9c19a6b0281c3c20657 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 11:26:18 +0800
Subject: [PATCH 65/71] test

---
 .github/workflows/build_and_test.yml | 34 ++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index dc2d61ae4edd..206d1b0de296 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -68,11 +68,35 @@ jobs:
      - name: Check all modules
        id: set-outputs
        run: |
-          # This is usually set by scheduled jobs.
-          precondition='${{ inputs.jobs }}'
-          echo $precondition # For debugging
-          precondition="${precondition//$'\n'/}"
-          echo "required=$precondition" >> $GITHUB_OUTPUT
+          if [ -z "${{ inputs.jobs }}" ]; then
+            pyspark=true; sparkr=true; tpcds=true; docker=true;
+            pyspark_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
+            # 'build' and 'maven-build' are always true for now.
+            # It does not save significant time and most of PRs trigger the build.
+            precondition="
+              {
+                \"build\": \"true\",
+                \"pyspark\": \"$pyspark\",
+                \"sparkr\": \"$sparkr\",
+                \"tpcds-1g\": \"$tpcds\",
+                \"docker-integration-tests\": \"$docker\",
+                \"maven-build\": \"true\",
+                \"lint\" : \"true\",
+                \"k8s-integration-tests\" : \"true\",
+                \"buf\" : \"true\",
+                \"ui\" : \"true\",
+              }"
+            echo $precondition # For debugging
+            # Remove `\n` to avoid "Invalid format" error
+            precondition="${precondition//$'\n'/}}"
+            echo "required=$precondition" >> $GITHUB_OUTPUT
+          else
+            # This is usually set by scheduled jobs.
+            precondition='${{ inputs.jobs }}'
+            echo $precondition # For debugging
+            precondition="${precondition//$'\n'/}"
+            echo "required=$precondition" >> $GITHUB_OUTPUT
+          fi
      - name: Generate infra image URL
        id: infra-image-outputs
        run: |

From 20dbf0e343807e3694570e925c044b2f2560a232 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 13:29:20 +0800
Subject: [PATCH 66/71] branch-3.4

---
 .github/workflows/build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 206d1b0de296..45af3bdbe5d8 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -423,7 +423,7 @@ jobs:
        run: |
          # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
          # Should delete this section after SPARK 3.4 EOL.
-          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas<=2.0.3' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2'
+          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3' 'matplotlib<3.3.0' 'torch==1.13.1' 'torchvision==0.14.1'
          python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
      - name: Install Python linter dependencies for branch-3.5
        if: inputs.branch == 'branch-3.5'
        run: |
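The precondition logic that patch 64 strips down and patch 65 restores rests on one mechanism: a job writes a JSON object to $GITHUB_OUTPUT, and every downstream job gates itself with fromJson on that output. A trimmed, self-contained sketch of the round trip (payload simplified to two keys):

    jobs:
      precondition:
        runs-on: ubuntu-latest
        outputs:
          required: ${{ steps.set-outputs.outputs.required }}
        steps:
          - name: Check all modules
            id: set-outputs
            run: |
              # Must be a single line; embedded newlines break the key=value format.
              precondition='{"build": "true", "pyspark": "true"}'
              echo "required=$precondition" >> $GITHUB_OUTPUT
      pyspark:
        needs: precondition
        if: fromJson(needs.precondition.outputs.required).pyspark == 'true'
        runs-on: ubuntu-latest
        steps:
          - run: echo "run the PySpark test matrix"

This is also why the script keeps the precondition="${precondition//$'\n'/}" substitution: GITHUB_OUTPUT records one key=value pair per line, so the multi-line JSON has to be flattened before it is written.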
From d70b07ed78d23b6e72cff21c111bf69c5747300c Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 13:59:44 +0800
Subject: [PATCH 67/71] test

---
 .github/workflows/build_and_test.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 45af3bdbe5d8..f126db7543eb 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -423,7 +423,8 @@ jobs:
        run: |
          # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
          # Should delete this section after SPARK 3.4 EOL.
-          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3' 'matplotlib<3.3.0' 'torch==1.13.1' 'torchvision==0.14.1'
+          python3.9 -m pip list
+          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3'
          python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
      - name: Install Python linter dependencies for branch-3.5
        if: inputs.branch == 'branch-3.5'
        run: |

From a8d29d062012fcf8fe7e65f335406fe76494f512 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 14:05:17 +0800
Subject: [PATCH 68/71] test

---
 .github/workflows/build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index f126db7543eb..3c1deda398f6 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -253,7 +253,7 @@ jobs:
      - name: Install Python test dependencies for branch-3.4
        if: matrix.branch == 'branch-3.4'
        run: |
-          python3.9 -m pip install 'numpy==1.20.3' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.7.2' 'torch==2.0.1' 'torchvision==0.15.2' 'scikit-learn==1.1.*'
+          python3.9 -m pip install 'numpy==1.23.5' 'pandas<=1.5.3' 'pyarrow==12.0.1' 'matplotlib==3.8.3' 'torch==2.2.1' 'torchvision==0.17.1' 'scikit-learn==1.1.*'

From 8d564c66588b973533ccc3c239bfdbd3237509e9 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 14:51:25 +0800
Subject: [PATCH 69/71] test

---
 .github/workflows/build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 3c1deda398f6..061949fc788f 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -424,7 +424,7 @@ jobs:
          # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
          # Should delete this section after SPARK 3.4 EOL.
          python3.9 -m pip list
-          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3'
+          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3' 'matplotlib<3.3.0' 'torch==1.13.1' 'torchvision==0.14.1'
          python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
      - name: Install Python linter dependencies for branch-3.5
        if: inputs.branch == 'branch-3.5'

From 53d54777b66356e432104f2e47e6f4063ce08e38 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 15:45:21 +0800
Subject: [PATCH 70/71] test

---
 .github/workflows/build_and_test.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 061949fc788f..cc24e1bebd3f 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -165,7 +165,9 @@ jobs:
          - >-
            pyspark-sql, pyspark-resource, pyspark-testing
          - >-
-            pyspark-core, pyspark-errors, pyspark-streaming
+            pyspark-core, pyspark-streaming
+          - >-
+            pyspark-errors
          - >-
            pyspark-mllib, pyspark-ml, pyspark-ml-connect
          - >-
@@ -424,7 +426,7 @@ jobs:
          # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
          # Should delete this section after SPARK 3.4 EOL.
          python3.9 -m pip list
-          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3' 'matplotlib<3.3.0' 'torch==1.13.1' 'torchvision==0.14.1'
+          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3' 'matplotlib==3.8.3' 'torch==2.2.1' 'torchvision==0.17.1'
          python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
      - name: Install Python linter dependencies for branch-3.5
        if: inputs.branch == 'branch-3.5'
        run: |

From a306f5a7b286954838e6a6c4d80893f21c5e9b03 Mon Sep 17 00:00:00 2001
From: panbingkun
Date: Wed, 20 Mar 2024 15:57:48 +0800
Subject: [PATCH 71/71] matplotlib==3.7.2

---
 .github/workflows/build_and_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index cc24e1bebd3f..2610f50a6244 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -426,7 +426,7 @@ jobs:
          # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
          # Should delete this section after SPARK 3.4 EOL.
          python3.9 -m pip list
-          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3' 'matplotlib==3.8.3' 'torch==2.2.1' 'torchvision==0.17.1'
+          python3.9 -m pip install 'flake8==3.9.0' pydata-sphinx-theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'numpy==1.25.1' 'pyarrow==12.0.1' numpydoc 'jinja2<3.0.0' 'black==22.6.0' 'pandas==1.5.3' 'matplotlib==3.7.2'
          python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
      - name: Install Python linter dependencies for branch-3.5
        if: inputs.branch == 'branch-3.5'
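Stepping back, the shape this series converges on: build_and_test.yml stays a reusable workflow driven by workflow_call inputs, and each maintenance branch gets a small scheduled wrapper (such as build_branch35.yml) that invokes it with the right branch, Java version, env, and jobs payload. A condensed sketch of such a wrapper, assuming the reusable workflow lives at .github/workflows/build_and_test.yml (the cron time and jobs payload here are illustrative, not the final values of the series):

    name: "Build (branch-3.5, Scala 2.13, Hadoop 3, JDK 8)"

    on:
      schedule:
        - cron: '0 11 * * *'

    jobs:
      run-build:
        permissions:
          packages: write
        name: Run
        # Invoke the reusable workflow in the same repository.
        uses: ./.github/workflows/build_and_test.yml
        if: github.repository == 'apache/spark'
        with:
          java: 8
          branch: branch-3.5
          hadoop: hadoop3
          envs: '{"SCALA_PROFILE": "scala2.13", "PYTHON_TO_TEST": ""}'
          jobs: '{"build": "true", "pyspark": "true", "lint": "true"}'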