diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3a9157010e9b0..5768fb413994d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -56,10 +56,14 @@ jobs: GITHUB_PREV_SHA: ${{ github.event.before }} outputs: required: ${{ steps.set-outputs.outputs.required }} + # For branch-3.2 and branch-3.3, still use the previous image tag. + # If the infra image has changed, use the dynamic infra image URL. + # If the infra image hasn't changed, use the pre-built image directly. image_url: >- ${{ - (inputs.branch == 'master' && steps.infra-image-outputs.outputs.image_url) - || 'dongjoon/apache-spark-github-action-image:20220207' + (inputs.branch == 'branch-3.2' || inputs.branch == 'branch-3.3') && 'dongjoon/apache-spark-github-action-image:20220207' + || (fromJson(steps.set-outputs.outputs.required).infra-image == 'true' && steps.infra-image-outputs.outputs.image_url) + || format('ghcr.io/apache/spark/apache-spark-github-action-image-cache:{0}-static', inputs.branch) }} steps: - name: Checkout Spark repository @@ -87,6 +91,7 @@ jobs: sparkr=`./dev/is-changed.py -m sparkr` tpcds=`./dev/is-changed.py -m sql` docker=`./dev/is-changed.py -m docker-integration-tests` + infra_image=`./dev/is-changed.py -m infra-image` fi # 'build', 'scala-213', and 'java-11-17' are always true for now. # It does not save significant time and most of PRs trigger the build. 
@@ -97,6 +102,7 @@ jobs: \"sparkr\": \"$sparkr\", \"tpcds-1g\": \"$tpcds\", \"docker-integration-tests\": \"$docker\", + \"infra-image\": \"$infra_image\", \"scala-213\": \"true\", \"java-11-17\": \"true\", \"lint\" : \"true\", @@ -116,6 +122,7 @@ jobs: fi - name: Generate infra image URL id: infra-image-outputs + if: fromJson(steps.set-outputs.outputs.required).infra-image == 'true' run: | # Convert to lowercase to meet Docker repo name requirement REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') @@ -270,6 +277,7 @@ jobs: needs: precondition # Currently, only enable docker build from cache for `master` branch jobs if: >- + fromJson(needs.precondition.outputs.required).infra-image == 'true' && (fromJson(needs.precondition.outputs.required).pyspark == 'true' || fromJson(needs.precondition.outputs.required).lint == 'true' || fromJson(needs.precondition.outputs.required).sparkr == 'true') && diff --git a/dev/is-changed.py b/dev/is-changed.py index 85f0d3cda6df4..457d280ae857e 100755 --- a/dev/is-changed.py +++ b/dev/is-changed.py @@ -71,7 +71,10 @@ def main(): print("false") if opts.fail: sys.exit(1) - elif "root" in test_modules or modules.root in changed_modules: + # `./dev/is-changed.py -m infra-image` == True only when changing the infra dockerfile + elif ("root" in test_modules or modules.root in changed_modules) and ( + ["infra-image"] != test_modules + ): print("true") elif len(set(test_modules).intersection(module_names)) == 0: print("false") diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 2b9d526937942..a514fcc0ac679 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -781,6 +781,14 @@ def __hash__(self): test_tags=["org.apache.spark.tags.DockerTest"], ) +infra_image = Module( + name="infra-image", + dependencies=[], + source_file_regexes=[ + "dev/infra/", + ], +) + # The root module is a dummy module which is used to run all of the tests. 
# No other modules should directly depend on this module. root = Module(