From fa4d17ba1be6e4217d1cd017bcdffa2a771b17b7 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 8 Apr 2021 15:32:39 +0900 Subject: [PATCH] Distribute workflow runs to the forked repositories --- .github/workflows/build_and_test.yml | 53 +++++++++++++--------- .github/workflows/notify_test_workflow.yml | 50 ++++++++++++++++++++ dev/run-tests.py | 12 ++--- 3 files changed, 85 insertions(+), 30 deletions(-) create mode 100644 .github/workflows/notify_test_workflow.yml diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3abe20608a11a..744b7c029f41c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -3,15 +3,8 @@ name: Build and test on: push: branches: - - master - pull_request: - branches: - - master - workflow_dispatch: - inputs: - target: - description: 'Target branch to run' - required: true + - '**' + - '!branch-*.*' jobs: # Build: build Spark and run the tests for specified modules. @@ -82,16 +75,21 @@ jobs: # GitHub Actions' default miniconda to use in pip packaging test. CONDA_PREFIX: /usr/share/miniconda GITHUB_PREV_SHA: ${{ github.event.before }} - GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }} steps: - name: Checkout Spark repository uses: actions/checkout@v2 # In order to fetch changed files with: fetch-depth: 0 - - name: Merge dispatched input branch - if: ${{ github.event.inputs.target != '' }} - run: git merge --progress --ff-only origin/${{ github.event.inputs.target }} + repository: apache/spark + ref: master + - name: Sync the current branch with the latest in Apache Spark + id: sync-branch + run: | + apache_spark_ref=`git rev-parse HEAD` + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/} + git merge --progress --ff-only FETCH_HEAD + echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref" # Cache local repositories. Note that GitHub Actions cache has a 2G limit. 
- name: Cache Scala, SBT and Maven uses: actions/cache@v2 @@ -132,6 +130,7 @@ jobs: # Run the tests. - name: Run tests run: | + export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }} # Hive and SQL tests become flaky when running in parallel as it's too intensive. if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" @@ -170,16 +169,21 @@ jobs: # GitHub Actions' default miniconda to use in pip packaging test. CONDA_PREFIX: /usr/share/miniconda GITHUB_PREV_SHA: ${{ github.event.before }} - GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }} steps: - name: Checkout Spark repository uses: actions/checkout@v2 # In order to fetch changed files with: fetch-depth: 0 - - name: Merge dispatched input branch - if: ${{ github.event.inputs.target != '' }} - run: git merge --progress --ff-only origin/${{ github.event.inputs.target }} + repository: apache/spark + ref: master + - name: Sync the current branch with the latest in Apache Spark + id: sync-branch + run: | + apache_spark_ref=`git rev-parse HEAD` + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/} + git merge --progress --ff-only FETCH_HEAD + echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref" # Cache local repositories. Note that GitHub Actions cache has a 2G limit. - name: Cache Scala, SBT and Maven uses: actions/cache@v2 @@ -214,6 +218,7 @@ jobs: # Run the tests. 
- name: Run tests run: | + export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }} ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" - name: Upload test results to report if: always() @@ -237,16 +242,21 @@ jobs: HADOOP_PROFILE: hadoop3.2 HIVE_PROFILE: hive2.3 GITHUB_PREV_SHA: ${{ github.event.before }} - GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }} steps: - name: Checkout Spark repository uses: actions/checkout@v2 # In order to fetch changed files with: fetch-depth: 0 - - name: Merge dispatched input branch - if: ${{ github.event.inputs.target != '' }} - run: git merge --progress --ff-only origin/${{ github.event.inputs.target }} + repository: apache/spark + ref: master + - name: Sync the current branch with the latest in Apache Spark + id: sync-branch + run: | + apache_spark_ref=`git rev-parse HEAD` + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/} + git merge --progress --ff-only FETCH_HEAD + echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref" # Cache local repositories. Note that GitHub Actions cache has a 2G limit. 
- name: Cache Scala, SBT and Maven uses: actions/cache@v2 @@ -272,6 +282,7 @@ jobs: # R issues at docker environment export TZ=UTC export _R_CHECK_SYSTEM_CLOCK_=FALSE + export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }} ./dev/run-tests --parallelism 2 --modules sparkr - name: Upload test results to report if: always() diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml new file mode 100644 index 0000000000000..4c30b74c3c6e9 --- /dev/null +++ b/.github/workflows/notify_test_workflow.yml @@ -0,0 +1,50 @@ +name: Notify test workflow +on: + pull_request_target: + types: [synchronize] + +jobs: + notify: + runs-on: ubuntu-20.04 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: "Notify test workflow" + uses: actions/github-script@v3 + if: ${{ github.base_ref == 'master' }} + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + var runID + const client = github + const endpoint = "GET /repos/:owner/:repo/actions/workflows/:id/runs?&branch=:branch" + const params = { + owner: context.payload.pull_request.head.repo.owner.login, + repo: context.payload.pull_request.head.repo.name, + id: "build_and_test.yml", + branch: context.payload.pull_request.head.ref, + } + + for await (const runs of client.paginate.iterator(endpoint, params)) { + const run = runs.data.find(r => { + return true + }) + if (run) { + runID = run.id + break + } + } + + var msg = "**[Test build #" + runID + " has started]" + + "(https://github.com/" + context.payload.pull_request.head.repo.full_name + "/actions/runs/" + runID + ")** " + + "for PR " + context.payload.pull_request.number + + " at commit [`" + context.payload.pull_request.head.sha + "`]" + + "(https://github.com/apache/spark/commit/" + + context.payload.pull_request.head.sha + ")." 
+ + github.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: msg + }) diff --git a/dev/run-tests.py b/dev/run-tests.py index c5b412d4d4f58..d5d3445bd6125 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -656,16 +656,10 @@ def main(): # If we're running the tests in GitHub Actions, attempt to detect and test # only the affected modules. if test_env == "github_actions": - if os.environ["GITHUB_INPUT_BRANCH"] != "": - # Dispatched request - # Note that it assumes GitHub Actions has already merged - # the given `GITHUB_INPUT_BRANCH` branch. + if os.environ["APACHE_SPARK_REF"] != "": + # Fork repository changed_files = identify_changed_files_from_git_commits( - "HEAD", target_branch=os.environ["GITHUB_SHA"]) - elif os.environ["GITHUB_BASE_REF"] != "": - # Pull requests - changed_files = identify_changed_files_from_git_commits( - os.environ["GITHUB_SHA"], target_branch=os.environ["GITHUB_BASE_REF"]) + "HEAD", target_ref=os.environ["APACHE_SPARK_REF"]) else: # Build for each commit. changed_files = identify_changed_files_from_git_commits(