From 26c9c056b5ba022d08de4f6793381d8bf2641f54 Mon Sep 17 00:00:00 2001
From: Daniel Su
Date: Mon, 23 Jun 2025 22:52:57 -0400
Subject: [PATCH 1/2] [Ex CI] create centralized Azure CI dispatcher workflow

---
 .github/requirements.txt                      |   2 +
 .github/scripts/azure_resolve_subtree_deps.py | 109 ++++++++++
 .github/workflows/azure-ci-dispatcher.yml     | 202 ++++++++++++++++++
 3 files changed, 313 insertions(+)
 create mode 100644 .github/requirements.txt
 create mode 100644 .github/scripts/azure_resolve_subtree_deps.py
 create mode 100644 .github/workflows/azure-ci-dispatcher.yml

diff --git a/.github/requirements.txt b/.github/requirements.txt
new file mode 100644
index 00000000000..76aa8db95e1
--- /dev/null
+++ b/.github/requirements.txt
@@ -0,0 +1,2 @@
+pydantic
+requests
\ No newline at end of file
diff --git a/.github/scripts/azure_resolve_subtree_deps.py b/.github/scripts/azure_resolve_subtree_deps.py
new file mode 100644
index 00000000000..3a8e3b67e66
--- /dev/null
+++ b/.github/scripts/azure_resolve_subtree_deps.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+"""
+Azure Pipeline Resolver Script
+------------------------------
+This script determines which Azure pipelines to run based on changed subtrees.
+Using a predefined dependency map, the script resolves which projects need to be processed,
+skipping those that will be covered by their dependencies.
+
+Steps:
+    1. Load a list of changed projects from a file.
+    2. Consult a dependency map to determine transitive and direct dependencies.
+    3. Identify projects that should be processed, excluding those handled by dependencies.
+    4. Output the list of projects to be run, along with their Azure pipeline IDs.
+
+Arguments:
+    --subtree-file : Path to the file containing a newline-separated list of changed subtrees.
+
+Outputs:
+    Prints a newline-separated list of "project_name=definition_id" for the projects that need
+    to be processed, where `definition_id` is the Azure pipeline ID associated with the project.
+
+Example Usage:
+    To determine which pipelines to run given the changed subtrees listed in a file:
+        python azure_resolve_subtree_deps.py --subtree-file changed_subtrees.txt
+"""
+
+import argparse
+from typing import List, Optional
+
+
+def parse_arguments(argv: Optional[List[str]] = None) -> argparse.Namespace:
+    """Parse command-line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Given a list of changed subtrees, determine which Azure pipelines to run.")
+    parser.add_argument("--subtree-file", required=True,
+                        help="Path to the file containing changed subtrees")
+    return parser.parse_args(argv)
+
+
+def read_file_into_set(file_path):
+    """Read the whitespace-stripped, non-blank lines of the file at file_path into a set."""
+    with open(file_path, 'r') as file:
+        return {name for name in map(str.strip, file) if name}
+
+
+def resolve_dependencies(projects, dependencies):
+    """Return the projects that have none of their listed dependencies in projects."""
+    projects_to_run = set(projects)
+
+    for project in projects:
+        if project in dependencies:
+            for dependency in dependencies[project]:
+                if dependency in projects:
+                    # Skip project if its dependency is present
+                    projects_to_run.discard(project)
+
+    return projects_to_run
+
+
+def main(argv=None) -> None:
+    """Print "project=definition_id" for each resolved project that has a pipeline ID."""
+    # Mathlib build+test dependency tree as defined in Azure CI and TheRock
+    math_dependencies = {
+        "shared/tensile": set(),
+        "projects/rocrand": set(),
+        "projects/hiprand": {"projects/rocrand"},
+        "projects/rocfft": {"projects/hiprand"},
+        "projects/hipfft": {"projects/rocfft"},
+        "projects/rocprim": set(),
+        "projects/hipcub": {"projects/rocprim"},
+        "projects/rocthrust": {"projects/rocprim"},
+        "projects/hipblas-common": set(),
+        "projects/hipblaslt": {"projects/hipblas-common"},
+        "projects/rocblas": {"projects/hipblaslt"},
+        "projects/rocsolver": {"projects/rocprim", "projects/hipblaslt"},
+        "projects/rocsparse": {"projects/rocprim", "projects/hipblaslt"},
+        "projects/hipblas": {"projects/rocsolver"},
+        "projects/hipsolver": {"projects/rocsolver", "projects/rocsparse"},
+        "projects/hipSPARSE": {"projects/rocsparse"},
+        "projects/MIOpen": {"projects/rocrand", "projects/hipblas"}
+    }
+    # Azure pipeline IDs for each project, to be populated as projects are enabled
+    definition_ids = {
+        "shared/tensile": 305,
+        "projects/rocrand": 274,
+        "projects/hiprand": 275,
+        "projects/rocfft": 282,
+        "projects/hipfft": 283,
+        "projects/rocprim": 273,
+        "projects/hipcub": 277,
+        "projects/rocthrust": 276,
+        "projects/hipblas-common": 300,
+        "projects/hipblaslt": 301,
+        "projects/rocblas": 302,
+        "projects/rocsolver": 303,
+    }
+
+    args = parse_arguments(argv)
+    projects = read_file_into_set(args.subtree_file)
+    projects_to_run = resolve_dependencies(projects, math_dependencies)
+
+    for project in sorted(projects_to_run):
+        if project in definition_ids:
+            print(f"{project}={definition_ids[project]}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/azure-ci-dispatcher.yml b/.github/workflows/azure-ci-dispatcher.yml
new file mode 100644
index 00000000000..a77ec17dc69
--- /dev/null
+++ b/.github/workflows/azure-ci-dispatcher.yml
@@ -0,0 +1,202 @@
+# Azure CI Dispatcher
+# ------------------
+# This workflow allows Azure CI to be centralized in a single PR check.
+# It detects which subtrees (from a monorepo structure) were changed in a
+# pull request, and automatically requests Azure CI runs for the corresponding
+# subtrees.
+#
+# For any given subtree, if an upstream subtree is also included in the PR,
+# it will not run CI for the downstream subtree.
+# Eg. A PR that touches rocprim and rocthrust will only trigger rocprim CI.
+#
+# Requires an Azure Personal Access Token with permissions to manage builds.
+# The token should be stored in the repository secrets as `AZ_PAT`.
+
+name: Dispatch Azure CI
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - ready_for_review
+    branches:
+      - develop
+      - staging
+      - main
+      - release-staging/rocm-rel-7.*
+      - users/danielsu/az-ci-dispatch
+
+jobs:
+  dispatch-azure-ci:
+    name: Dispatch Azure CI
+    runs-on: ubuntu-latest
+    steps:
+      # - name: Generate a token
+      #   id: generate-token
+      #   uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6
+      #   with:
+      #     app-id: ${{ secrets.APP_ID }}
+      #     private-key: ${{ secrets.APP_PRIVATE_KEY }}
+      #     owner: ${{ github.repository_owner }}
+
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          # ref: refs/pull/${{ github.event.pull_request.number }}/merge
+          ref: users/danielsu/az-ci-dispatch
+          sparse-checkout: .github
+          sparse-checkout-cone-mode: true
+          token: ${{ secrets.GH_TOKEN }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r .github/requirements.txt
+
+      - name: Detect changed subtrees
+        id: detect
+        env:
+          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+        run: |
+          python .github/scripts/pr_detect_changed_subtrees.py \
+            --repo "${{ github.repository }}" \
+            --pr "${{ github.event.pull_request.number }}" \
+            --config ".github/repos-config.json" \
+            --require-auto-push
+
+      - name: Dispatch Azure CI runs
+        id: dispatch
+        if: steps.detect.outputs.subtrees
+        env:
+          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+        run: |
+          echo "${{ steps.detect.outputs.subtrees }}" > changed_subtrees.txt
+
+          python .github/scripts/azure_resolve_subtree_deps.py \
+            --subtree-file changed_subtrees.txt \
+            > resolved_subtrees.txt
+
+          run_ids=()
+
+          while IFS= read -r line; do
+            IFS='=' read -r project_name definition_id <<< "$line"
+            echo "Requesting run for $project_name with definition ID $definition_id"
+
+            max_attempts=3
+            retry_delay=5
+            attempt=1
+            success=false
+
+            while [ $attempt -le $max_attempts ]; do
+              response=$(curl -sSX POST https://dev.azure.com/ROCm-CI/ROCm-CI/_apis/pipelines/$definition_id/runs?api-version=7.1 \
+                -u ":${{ secrets.AZ_PAT }}" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "resources": {
+                    "repositories": {
+                      "self": {
+                        "refName": "refs/pull/328/merge"
+                      }
+                    }
+                  }
+                }') && curl_status=0 || curl_status=$?
+
+              if [ "$curl_status" -eq 0 ]; then
+                success=true
+                break
+              fi
+
+              echo "Attempt $attempt failed. Retrying in $retry_delay seconds..."
+              sleep $retry_delay
+              attempt=$((attempt + 1))
+            done
+
+            if [ "$success" = true ]; then
+              run_id=$(echo "$response" | jq -r '.id' || echo "null")
+              if [ "$run_id" != "null" ]; then
+                echo "Run ID for $project_name: $run_id"
+                echo "https://dev.azure.com/ROCm-CI/ROCm-CI/_build/results?buildId=$run_id"
+                run_ids+=("$run_id")
+              else
+                echo "Failed to request run for $project_name"
+              fi
+            else
+              echo "Failed to request run for $project_name after $max_attempts attempts"
+            fi
+
+            echo ""
+          done < resolved_subtrees.txt
+
+          echo "run_ids=${run_ids[*]}" >> $GITHUB_OUTPUT
+
+      # todo: replace refName with ${{ github.ref }}
+      # todo: uncomment generate-token
+      # todo: change checkout ref
+      # todo: remove all refs to users/danielsu/az-ci-dispatch
+
+      - name: Wait for and report Azure CI status
+        if: steps.dispatch.outputs.run_ids
+        run: |
+          run_ids=(${{ steps.dispatch.outputs.run_ids }})
+          echo "Waiting on runs: ${run_ids[*]}"
+
+          all_finished=0
+          exit_status=0
+
+          while [[ $all_finished -eq 0 ]]; do
+            sleep 120
+            all_finished=1
+            echo "=================================================================="
+
+            for run_id in "${run_ids[@]}"; do
+              max_attempts=3
+              retry_delay=5
+              attempt=1
+              success=false
+
+              while [ $attempt -le $max_attempts ]; do
+                response=$(curl -sSX GET https://dev.azure.com/ROCm-CI/ROCm-CI/_apis/build/builds/$run_id?api-version=7.1 \
+                  -u ":${{ secrets.AZ_PAT }}" \
+                  -H "Content-Type: application/json") && curl_status=0 || curl_status=$?
+
+                if [[ $curl_status -eq 0 ]]; then
+                  success=true
+                  break
+                fi
+
+                echo "Attempt $attempt failed. Retrying in $retry_delay seconds..."
+                sleep $retry_delay
+                attempt=$((attempt + 1))
+              done
+
+              if [[ "$success" == true ]]; then
+                pipeline_name=$(echo "$response" | jq -r '.definition.name')
+                status=$(echo "$response" | jq -r '.status')
+                result=$(echo "$response" | jq -r '.result')
+
+                if [[ "$status" == "completed" ]]; then
+                  echo "$pipeline_name - run $run_id is finished with result: $result"
+                  echo "https://dev.azure.com/ROCm-CI/ROCm-CI/_build/results?buildId=$run_id"
+                  if [[ "$result" != "succeeded" ]]; then
+                    exit_status=1
+                  fi
+                else
+                  all_finished=0
+                  echo "$pipeline_name - run $run_id is in progress..."
+                  echo "https://dev.azure.com/ROCm-CI/ROCm-CI/_build/results?buildId=$run_id"
+                fi
+              else
+                echo "Failed to fetch status for run $run_id after $max_attempts attempts"
+                echo "https://dev.azure.com/ROCm-CI/ROCm-CI/_build/results?buildId=$run_id"
+                exit_status=1
+                continue
+              fi
+            done
+          done
+
+          if [[ $exit_status -ne 0 ]]; then
+            exit $exit_status
+          fi
From ffd0e6615d39c43f59aeb80b1cb5a32d00fb4d9f Mon Sep 17 00:00:00 2001
From: Daniel Su
Date: Tue, 24 Jun 2025 11:01:09 -0400
Subject: [PATCH 2/2] Remove manual test values

---
 .github/workflows/azure-ci-dispatcher.yml | 31 +++++++++--------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/azure-ci-dispatcher.yml b/.github/workflows/azure-ci-dispatcher.yml
index a77ec17dc69..ed1469dcbd9 100644
--- a/.github/workflows/azure-ci-dispatcher.yml
+++ b/.github/workflows/azure-ci-dispatcher.yml
@@ -27,29 +27,27 @@ on:
       - staging
       - main
       - release-staging/rocm-rel-7.*
-      - users/danielsu/az-ci-dispatch
 
 jobs:
   dispatch-azure-ci:
     name: Dispatch Azure CI
     runs-on: ubuntu-latest
     steps:
-      # - name: Generate a token
-      #   id: generate-token
-      #   uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6
-      #   with:
-      #     app-id: ${{ secrets.APP_ID }}
-      #     private-key: ${{ secrets.APP_PRIVATE_KEY }}
-      #     owner: ${{ github.repository_owner }}
+      - name: Generate a token
+        id: generate-token
+        uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6
+        with:
+          app-id: ${{ secrets.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+          owner: ${{ github.repository_owner }}
 
       - name: Checkout code
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
-          # ref: refs/pull/${{ github.event.pull_request.number }}/merge
-          ref: users/danielsu/az-ci-dispatch
+          ref: refs/pull/${{ github.event.pull_request.number }}/merge
           sparse-checkout: .github
           sparse-checkout-cone-mode: true
-          token: ${{ secrets.GH_TOKEN }}
+          token: ${{ steps.generate-token.outputs.token }}
 
       - name: Install dependencies
         run: |
@@ -59,7 +57,7 @@ jobs:
       - name: Detect changed subtrees
         id: detect
         env:
-          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+          GH_TOKEN: ${{ steps.generate-token.outputs.token }}
         run: |
           python .github/scripts/pr_detect_changed_subtrees.py \
             --repo "${{ github.repository }}" \
@@ -71,7 +69,7 @@ jobs:
         id: dispatch
         if: steps.detect.outputs.subtrees
         env:
-          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+          GH_TOKEN: ${{ steps.generate-token.outputs.token }}
         run: |
           echo "${{ steps.detect.outputs.subtrees }}" > changed_subtrees.txt
 
@@ -98,7 +96,7 @@ jobs:
                   "resources": {
                     "repositories": {
                       "self": {
-                        "refName": "refs/pull/328/merge"
+                        "refName": "${{ github.ref }}"
                       }
                     }
                   }
@@ -132,11 +130,6 @@ jobs:
 
           echo "run_ids=${run_ids[*]}" >> $GITHUB_OUTPUT
 
-      # todo: replace refName with ${{ github.ref }}
-      # todo: uncomment generate-token
-      # todo: change checkout ref
-      # todo: remove all refs to users/danielsu/az-ci-dispatch
-
       - name: Wait for and report Azure CI status
         if: steps.dispatch.outputs.run_ids
         run: |