From 22798b4e96cb765c03c0c40554bd8618e0715d17 Mon Sep 17 00:00:00 2001
From: geomin12
Date: Wed, 24 Sep 2025 13:04:52 -0700
Subject: [PATCH 1/2] Adding TheRock CI sharding

---
# Review notes (text between the three-dash line and the first "diff --git"
# is not part of the commit and is ignored when the patch is applied):
#
# * Adds a reusable workflow, therock-test-component.yml (trigger:
#   workflow_call). All five inputs are strings; `component` carries one
#   test-matrix entry serialized as JSON and is read back with fromJSON() for
#   job_name, total_shards, shard_arr, fetch_artifact_args, timeout_minutes
#   and test_script.
# * The new job fans out over `matrix.shard` (taken from component.shard_arr)
#   and exposes SHARD_INDEX / TOTAL_SHARDS to the Test step's environment.
# * ARTIFACT_RUN_ID uses inputs.artifact_run_id when it is non-empty and
#   otherwise falls back to github.run_id.
# * therock-test-packages.yml: the inline `test_components` job body is
#   replaced by a call to the reusable workflow, passing
#   toJSON(matrix.components) as `component`.
# * Differences from the removed inline job that are visible in this diff:
#     - the "Install additional packages" step (apt install libgfortran5 for
#       rocblas / hipblaslt on linux) is dropped;
#     - the Test step no longer sources ${VENV_DIR}/bin/activate (or
#       Scripts/activate) before running test_script;
#     - the container image digest changes (sha256:405945a4... -> 4150afe4...);
#     - the sparse checkout of build_tools is now gated on Windows and, in
#       this patch, has no `repository:` key (added by patch 2/2).
#   NOTE(review): presumably the new image and the setup_test_environment
#   action cover the first two items - confirm before merging.
#
 .github/workflows/therock-test-component.yml | 90 ++++++++++++++++++++
 .github/workflows/therock-test-packages.yml  | 72 ++--------------
 2 files changed, 99 insertions(+), 63 deletions(-)
 create mode 100644 .github/workflows/therock-test-component.yml

diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml
new file mode 100644
index 00000000000..fac52d6aefc
--- /dev/null
+++ b/.github/workflows/therock-test-component.yml
@@ -0,0 +1,90 @@
+name: Test component
+
+on:
+  workflow_call:
+    inputs:
+      artifact_run_id:
+        type: string
+        default: ""
+      amdgpu_families:
+        type: string
+      test_runs_on:
+        type: string
+      platform:
+        type: string
+      component:
+        type: string
+
+
+permissions:
+  contents: read
+
+jobs:
+  test_component:
+    name: 'Test ${{ fromJSON(inputs.component).job_name }} (shard ${{ matrix.shard }} of ${{ fromJSON(inputs.component).total_shards }})'
+    runs-on: ${{ inputs.test_runs_on }}
+    container:
+      image: ${{ inputs.platform == 'linux' && 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:4150afe4759d14822f0e3f8930e1124f26e11f68b5c7b91ec9a02b20b1ebbb98' || null }}
+      options: --ipc host
+               --group-add video
+               --device /dev/kfd
+               --device /dev/dri
+               --group-add 992
+               --env-file /etc/podinfo/gha-gpu-isolation-settings
+    strategy:
+      fail-fast: false
+      matrix:
+        # The shard array is based on "total_shards" from "fetch_test_configurations.py"
+        # The test executable will shard based on the array. (ex: [1, 2, 3, 4] = four test shards)
+        shard: ${{ fromJSON(inputs.component).shard_arr }}
+    defaults:
+      run:
+        shell: bash
+    env:
+      VENV_DIR: ${{ github.workspace }}/.venv
+      ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}"
+      OUTPUT_ARTIFACTS_DIR: "./build"
+      THEROCK_BIN_DIR: "./build/bin"
+      AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
+    steps:
+      - name: "Fetch 'build_tools' from repository"
+        if: ${{ runner.os == 'Windows' }}
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          sparse-checkout: build_tools
+          path: "prejob"
+
+      - name: Pre-job cleanup processes on Windows
+        if: ${{ runner.os == 'Windows' }}
+        shell: powershell
+        run: . '${{ github.workspace }}\prejob\build_tools\github_actions\cleanup_processes.ps1'
+
+      - name: Checkout Repository
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          repository: "ROCm/TheRock"
+
+      - name: Run setup test environment workflow
+        uses: './.github/actions/setup_test_environment'
+        with:
+          ARTIFACT_RUN_ID: ${{ env.ARTIFACT_RUN_ID }}
+          AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
+          OUTPUT_ARTIFACTS_DIR: ${{ env.OUTPUT_ARTIFACTS_DIR }}
+          VENV_DIR: ${{ env.VENV_DIR }}
+          FETCH_ARTIFACT_ARGS: ${{ fromJSON(inputs.component).fetch_artifact_args }}
+          IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }}
+
+      - name: Test
+        timeout-minutes: ${{ fromJSON(inputs.component).timeout_minutes }}
+        env:
+          SHARD_INDEX: ${{ matrix.shard }}
+          TOTAL_SHARDS: ${{ fromJSON(inputs.component).total_shards }}
+        run: |
+          ${{ fromJSON(inputs.component).test_script }}
+
+      # GitHub's 'Complete job' step is unaware of launched executables
+      # and will fail to clean up orphan processes.
+      - name: Post-job cleanup processes on Windows
+        if: ${{ always() && runner.os == 'Windows' }}
+        shell: powershell
+        run: . '${{ github.workspace }}\build_tools\github_actions\cleanup_processes.ps1'
diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml
index d398e141231..c28d68d742e 100644
--- a/.github/workflows/therock-test-packages.yml
+++ b/.github/workflows/therock-test-packages.yml
@@ -55,72 +55,18 @@ jobs:
         run: python ./build_tools/github_actions/fetch_test_configurations.py
 
   test_components:
-    name: 'Test ${{ matrix.components.job_name }} on ${{ inputs.amdgpu_families }}'
-    runs-on: ${{ inputs.test_runs_on }}
-    container:
-      image: ${{ inputs.platform == 'linux' && 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:405945a40deaff9db90b9839c0f41d4cba4a383c1a7459b28627047bf6302a26' || null }}
-      options: --ipc host
-               --group-add video
-               --device /dev/kfd
-               --device /dev/dri
-               --group-add 992
-               --env-file /etc/podinfo/gha-gpu-isolation-settings
-    needs: configure_test_matrix
+    name: 'Test ${{ matrix.components.job_name }}'
+    needs: [configure_test_matrix]
     # skip tests if no test matrix to run
     if: ${{ needs.configure_test_matrix.outputs.components != '[]' }}
     strategy:
       fail-fast: false
       matrix:
         components: ${{ fromJSON(needs.configure_test_matrix.outputs.components) }}
-    defaults:
-      run:
-        shell: bash
-    env:
-      VENV_DIR: ${{ github.workspace }}/.venv
-      ARTIFACT_RUN_ID: "${{ github.run_id }}"
-      OUTPUT_ARTIFACTS_DIR: ./build
-      THEROCK_BIN_DIR: "./build/bin"
-    steps:
-      - name: "Fetch 'build_tools' from repository"
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          sparse-checkout: build_tools
-          path: "prejob"
-          repository: "ROCm/TheRock"
-
-      - name: Pre-job cleanup processes on Windows
-        if: ${{ runner.os == 'Windows' }}
-        shell: powershell
-        run: . '${{ github.workspace }}\prejob\build_tools\github_actions\cleanup_processes.ps1'
-
-      - name: Checkout Repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
-        with:
-          repository: "ROCm/TheRock"
-
-      - name: Run setup test environment workflow
-        uses: './.github/actions/setup_test_environment'
-        with:
-          ARTIFACT_RUN_ID: ${{ env.ARTIFACT_RUN_ID }}
-          AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
-          OUTPUT_ARTIFACTS_DIR: ${{ env.OUTPUT_ARTIFACTS_DIR }}
-          VENV_DIR: ${{ env.VENV_DIR }}
-          FETCH_ARTIFACT_ARGS: ${{ matrix.components.fetch_artifact_args }}
-          IS_PR_FROM_FORK: ${{ github.event.pull_request.head.repo.fork }}
-
-      - name: Install additional packages
-        if: ${{ inputs.platform == 'linux' && (matrix.components.job_name == 'rocblas' || matrix.components.job_name == 'hipblaslt') }}
-        run: sudo apt install libgfortran5 -y
-
-      - name: Test
-        timeout-minutes: ${{ matrix.components.timeout_minutes }}
-        run: |
-          if [ "${{ inputs.PLATFORM }}" == "linux" ]; then source ${VENV_DIR}/bin/activate ; else . ${VENV_DIR}/Scripts/activate ; fi
-          ${{ matrix.components.test_script }}
-
-      # GitHub's 'Complete job' step is unaware of launched executables
-      # and will fail to clean up orphan processes.
-      - name: Post-job cleanup processes on Windows
-        if: ${{ always() && runner.os == 'Windows' }}
-        shell: powershell
-        run: . '${{ github.workspace }}\build_tools\github_actions\cleanup_processes.ps1'
+    uses: './.github/workflows/therock-test-component.yml'
+    with:
+      artifact_run_id: ${{ github.run_id }}
+      amdgpu_families: ${{ inputs.amdgpu_families }}
+      test_runs_on: ${{ inputs.test_runs_on }}
+      platform: ${{ inputs.platform }}
+      component: ${{ toJSON(matrix.components) }}

From 940b5ff5412d67d953bc7cb63fc228fa4e8ae837 Mon Sep 17 00:00:00 2001
From: geomin12
Date: Wed, 24 Sep 2025 14:19:31 -0700
Subject: [PATCH 2/2] Adding repository

---
# Review note (ignored when the patch is applied): adds
# `repository: "ROCm/TheRock"` to the Windows-only sparse checkout of
# build_tools in therock-test-component.yml, so that step names the same
# repository as the later "Checkout Repository" step.
#
 .github/workflows/therock-test-component.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml
index fac52d6aefc..d81d2ef94a0 100644
--- a/.github/workflows/therock-test-component.yml
+++ b/.github/workflows/therock-test-component.yml
@@ -51,6 +51,7 @@ jobs:
         if: ${{ runner.os == 'Windows' }}
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
         with:
+          repository: "ROCm/TheRock"
           sparse-checkout: build_tools
           path: "prejob"
 