diff --git a/.github/workflows/test_rocm_wheels.yml b/.github/workflows/test_rocm_wheels.yml
new file mode 100644
index 00000000000..cb045df296a
--- /dev/null
+++ b/.github/workflows/test_rocm_wheels.yml
@@ -0,0 +1,121 @@
+# Installs ROCm Python packages from a package index into a virtual
+# environment on a GPU runner, then runs the `rocm-sdk test` sanity tests.
+# Can be started manually (workflow_dispatch) or called from another
+# workflow (workflow_call).
+name: Test ROCm Wheels
+
+on:
+  workflow_dispatch:
+    inputs:
+      amdgpu_family:
+        description: GPU family to test (e.g., gfx94X-dcgpu, gfx110X-all)
+        required: true
+        type: string
+        default: "gfx94X-dcgpu"
+      test_runs_on:
+        description: Runner label to use. The selected runner should have a GPU supported by amdgpu_family
+        required: true
+        type: string
+        default: "linux-mi325-1gpu-ossci-rocm-frac"
+      package_index_url:
+        description: Base Python package index URL (without GPU family subdir)
+        required: true
+        type: string
+        default: "https://rocm.nightlies.amd.com/v2"
+      python_version:
+        description: Python version to set up (e.g. "3.12")
+        required: true
+        type: string
+        default: "3.12"
+      rocm_version:
+        description: ROCm version to pip install (e.g. "7.10.0a20251124")
+        required: true
+        type: string
+
+  workflow_call:
+    inputs:
+      amdgpu_family:
+        required: true
+        type: string
+      test_runs_on:
+        required: true
+        type: string
+      package_index_url:
+        required: true
+        type: string
+      python_version:
+        required: true
+        type: string
+      rocm_version:
+        required: true
+        type: string
+      repository:
+        description: "Repository to checkout. Otherwise, defaults to `github.repository`."
+        type: string
+      ref:
+        description: "Branch, tag or SHA to checkout. Defaults to the reference or SHA that triggered the workflow."
+        type: string
+
+permissions:
+  contents: read
+
+run-name: Test ROCm Wheels (${{ inputs.amdgpu_family }}, ${{ inputs.rocm_version }}, ${{ inputs.test_runs_on }})
+
+jobs:
+  test_wheels:
+    name: Test ROCm Wheels | ${{ inputs.amdgpu_family }}
+    runs-on: ${{ inputs.test_runs_on }}
+    container:
+      # A container image is only selected when the runner label contains
+      # 'linux'; for any other label the expression evaluates to null.
+      image: ${{ contains(inputs.test_runs_on, 'linux') && 'ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:405945a40deaff9db90b9839c0f41d4cba4a383c1a7459b28627047bf6302a26' || null }}
+      # Docker options: map the host's /dev/kfd and /dev/dri devices into the
+      # container and load extra environment variables from a file on the runner.
+      # NOTE(review): group 110 is presumably the runner's render group - confirm.
+      options: --ipc host
+        --group-add video
+        --device /dev/kfd
+        --device /dev/dri
+        --group-add 110
+        --env-file /etc/podinfo/gha-gpu-isolation-settings
+        --user 0:0 # Running as root, by recommendation of GitHub: https://docs.github.com/en/actions/reference/workflows-and-actions/dockerfile-support#user
+    defaults:
+      run:
+        shell: bash
+    env:
+      VENV_DIR: ${{ github.workspace }}/.venv
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+        with:
+          repository: ${{ inputs.repository || github.repository }}
+          ref: ${{ inputs.ref || '' }}
+
+      - name: Set up Python
+        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
+        with:
+          python-version: ${{ inputs.python_version }}
+
+      - name: Set up virtual environment and install ROCm packages
+        # Inputs are passed through environment variables instead of being
+        # interpolated into the script text, so their values are never
+        # parsed as shell code.
+        env:
+          ROCM_VERSION: ${{ inputs.rocm_version }}
+          PACKAGE_INDEX_URL: ${{ inputs.package_index_url }}
+          AMDGPU_FAMILY: ${{ inputs.amdgpu_family }}
+        run: |
+          python build_tools/setup_venv.py "${VENV_DIR}" \
+            --packages "rocm[libraries,devel]==${ROCM_VERSION}" \
+            --index-url="${PACKAGE_INDEX_URL}" \
+            --index-subdir="${AMDGPU_FAMILY}" \
+            --activate-in-future-github-actions-steps
+
+      - name: Show installed packages
+        run: |
+          pip freeze
+
+      - name: Run rocm-sdk sanity tests
+        run: |
+          rocm-sdk test