Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
17ff75d
new ci weekly + new, refactored amdgpu matrix generator + base repres…
HereThereBeDragons Oct 9, 2025
1783b5c
rename ci to make clear it is WIP
HereThereBeDragons Oct 9, 2025
25b364f
some fix in matrix creation
HereThereBeDragons Nov 13, 2025
1167d83
update image
HereThereBeDragons Jan 5, 2026
189084a
adjust for rebase before christmas
HereThereBeDragons Jan 5, 2026
22c4a22
update image in new_setup.yml
HereThereBeDragons Jan 5, 2026
cbc367c
use new cmake4 image, remove unneded params from workflows that are t…
HereThereBeDragons Jan 7, 2026
019e3d8
refactor to integrate build variant, move into separate functions to …
HereThereBeDragons Jan 7, 2026
a150b7c
code style changes + adding one important TODO to auto detect machine…
HereThereBeDragons Jan 7, 2026
cf1cf39
try to enable t orun the ci weekly
HereThereBeDragons Jan 7, 2026
7b5a98d
fix syntax error
HereThereBeDragons Jan 7, 2026
fce472c
make new_setup.yml find the input params
HereThereBeDragons Jan 7, 2026
1bb6326
add runner selection based on pr label
HereThereBeDragons Jan 20, 2026
4c3eb1b
update cmake4 image
HereThereBeDragons Jan 21, 2026
f3d8113
extract more values from the amdgpu config instead of giving the para…
HereThereBeDragons Jan 21, 2026
e1f0133
use a proper dev version to hopefully build pytorch.
HereThereBeDragons Jan 21, 2026
0d9497b
add ROCM_THEROCK_TEST_RUNNERS for configure_amdgpu_matrix.
HereThereBeDragons Jan 26, 2026
5f44466
update GITHUB_WORKFLOWS_CI_PATTERNS
HereThereBeDragons Jan 26, 2026
836f96c
add tests configure_amdgpu_matrix_test.py
HereThereBeDragons Jan 26, 2026
cb20873
updating to use most recent dockerfile
HereThereBeDragons Jan 27, 2026
a4fad1c
add "LOAD_TEST_RUNNERS_FROM_VAR" guard for "ROCM_THEROCK_TEST_RUNNERS…
HereThereBeDragons Jan 27, 2026
e60326e
update manylinux image sha
HereThereBeDragons Feb 4, 2026
a8fc950
move git path filtering to determine ci should run into a separate fi…
HereThereBeDragons Feb 4, 2026
e7cf255
change request: move path filtering test into separate file, renamed …
HereThereBeDragons Feb 5, 2026
1756bc1
change request: make note more verbose, make gpus all lower case
HereThereBeDragons Feb 5, 2026
0c1b080
update to final version of new_amdgpu_matrix.py #3248
HereThereBeDragons Feb 12, 2026
f6052e6
big refactor of configure_amdgpu_matrix to use dataclasses from PR ht…
HereThereBeDragons Mar 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .github/workflows/build_portable_linux_artifacts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ on:
default: false
extra_cmake_options:
type: string
# TODO: remove the default later, when switching to new_setup.yml
container_image:
type: string
required: false
default: 'ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:db2b63f938941dde2abc80b734e64b45b9995a282896d513a0f3525d4591d6cb'

workflow_call:
inputs:
Expand All @@ -49,6 +54,11 @@ on:
type: boolean
extra_cmake_options:
type: string
# TODO: remove the default later, when switching to new_setup.yml
container_image:
type: string
required: false
default: 'ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:db2b63f938941dde2abc80b734e64b45b9995a282896d513a0f3525d4591d6cb'

# See the details regarding permissions from the link:
# https://github.com/aws-actions/configure-aws-credentials?tab=readme-ov-file#oidc
Expand All @@ -66,7 +76,7 @@ jobs:
permissions:
id-token: write
container:
image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:d6ae5712a9c7e8b88281d021e907b312cd8a26295b95690baef3e8dde4805858
image: ${{ inputs.container_image }}
# --cap-add=SYS_PTRACE : to enable ptrace inside the build container for tsan builds
# --security-opt seccomp=unconfined : to disable the system call filtering for tsan builds
options: -v /runner/config:/home/awsconfig/ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/ci_weekly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ on:


jobs:
donothing:
runs-on: ubuntu-latest
steps:
- run: echo "Skipped"
call_new_weekly_ci:
uses: ./.github/workflows/new_ci_weekly.yml
secrets: inherit
with:
linux_amdgpu_families: "gfx94X-dcgpu"
125 changes: 125 additions & 0 deletions .github/workflows/new_ci_linux.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Reusable Linux CI pipeline. It only declares a `workflow_call` trigger, so it
# runs when another workflow invokes it.
name: CI - Linux

on:
  workflow_call:
    inputs:
      # JSON-encoded configuration for one GPU family. The jobs below decode it
      # with fromJSON() and read `amdgpu_family`, `build.*` and `test.*`.
      amdgpu_family_config:
        type: string
      # Forwarded unchanged to test_artifacts.yml.
      test_labels:
        type: string
      # Forwarded to the test workflows; the Python packaging job falls back
      # to the current run id when this is empty.
      artifact_run_id:
        type: string
      # String flag: the jobs compare it against the literals 'true'/'false'.
      use_prebuilt_artifacts:
        type: string
      # Passed on as `package_version` to the build and packaging workflows.
      rocm_package_version:
        type: string
      # Forwarded unchanged to test_artifacts.yml.
      test_type:
        type: string
      # Container image handed to build_portable_linux_artifacts.yml.
      container_image:
        type: string

permissions:
  contents: read

jobs:
build_portable_linux_artifacts:
  # Builds the artifacts for the GPU family described by amdgpu_family_config.
  # Only runs when the caller did not ask for prebuilt artifacts; the input is
  # a string, hence the comparison against the literal 'false'.
  name: Build Artifacts
  if: ${{ inputs.use_prebuilt_artifacts == 'false' }}
  permissions:
    contents: read
    id-token: write
  secrets: inherit
  uses: ./.github/workflows/build_portable_linux_artifacts.yml
  with:
    # Values passed straight through from this workflow's inputs.
    package_version: ${{ inputs.rocm_package_version }}
    container_image: ${{ inputs.container_image }}
    # Values extracted from the JSON-encoded family configuration.
    amdgpu_families: ${{ fromJSON(inputs.amdgpu_family_config).amdgpu_family }}
    artifact_group: ${{ fromJSON(inputs.amdgpu_family_config).build.artifact_group }}
    build_variant_label: ${{ fromJSON(inputs.amdgpu_family_config).build.build_variant_label }}
    build_variant_cmake_preset: ${{ fromJSON(inputs.amdgpu_family_config).build.build_variant_cmake_preset }}
    build_variant_suffix: ${{ fromJSON(inputs.amdgpu_family_config).build.build_variant_suffix }}
    expect_failure: ${{ fromJSON(inputs.amdgpu_family_config).build.expect_failure }}

# TODO: rework "artifact_run_id" and "use_prebuilt_artifacts" here?
# I don't want to copy/paste this condition and special case plumbing
# through multiple workflows. All the packaging and testing workflows need
# to know is what artifact run id to use. That could be the current
# (implicit) run id, or it could be an explicit run id.
# How about having the "build artifacts" job run as a passthrough?

test_linux_artifacts:
  name: Test Artifacts
  needs: build_portable_linux_artifacts
  # Gating rules:
  # - never run after a failed or cancelled dependency;
  # - accept both values of use_prebuilt_artifacts, so the tests run whether
  #   the build job actually ran or was skipped;
  # - skip when a build failure is expected, to save machine capacity;
  # - skip unless the family configuration enables tests.
  if: >-
    ${{
      !failure() && !cancelled() &&
      (inputs.use_prebuilt_artifacts == 'false' || inputs.use_prebuilt_artifacts == 'true') &&
      fromJSON(inputs.amdgpu_family_config).build.expect_failure == false &&
      fromJSON(inputs.amdgpu_family_config).test.run_tests == true
    }}
  uses: ./.github/workflows/test_artifacts.yml
  with:
    # Values passed straight through from this workflow's inputs.
    artifact_run_id: ${{ inputs.artifact_run_id }}
    test_type: ${{ inputs.test_type }}
    test_labels: ${{ inputs.test_labels }}
    # Values extracted from the JSON-encoded family configuration.
    artifact_group: ${{ fromJSON(inputs.amdgpu_family_config).build.artifact_group }}
    amdgpu_families: ${{ fromJSON(inputs.amdgpu_family_config).amdgpu_family }}
    test_runs_on: ${{ fromJSON(inputs.amdgpu_family_config).test.runs_on.test }}
    # `== true` turns a missing/empty field into an explicit false.
    sanity_check_only_for_family: ${{ fromJSON(inputs.amdgpu_family_config).test.sanity_check_only_for_family == true }}

test_linux_benchmarks:
  name: Test Linux Benchmarks
  needs: build_portable_linux_artifacts
  # Benchmarks run only when all of the following hold:
  # - no dependency failed or was cancelled (the build may have been skipped
  #   because prebuilt artifacts are used);
  # - the build is not expected to fail;
  # - the family configuration names a benchmark runner
  #   (test.runs_on.benchmark is non-empty).
  if: >-
    ${{
      !failure() && !cancelled() &&
      (inputs.use_prebuilt_artifacts == 'false' || inputs.use_prebuilt_artifacts == 'true') &&
      fromJSON(inputs.amdgpu_family_config).build.expect_failure == false &&
      fromJSON(inputs.amdgpu_family_config).test.runs_on.benchmark != ''
    }}
  secrets: inherit
  uses: ./.github/workflows/test_benchmarks.yml
  with:
    artifact_run_id: ${{ inputs.artifact_run_id }}
    artifact_group: ${{ fromJSON(inputs.amdgpu_family_config).build.artifact_group }}
    amdgpu_families: ${{ fromJSON(inputs.amdgpu_family_config).amdgpu_family }}
    test_runs_on: ${{ fromJSON(inputs.amdgpu_family_config).test.runs_on.benchmark }}

build_portable_linux_python_packages:
  name: Build Python
  needs: build_portable_linux_artifacts
  # Never run after a failed or cancelled dependency. Both values of
  # use_prebuilt_artifacts are accepted, so packaging happens whether the
  # build job actually ran or was skipped. Skipped when a build failure is
  # expected.
  if: >-
    ${{
      !failure() && !cancelled() &&
      (inputs.use_prebuilt_artifacts == 'false' || inputs.use_prebuilt_artifacts == 'true') &&
      fromJSON(inputs.amdgpu_family_config).build.expect_failure == false
    }}
  uses: ./.github/workflows/build_portable_linux_python_packages.yml
  with:
    package_version: ${{ inputs.rocm_package_version }}
    artifact_group: ${{ fromJSON(inputs.amdgpu_family_config).build.artifact_group }}
    # Use the explicitly requested run id, or this run's id when none is given.
    artifact_run_id: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}"
163 changes: 163 additions & 0 deletions .github/workflows/new_ci_weekly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# This CI workflow is triggered by:
# - scheduled run (weekly cron)
# - manual run (workflow_dispatch)
# - other workflows (workflow_call)
#
# In the scheduled run, we run all targets from amdgpu_family_matrix.py and amdgpu_family_matrix_xfail.py
# As some of these builds are xfail, we allow errors to occur with `continue-on-error`, where the job will fail but the workflow is green

name: WIP!!! CI Weekly (New)

on:
  # Weekly scheduled trigger. For AMD GPU families that expect_failure, builds
  # and tests are run from this trigger.
  schedule:
    - cron: "0 2 * * 0" # Every Sunday at 2 AM UTC

  # Manual trigger; every input is optional.
  workflow_dispatch:
    inputs:
      linux_amdgpu_families:
        type: string
        description: "Insert comma-separated list of Linux GPU families to build and test. ex: gfx94X, gfx1201X"
        default: ""
      linux_amdgpu_families_predefined_groups:
        type: string
        description: "Insert comma-separated list of predefined group(s) of Linux GPU families found in build_tools/github_actions/new_amdgpu_family_matrix.py (e.g. amdgpu_presubmit)"
        default: ""
      linux_use_prebuilt_artifacts:
        type: boolean
        description: "If enabled, the CI will pull Linux artifacts using artifact_run_id and only run tests"
        default: false
      windows_amdgpu_families:
        type: string
        description: "Insert comma-separated list of Windows GPU families to build and test. ex: gfx94X, gfx1201X"
        default: ""
      windows_amdgpu_families_predefined_groups:
        type: string
        description: "Insert comma-separated list of predefined group(s) of Windows GPU families found in build_tools/github_actions/new_amdgpu_family_matrix.py (e.g. amdgpu_presubmit)"
        default: ""
      windows_use_prebuilt_artifacts:
        type: boolean
        description: "If enabled, the CI will pull Windows artifacts using artifact_run_id and only run tests"
        default: false
      artifact_run_id:
        type: string
        description: "If provided, the tests will run on this artifact ID"
        default: ""

  # Reusable-workflow trigger; declares the same inputs as workflow_dispatch.
  workflow_call:
    inputs:
      linux_amdgpu_families:
        type: string
        description: "Insert comma-separated list of Linux GPU families to build and test. ex: gfx94X, gfx1201X"
        default: ""
      linux_amdgpu_families_predefined_groups:
        type: string
        description: "Insert comma-separated list of predefined group(s) of Linux GPU families found in build_tools/github_actions/new_amdgpu_family_matrix.py (e.g. amdgpu_presubmit)"
        default: ""
      linux_use_prebuilt_artifacts:
        type: boolean
        description: "If enabled, the CI will pull Linux artifacts using artifact_run_id and only run tests"
        default: false
      windows_amdgpu_families:
        type: string
        description: "Insert comma-separated list of Windows GPU families to build and test. ex: gfx94X, gfx1201X"
        default: ""
      windows_amdgpu_families_predefined_groups:
        type: string
        description: "Insert comma-separated list of predefined group(s) of Windows GPU families found in build_tools/github_actions/new_amdgpu_family_matrix.py (e.g. amdgpu_presubmit)"
        default: ""
      windows_use_prebuilt_artifacts:
        type: boolean
        description: "If enabled, the CI will pull Windows artifacts using artifact_run_id and only run tests"
        default: false
      artifact_run_id:
        type: string
        description: "If provided, the tests will run on this artifact ID"
        default: ""

permissions:
  contents: read

concurrency:
  # Group key: workflow name (keeps different workflows apart), followed by
  # the PR number when the event carries one and the commit hash otherwise.
  # A newer run in the same group cancels queued and in-progress runs for the
  # same PR (presubmit) or commit (postsubmit).
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
# setup:
# uses: ./.github/workflows/new_setup.yml
# with:
# # ci triggered via cron do not accept input values so we have to set them here
# linux_amdgpu_families: ${{ github.event_name == 'schedule' && 'gfx1151' || inputs.linux_amdgpu_families }}
# linux_amdgpu_families_predefined_groups: ${{ github.event_name == 'schedule' && '' || inputs.linux_amdgpu_families_predefined_groups }}
# linux_use_prebuilt_artifacts: ${{ github.event_name == 'schedule' && '' || inputs.linux_use_prebuilt_artifacts }}
# windows_amdgpu_families: ${{ github.event_name == 'schedule' && '' || inputs.windows_amdgpu_families }}
# windows_amdgpu_families_predefined_groups: ${{ github.event_name == 'schedule' && '' || inputs.windows_amdgpu_families_predefined_groups }}
# windows_use_prebuilt_artifacts: ${{ github.event_name == 'schedule' && '' || inputs.windows_use_prebuilt_artifacts }}
# artifact_run_id: ${{ github.event_name == 'schedule' && '' || inputs.artifact_run_id }}

setup_cmake4:
  # Calls new_setup.yml, whose outputs (GPU family matrix, package version,
  # container image) are consumed by the weekly Linux job.
  uses: ./.github/workflows/new_setup.yml
  with:
    # Cron-triggered runs cannot carry inputs, so 'gfx1151' is the fallback
    # family for `schedule` events. An explicitly passed value always wins:
    # when this workflow is invoked through workflow_call, github.event_name
    # is the *caller's* event, so a scheduled caller that passes
    # linux_amdgpu_families would otherwise be silently overridden.
    linux_amdgpu_families: ${{ inputs.linux_amdgpu_families || (github.event_name == 'schedule' && 'gfx1151' || '') }}
    # These inputs were previously wrapped in
    # `github.event_name == 'schedule' && '' || inputs.x`. Because '' is
    # falsy, that expression always evaluated to `inputs.x`, so the inputs are
    # now passed through directly (they are empty on scheduled runs anyway).
    linux_amdgpu_families_predefined_groups: ${{ inputs.linux_amdgpu_families_predefined_groups }}
    linux_use_prebuilt_artifacts: ${{ inputs.linux_use_prebuilt_artifacts }}
    windows_amdgpu_families: ${{ inputs.windows_amdgpu_families }}
    windows_amdgpu_families_predefined_groups: ${{ inputs.windows_amdgpu_families_predefined_groups }}
    windows_use_prebuilt_artifacts: ${{ inputs.windows_use_prebuilt_artifacts }}
    artifact_run_id: ${{ inputs.artifact_run_id }}
    # Pinned digest of
    # ghcr.io/rocm/therock_build_manylinux_x86_64:users-lpromber-DockerCmake4
    container_image_linux: "ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:4a584d50cdcfa69ea0643f4b0065d30c68ed964a15c954fc2574ee31f4f849fc"
    # We always want to build and not use artifacts.
    force_build: true

weekly_linux_cmake4_build_and_test:
  name: 'Weekly: Linux CMake 4'
  needs: setup_cmake4
  # Skip the job when setup produced no Linux configurations. After fromJSON()
  # `.linux` is an array; comparing an array directly with the string '[]'
  # coerces both sides to NaN, so `!=` would always be true. Serializing the
  # list back with toJSON() makes the emptiness check effective.
  if: >-
    ${{ toJSON(fromJSON(needs.setup_cmake4.outputs.amdgpu_family_matrix).linux) != '[]' }}
  strategy:
    # Let the remaining GPU families finish even if one of them fails.
    fail-fast: false
    matrix:
      # One matrix entry per Linux family configuration object.
      linux_config: ${{ fromJSON(needs.setup_cmake4.outputs.amdgpu_family_matrix).linux }}
  uses: ./.github/workflows/new_ci_linux.yml
  secrets: inherit
  with:
    # The called workflow takes the configuration as a JSON string and
    # decodes it again with fromJSON().
    amdgpu_family_config: ${{ toJSON(matrix.linux_config) }}
    test_labels: ""
    artifact_run_id: ${{ inputs.artifact_run_id }}
    # Convert the boolean input into the 'true'/'false' strings the called
    # workflow compares against; an unset input becomes 'false'.
    use_prebuilt_artifacts: ${{ inputs.linux_use_prebuilt_artifacts == true && 'true' || 'false' }}
    rocm_package_version: ${{ needs.setup_cmake4.outputs.rocm_package_version }}
    test_type: "smoke"
    container_image: ${{ needs.setup_cmake4.outputs.container_image_linux }}
  permissions:
    contents: read
    id-token: write

# windows_build_and_test:
# name: Windows
# needs: setup
# if: >-
# ${{
# needs.setup.outputs.windows_amdgpu_families != '[]' &&
# needs.setup.outputs.enable_build_jobs == 'true'
# }}
# strategy:
# fail-fast: false
# matrix:
# families: ${{ fromJSON(needs.setup.outputs.windows_amdgpu_families) }}
# uses: ./.github/workflows/ci_windows.yml
# with:
# amdgpu_families: ${{ matrix.families.family }}
# test_runs_on: ${{ matrix.families.test-runs-on }}
# artifact_run_id: ${{ inputs.artifact_run_id }}
# extra_cmake_options: ${{ matrix.extra_cmake_options }}
# expect_failure: ${{ matrix.families.expect_failure == true }}
# windows_use_prebuilt_artifacts: ${{ inputs.windows_use_prebuilt_artifacts == true && 'true' || 'false' }}
# permissions:
# contents: read
# id-token: write

# build_python_packages:
# name: Build Python Packages
# uses: ./.github/workflows/build_python_packages.yml
Loading
Loading