Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ab0b1e5
extract stage actions; runner_config mapping
hnyls2002 May 14, 2026
67370be
run-test action derives partition from matrix + json
hnyls2002 May 14, 2026
8419a20
setup action: derive artifact_version + install_timeout from runner_c…
hnyls2002 May 14, 2026
944edcf
runner_configs: yml data + python wrapper
hnyls2002 May 14, 2026
45851a4
fix bootstrap: caller-level checkout before composite action
hnyls2002 May 14, 2026
fe63f31
simplify runner_configs.py
hnyls2002 May 14, 2026
3dfc806
extract stage into reusable workflow
hnyls2002 May 14, 2026
72347a2
inline setup/run/teardown into _pr-test-stage.yml
hnyls2002 May 14, 2026
c6f9609
remove dead code
hnyls2002 May 14, 2026
895d927
Merge branch 'main' into lsyin/ci-extract-stage-actions
hnyls2002 May 14, 2026
0cdfaef
fix: strip ${{}} from runs_on description
hnyls2002 May 14, 2026
8dedc2e
fix: caller if not failure to allow skipped needs
hnyls2002 May 14, 2026
d59d146
Merge branch 'main' into lsyin/ci-extract-stage-actions
hnyls2002 May 14, 2026
63d609e
ci: B200 conditional split + LPT_SLOP removal (stage-c partition 8→3)…
alisonshao May 14, 2026
a2081a5
forward pr_head_sha/git_ref/skip_stage_health_check + mirror env block
hnyls2002 May 14, 2026
01a7b88
Merge branch 'main' into lsyin/ci-extract-stage-actions
alisonshao May 14, 2026
eb36149
update stale docstrings in runner_configs
hnyls2002 May 14, 2026
3ba6cae
wait-for-jobs: match reusable-workflow matrix shape
hnyls2002 May 14, 2026
be410e1
Revert "wait-for-jobs: match reusable-workflow matrix shape"
hnyls2002 May 14, 2026
991eba6
Merge branch 'main' into lsyin/ci-extract-stage-actions
hnyls2002 May 14, 2026
5df32a9
port #24253 warmup updates to caller stubs
hnyls2002 May 14, 2026
a3abfb6
Merge remote-tracking branch 'origin/main' into lsyin/ci-extract-stag…
hnyls2002 May 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 198 additions & 0 deletions .github/workflows/_pr-test-stage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
name: PR Test Stage
# Reusable workflow that owns one CUDA test stage. Caller (pr-test.yml) passes
# (self_name, runner_config, runs_on, partitions, ...) and the
# per-commit/target_stage gating + matrix fanout + setup/run/teardown all live
# here. Only stage-a-test-cpu still lives inline in pr-test.yml (bespoke
# uv pip / protoc / rust-cache install path).

# Inputs supplied by the caller across the workflow_call boundary. Numeric
# knobs (run_timeout_minutes, warmup_timeout_minutes) are typed as strings and
# parsed with fromJson() at the step that consumes them.
on:
  workflow_call:
    inputs:
      self_name:
        description: 'Caller job key, used for $GITHUB_JOB-style gating + partitions[suite] lookup.'
        type: string
        required: true
      runner_config:
        description: 'Looked up in scripts/ci/runner_configs.yml for install script / artifact version / install timeout.'
        type: string
        required: true
      runs_on:
        description: 'Physical GHA runner label, e.g. "1-gpu-5090" or the b200_runner output from check-changes for B200 stages.'
        type: string
        required: true
      target_stage:
        description: 'Forwarded from pr-test.yml inputs.target_stage (used by /rerun-stage to skip everything except the targeted stage).'
        type: string
        default: ''
      test_parallel_dispatch:
        description: 'Forwarded from pr-test.yml inputs.test_parallel_dispatch.'
        type: string
        default: 'false'
      partitions:
        description: 'JSON from check-changes (size, arr, max_parallel per suite).'
        type: string
        required: true
      main_package:
        description: 'check-changes.outputs.main_package — gates the default per-commit run.'
        type: string
        required: true
      sgl_kernel:
        description: 'check-changes.outputs.sgl_kernel — both gates the run and forwarded to the install/download steps.'
        type: string
        required: true
      continue_on_error_flag:
        description: 'Empty or `--continue-on-error`; forwarded to run_suite.py.'
        type: string
        default: ''
      run_timeout_minutes:
        description: 'Per-suite wall-clock cap (minutes), enforced via the step-level `timeout-minutes` of the "Run test" step.'
        type: string
        default: '30'
      timeout_per_file:
        description: 'Optional run_suite.py --timeout-per-file value.'
        type: string
        default: ''
      warmup_deep_gemm_models:
        description: 'Space-separated `model:gpus` list. Empty = skip DeepGEMM warmup.'
        type: string
        default: ''
      warmup_server_models:
        description: 'Space-separated `model:gpus` list. Empty = skip server-CUDA-graph warmup.'
        type: string
        default: ''
      warmup_timeout_minutes:
        description: 'Wall-clock cap for each warmup step (DeepGEMM + server). Default 25min; bump for stages with large cold-cache warmup model lists.'
        type: string
        default: '25'
      extra_pytest_path:
        description: 'Optional pytest path to run after the suite (stage-b-test-4-gpu-b200 uses this for FA4 jit_kernel tests). Empty = skip.'
        type: string
        default: ''
      pr_head_sha:
        description: 'Forwarded from pr-test.yml inputs.pr_head_sha (for /rerun-stage on fork PRs).'
        type: string
        default: ''
      git_ref:
        description: 'Forwarded from pr-test.yml inputs.git_ref (for workflow_call from main).'
        type: string
        default: ''
      skip_stage_health_check:
        description: 'Forwarded from pr-test.yml inputs.skip_stage_health_check (release branch cut bypass).'
        type: boolean
        default: false

# Workflow-level env does not cross the workflow_call boundary, so this block
# mirrors the top-level env of pr-test.yml; anything defined there has to be
# repeated here for the called job to run with the same context.
# Values are quoted because the runner exports every entry as a string.
env:
  SGLANG_IS_IN_CI: 'true'
  SGLANG_CUDA_COREDUMP: '1'
  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: 'true'
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check && 'true' || 'false' }}
  FORCE_REBUILD_DEEPEP: '1'
  PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  USE_VENV: 'false'

jobs:
  # Single matrix job: one instance per partition of the suite named by
  # inputs.self_name. Each instance checks out the code, resolves the runner
  # config, installs dependencies, optionally warms up, runs its partition and
  # always cleans up the venv.
  run:
    # Mirror the inline gating that used to live in pr-test.yml on every CUDA
    # stage job:
    #   * target_stage takes precedence: when it equals self_name the stage runs;
    #   * otherwise, with no target_stage set, the stage runs when main_package
    #     or sgl_kernel changed AND either the trigger is schedule /
    #     parallel-dispatch or nothing has failed or been cancelled.
    if: |
      always() &&
      (
        (inputs.target_stage == inputs.self_name) ||
        (
          !inputs.target_stage &&
          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == 'true') || (!failure() && !cancelled())) &&
          (inputs.main_package == 'true' || inputs.sgl_kernel == 'true')
        )
      )
    runs-on: ${{ inputs.runs_on }}
    timeout-minutes: 240
    env:
      # Only stage-c-test-8-gpu-h20 needs the RDMA device list (selected by its
      # '8-gpu-h20' runner_config). Every other stage gets the variable set to
      # an empty string, which is harmless.
      SGLANG_CI_RDMA_ALL_DEVICES: ${{ inputs.runner_config == '8-gpu-h20' && 'mlx5_1,mlx5_2,mlx5_3,mlx5_4' || '' }}
    strategy:
      fail-fast: false
      # Partition list, size and parallelism all come from the caller-supplied
      # partitions JSON, keyed by this stage's job key.
      max-parallel: ${{ fromJson(inputs.partitions)[inputs.self_name].max_parallel }}
      matrix:
        partition: ${{ fromJson(inputs.partitions)[inputs.self_name].arr }}
    steps:
      # Checkout precedence: explicit PR head SHA, then git_ref, then the
      # triggering commit.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

      # Free-form string inputs are handed to shell steps through env rather
      # than interpolated into the script text, matching how
      # CONTINUE_ON_ERROR_FLAG is passed to the "Run test" step.
      - name: Resolve runner_config
        id: rc
        env:
          STAGE_RUNNER_CONFIG: ${{ inputs.runner_config }}
        run: python3 scripts/ci/runner_configs.py "$STAGE_RUNNER_CONFIG" >> "$GITHUB_OUTPUT"

      - uses: ./.github/actions/check-stage-health

      - uses: ./.github/actions/check-maintenance

      # The two download steps are mutually exclusive: the artifact_version
      # output of the rc step picks which download-artifact major runs, and
      # neither runs unless sgl_kernel is 'true'.
      - name: Download artifacts (v4)
        if: ${{ inputs.sgl_kernel == 'true' && steps.rc.outputs.artifact_version == 'v4' }}
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda*

      - name: Download artifacts (v6)
        if: ${{ inputs.sgl_kernel == 'true' && steps.rc.outputs.artifact_version == 'v6' }}
        uses: actions/download-artifact@v6
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda*

      - name: Install dependencies
        timeout-minutes: ${{ fromJson(steps.rc.outputs.install_timeout) }}
        env:
          CUSTOM_BUILD_SGL_KERNEL: ${{ inputs.sgl_kernel }}
        run: |
          bash ${{ steps.rc.outputs.install }}

      - name: Warmup DeepGEMM JIT Compilation
        if: inputs.warmup_deep_gemm_models != ''
        timeout-minutes: ${{ fromJson(inputs.warmup_timeout_minutes) }}
        env:
          STAGE_WARMUP_DEEP_GEMM_MODELS: ${{ inputs.warmup_deep_gemm_models }}
        run: |
          # Activate venv if available (GITHUB_ENV may have failed to propagate)
          [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
          [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
          # Intentionally unquoted: the space-separated list must split into
          # one argument per `model:gpus` entry.
          python3 scripts/ci/cuda/warmup_deep_gemm.py $STAGE_WARMUP_DEEP_GEMM_MODELS

      - name: Warmup Server CUDA Graphs
        if: inputs.warmup_server_models != ''
        timeout-minutes: ${{ fromJson(inputs.warmup_timeout_minutes) }}
        env:
          STAGE_WARMUP_SERVER_MODELS: ${{ inputs.warmup_server_models }}
        run: |
          # Activate venv if available, same as the DeepGEMM warmup step.
          [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
          [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
          # Intentionally unquoted: one argument per `model:gpus` entry.
          python3 scripts/ci/cuda/warmup_server.py $STAGE_WARMUP_SERVER_MODELS

      - name: Run test
        # The per-suite wall-clock cap is enforced here, at step level.
        timeout-minutes: ${{ fromJson(inputs.run_timeout_minutes) }}
        env:
          CONTINUE_ON_ERROR_FLAG: ${{ inputs.continue_on_error_flag }}
          STAGE_SELF_NAME: ${{ inputs.self_name }}
          # Expands to `--timeout-per-file <value>` or to nothing when unset.
          STAGE_TIMEOUT_PER_FILE_FLAG: ${{ inputs.timeout_per_file && format('--timeout-per-file {0}', inputs.timeout_per_file) || '' }}
        run: |
          cd test
          # The two *_FLAG variables are intentionally unquoted so that an
          # empty value contributes no argument and a set value splits into
          # its flag and operand.
          python3 run_suite.py --hw cuda --suite "$STAGE_SELF_NAME" \
            --auto-partition-id ${{ matrix.partition }} \
            --auto-partition-size ${{ fromJson(inputs.partitions)[inputs.self_name].size }} \
            $STAGE_TIMEOUT_PER_FILE_FLAG \
            $CONTINUE_ON_ERROR_FLAG

      - name: Run extra pytest
        if: inputs.extra_pytest_path != ''
        timeout-minutes: 10
        env:
          STAGE_EXTRA_PYTEST_PATH: ${{ inputs.extra_pytest_path }}
        # Unquoted to keep the word splitting the inline interpolation had.
        run: python3 -m pytest -q $STAGE_EXTRA_PYTEST_PATH

      # Coredumps are uploaded only when an earlier step failed; the partition
      # id is passed as the artifact suffix.
      - uses: ./.github/actions/upload-cuda-coredumps
        if: failure()
        with:
          artifact-suffix: ${{ matrix.partition }}

      # Runs regardless of the outcome of the preceding steps.
      - name: Cleanup venv
        if: always()
        run: bash scripts/ci/cuda/ci_cleanup_venv.sh
Loading
Loading