sgl-project · hnyls2002 · May 14, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
@@ -0,0 +1,198 @@
+name: PR Test Stage
+# Reusable workflow that owns a single CUDA test stage. The caller
+# (pr-test.yml) passes self_name, runner_config, runs_on, partitions, etc.;
+# the per-commit / target_stage gating, the matrix fan-out, and the
+# setup/run/teardown steps all live here. Only stage-a-test-cpu is still
+# defined inline in pr-test.yml (it has a bespoke uv pip / protoc /
+# rust-cache install path).
+
+# Interface of this reusable workflow. Every value arrives from the caller via
+# `with:`; numeric knobs are typed as strings and converted with fromJson()
+# where the job consumes them.
+on:
+  workflow_call:
+    inputs:
+      # --- Stage identity and runner selection ---
+      self_name:
+        description: 'Caller job key, used for $GITHUB_JOB-style gating + partitions[suite] lookup.'
+        type: string
+        required: true
+      runner_config:
+        description: 'Looked up in scripts/ci/runner_configs.yml for install script / artifact version / install timeout.'
+        type: string
+        required: true
+      runs_on:
+        description: 'Physical GHA runner label, e.g. "1-gpu-5090" or the b200_runner output from check-changes for B200 stages.'
+        type: string
+        required: true
+      # --- Gating ---
+      target_stage:
+        description: 'Forwarded from pr-test.yml inputs.target_stage (used by /rerun-stage to skip everything except the targeted stage).'
+        type: string
+        default: ''
+      test_parallel_dispatch:
+        description: 'Forwarded from pr-test.yml inputs.test_parallel_dispatch.'
+        type: string
+        default: 'false'
+      partitions:
+        description: 'JSON from check-changes (size, arr, max_parallel per suite).'
+        type: string
+        required: true
+      main_package:
+        description: 'check-changes.outputs.main_package — gates the default per-commit run.'
+        type: string
+        required: true
+      sgl_kernel:
+        description: 'check-changes.outputs.sgl_kernel — both gates the run and forwarded to the install/download steps.'
+        type: string
+        required: true
+      # --- Test execution knobs ---
+      continue_on_error_flag:
+        description: 'Empty or `--continue-on-error`; forwarded to run_suite.py.'
+        type: string
+        default: ''
+      run_timeout_minutes:
+        # The cap is applied as `timeout-minutes` on the "Run test" step; no
+        # bash `timeout` wrapper is involved in this workflow.
+        description: 'Per-suite wall-clock cap (minutes), enforced via the step-level `timeout-minutes` on the "Run test" step.'
+        type: string
+        default: '30'
+      timeout_per_file:
+        description: 'Optional run_suite.py --timeout-per-file value.'
+        type: string
+        default: ''
+      # --- Warmup ---
+      warmup_deep_gemm_models:
+        description: 'Space-separated `model:gpus` list. Empty = skip DeepGEMM warmup.'
+        type: string
+        default: ''
+      warmup_server_models:
+        description: 'Space-separated `model:gpus` list. Empty = skip server-CUDA-graph warmup.'
+        type: string
+        default: ''
+      warmup_timeout_minutes:
+        description: 'Wall-clock cap for each warmup step (DeepGEMM + server). Default 25min; bump for stages with large cold-cache warmup model lists.'
+        type: string
+        default: '25'
+      extra_pytest_path:
+        description: 'Optional pytest path to run after the suite (stage-b-test-4-gpu-b200 uses this for FA4 jit_kernel tests). Empty = skip.'
+        type: string
+        default: ''
+      # --- Checkout ref selection and bypass switches ---
+      pr_head_sha:
+        description: 'Forwarded from pr-test.yml inputs.pr_head_sha (for /rerun-stage on fork PRs).'
+        type: string
+        default: ''
+      git_ref:
+        description: 'Forwarded from pr-test.yml inputs.git_ref (for workflow_call from main).'
+        type: string
+        default: ''
+      skip_stage_health_check:
+        description: 'Forwarded from pr-test.yml inputs.skip_stage_health_check (release branch cut bypass).'
+        type: boolean
+        default: false
+
+# Mirror pr-test.yml top-level env. Reusable workflows do NOT inherit caller's
+# workflow-level env across the workflow_call boundary, so anything pr-test.yml
+# defines must be redeclared here for the called job to see the same context.
+#
+# Environment variables are always strings, so every literal below is quoted
+# the same way; the two expression-valued entries already yield the strings
+# 'true' / 'false'.
+env:
+  SGLANG_IS_IN_CI: 'true'
+  SGLANG_CUDA_COREDUMP: '1'
+  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: 'true'
+  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check && 'true' || 'false' }}
+  FORCE_REBUILD_DEEPEP: '1'
+  PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
+  USE_VENV: 'false'
+
+jobs:
+  run:
+    # Mirror the inline gating that used to live in pr-test.yml on every CUDA
+    # stage job. target_stage takes precedence; otherwise default per-commit
+    # gating runs the stage on schedule / parallel-dispatch / non-failed PR
+    # with main_package or sgl_kernel changes.
+    #
+    # Read as: run when this stage is the explicit target, OR when no target
+    # is set AND (scheduled / parallel-dispatch, or nothing failed or was
+    # cancelled) AND at least one of main_package / sgl_kernel is 'true'.
+    if: |
+      always() &&
+      (
+        (inputs.target_stage == inputs.self_name) ||
+        (
+          !inputs.target_stage &&
+          ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == 'true') || (!failure() && !cancelled())) &&
+          (inputs.main_package == 'true' || inputs.sgl_kernel == 'true')
+        )
+      )
+    runs-on: ${{ inputs.runs_on }}
+    # Job-level cap; individual steps below carry their own tighter limits.
+    timeout-minutes: 240
+    env:
+      # Only stage-c-test-8-gpu-h20 needs the RDMA device list. For every other
+      # runner_config the expression yields an empty string.
+      SGLANG_CI_RDMA_ALL_DEVICES: ${{ inputs.runner_config == '8-gpu-h20' && 'mlx5_1,mlx5_2,mlx5_3,mlx5_4' || '' }}
+    strategy:
+      # One failing partition does not cancel its siblings.
+      fail-fast: false
+      # Both the concurrency cap and the partition ids are read from the
+      # partitions JSON entry keyed by self_name.
+      max-parallel: ${{ fromJson(inputs.partitions)[inputs.self_name].max_parallel }}
+      matrix:
+        partition: ${{ fromJson(inputs.partitions)[inputs.self_name].arr }}
+    steps:
+      # Ref precedence: pr_head_sha, then git_ref, then the triggering commit
+      # (github.sha) when both inputs are empty.
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
+
+      # Writes the resolved config to this step's outputs; later steps read
+      # steps.rc.outputs.artifact_version, .install and .install_timeout.
+      - name: Resolve runner_config
+        id: rc
+        env:
+          # Pass the input through the environment rather than interpolating
+          # the expression into the shell source, so its value is only ever
+          # treated as data by the shell.
+          RUNNER_CONFIG_NAME: ${{ inputs.runner_config }}
+        run: python3 scripts/ci/runner_configs.py "$RUNNER_CONFIG_NAME" >> "$GITHUB_OUTPUT"
+
+      # Repo-local actions; their behavior is defined under .github/actions/
+      # and is not visible from this file.
+      # NOTE(review): presumably these read SKIP_STAGE_HEALTH_CHECK and
+      # PR_TEST_BYPASS_MAINTENANCE_ON_MAIN from the workflow-level env —
+      # confirm against the action definitions.
+      - uses: ./.github/actions/check-stage-health
+
+      - uses: ./.github/actions/check-maintenance
+
+      # At most one of the two download steps runs: they are keyed on the
+      # artifact_version output of the rc step, and both are skipped unless
+      # sgl_kernel == 'true'. The steps differ only in the major version of
+      # actions/download-artifact.
+      - name: Download artifacts (v4)
+        if: ${{ inputs.sgl_kernel == 'true' && steps.rc.outputs.artifact_version == 'v4' }}
+        uses: actions/download-artifact@v4
+        with:
+          path: sgl-kernel/dist/
+          merge-multiple: true
+          pattern: wheel-python3.10-cuda*
+
+      - name: Download artifacts (v6)
+        if: ${{ inputs.sgl_kernel == 'true' && steps.rc.outputs.artifact_version == 'v6' }}
+        uses: actions/download-artifact@v6
+        with:
+          path: sgl-kernel/dist/
+          merge-multiple: true
+          pattern: wheel-python3.10-cuda*
+
+      # Both the install command and its timeout come from the rc step
+      # outputs. sgl_kernel is forwarded to the install script as
+      # CUSTOM_BUILD_SGL_KERNEL.
+      # NOTE(review): the `install` output is spliced into the shell line
+      # unquoted, so it may carry arguments as well as a script path — confirm
+      # against scripts/ci/runner_configs.py before restructuring.
+      - name: Install dependencies
+        timeout-minutes: ${{ fromJson(steps.rc.outputs.install_timeout) }}
+        run: |
+          CUSTOM_BUILD_SGL_KERNEL=${{ inputs.sgl_kernel }} bash ${{ steps.rc.outputs.install }}
+
+      # Skipped entirely when no DeepGEMM warmup models were requested.
+      - name: Warmup DeepGEMM JIT Compilation
+        if: inputs.warmup_deep_gemm_models != ''
+        timeout-minutes: ${{ fromJson(inputs.warmup_timeout_minutes) }}
+        env:
+          # Hand the model list to the script through the environment instead
+          # of pasting the expression result into the shell source.
+          WARMUP_DEEP_GEMM_MODELS: ${{ inputs.warmup_deep_gemm_models }}
+        run: |
+          # Activate venv if available (GITHUB_ENV may have failed to propagate)
+          [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
+          [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
+          # Unquoted on purpose: the input is a space-separated `model:gpus`
+          # list and each entry must become its own argument.
+          # shellcheck disable=SC2086
+          python3 scripts/ci/cuda/warmup_deep_gemm.py $WARMUP_DEEP_GEMM_MODELS
+
+      # Skipped entirely when no server warmup models were requested.
+      - name: Warmup Server CUDA Graphs
+        if: inputs.warmup_server_models != ''
+        timeout-minutes: ${{ fromJson(inputs.warmup_timeout_minutes) }}
+        env:
+          # Hand the model list to the script through the environment instead
+          # of pasting the expression result into the shell source.
+          WARMUP_SERVER_MODELS: ${{ inputs.warmup_server_models }}
+        run: |
+          # Activate the CI venv and its env.sh when they exist.
+          [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate"
+          [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh"
+          # Unquoted on purpose: the input is a space-separated `model:gpus`
+          # list and each entry must become its own argument.
+          # shellcheck disable=SC2086
+          python3 scripts/ci/cuda/warmup_server.py $WARMUP_SERVER_MODELS
+
+      # Runs this matrix partition of the suite named by self_name. The
+      # wall-clock cap is the step-level timeout-minutes below.
+      - name: Run test
+        timeout-minutes: ${{ fromJson(inputs.run_timeout_minutes) }}
+        env:
+          # All workflow values reach the script as environment variables so
+          # that no expression result is interpolated into the shell source.
+          CONTINUE_ON_ERROR_FLAG: ${{ inputs.continue_on_error_flag }}
+          RUN_SUITE_NAME: ${{ inputs.self_name }}
+          RUN_SUITE_PARTITION_ID: ${{ matrix.partition }}
+          RUN_SUITE_PARTITION_SIZE: ${{ fromJson(inputs.partitions)[inputs.self_name].size }}
+          RUN_SUITE_TIMEOUT_PER_FILE: ${{ inputs.timeout_per_file }}
+        run: |
+          cd test
+          # --timeout-per-file is added only when the input is non-empty.
+          # CONTINUE_ON_ERROR_FLAG is left unquoted so that an empty value
+          # contributes no argument at all.
+          # shellcheck disable=SC2086
+          python3 run_suite.py --hw cuda --suite "$RUN_SUITE_NAME" \
+            --auto-partition-id "$RUN_SUITE_PARTITION_ID" \
+            --auto-partition-size "$RUN_SUITE_PARTITION_SIZE" \
+            ${RUN_SUITE_TIMEOUT_PER_FILE:+--timeout-per-file "$RUN_SUITE_TIMEOUT_PER_FILE"} \
+            $CONTINUE_ON_ERROR_FLAG
+
+      # Optional follow-up pytest invocation; skipped when no path was given.
+      - name: Run extra pytest
+        if: inputs.extra_pytest_path != ''
+        timeout-minutes: 10
+        env:
+          # Passed via the environment so the value is not interpolated into
+          # the shell source.
+          EXTRA_PYTEST_PATH: ${{ inputs.extra_pytest_path }}
+        # Left unquoted so a caller that supplies several whitespace-separated
+        # paths keeps working exactly as with the previous inline form.
+        run: python3 -m pytest -q $EXTRA_PYTEST_PATH
+
+      # Runs only when an earlier step in this job failed; the artifact suffix
+      # is the matrix partition id.
+      - uses: ./.github/actions/upload-cuda-coredumps
+        if: failure()
+        with:
+          artifact-suffix: ${{ matrix.partition }}
+
+      # Runs regardless of how the preceding steps ended.
+      - name: Cleanup venv
+        if: always()
+        run: bash scripts/ci/cuda/ci_cleanup_venv.sh