Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
269 changes: 12 additions & 257 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -274,19 +274,14 @@ jobs:
const pollIntervalSeconds = 120; // 2 minutes to reduce GH API calls
const maxAttempts = (maxWaitMinutes * 60) / pollIntervalSeconds;

// Stage-b jobs to wait for (all stage-b tests including performance and accuracy)
// Stage-b jobs to wait for
const stageBJobs = [
{ prefix: 'stage-b-test-small-1-gpu', expectedCount: 8 }, // partitions 0-7
{ prefix: 'stage-b-test-large-1-gpu', expectedCount: 12 }, // partitions 0-11
{ prefix: 'stage-b-test-large-2-gpu', expectedCount: 2 }, // partitions 0-1
{ prefix: 'stage-b-test-large-1-gpu', expectedCount: 14 }, // partitions 0-13
{ prefix: 'stage-b-test-large-2-gpu', expectedCount: 4 }, // partitions 0-3
{ prefix: 'stage-b-test-4-gpu-b200', expectedCount: 1 },
{ prefix: 'stage-b-test-small-1-gpu-performance', expectedCount: 1 },
{ prefix: 'stage-b-test-large-1-gpu-performance', expectedCount: 2 }, // partitions 0-1
{ prefix: 'stage-b-test-large-2-gpu-performance', expectedCount: 1 },
{ prefix: 'stage-b-test-small-1-gpu-accuracy', expectedCount: 1 },
{ prefix: 'stage-b-test-large-2-gpu-accuracy', expectedCount: 1 }
];
const totalExpectedJobs = stageBJobs.reduce((sum, j) => sum + j.expectedCount, 0); // 29 total
const totalExpectedJobs = stageBJobs.reduce((sum, j) => sum + j.expectedCount, 0); // 27 total

// Helper to match job names exactly (prefix alone or prefix + " (N)" for matrix jobs)
const matchesPrefix = (jobName, prefix) => {
Expand Down Expand Up @@ -841,6 +836,9 @@ jobs:
run: |
source /etc/profile.d/sglang-ci.sh
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
git clone https://github.com/merrymercy/human-eval.git
cd human-eval
pip install -e .

- name: Run test
timeout-minutes: 30
Expand Down Expand Up @@ -874,7 +872,7 @@ jobs:
fail-fast: false
max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }}
matrix:
partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand Down Expand Up @@ -902,7 +900,7 @@ jobs:
if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
CONTINUE_ON_ERROR_FLAG="--continue-on-error"
fi
python3 run_suite.py --hw cuda --suite stage-b-test-large-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 12 $CONTINUE_ON_ERROR_FLAG
python3 run_suite.py --hw cuda --suite stage-b-test-large-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

stage-b-test-large-2-gpu:
needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
Expand All @@ -923,245 +921,7 @@ jobs:
strategy:
fail-fast: false
matrix:
partition: [0, 1]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
timeout-minutes: 10
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

- name: Run test
timeout-minutes: 30
run: |
cd test/
CONTINUE_ON_ERROR_FLAG=""
if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
CONTINUE_ON_ERROR_FLAG="--continue-on-error"
fi
python3 run_suite.py --hw cuda --suite stage-b-test-large-2-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG

stage-b-test-small-1-gpu-performance:
needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
if: |
always() &&
(
(inputs.target_stage == 'stage-b-test-small-1-gpu-performance') ||
(
!inputs.target_stage &&
((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
runs-on: 1-gpu-5090
timeout-minutes: 240
env:
RUNNER_LABELS: 1-gpu-5090
IS_BLACKWELL: "1"
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
timeout-minutes: 10
run: |
source /etc/profile.d/sglang-ci.sh
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

- name: Run test
timeout-minutes: 30
run: |
source /etc/profile.d/sglang-ci.sh
cd test/
CONTINUE_ON_ERROR_FLAG=""
if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
CONTINUE_ON_ERROR_FLAG="--continue-on-error"
fi
python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu-performance $CONTINUE_ON_ERROR_FLAG

stage-b-test-large-1-gpu-performance:
needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
if: |
always() &&
(
(inputs.target_stage == 'stage-b-test-large-1-gpu-performance') ||
(
!inputs.target_stage &&
((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
runs-on: 1-gpu-runner
timeout-minutes: 240
env:
RUNNER_LABELS: 1-gpu-runner
strategy:
fail-fast: false
matrix:
partition: [0, 1]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
timeout-minutes: 10
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

- name: Run test
timeout-minutes: 40
run: |
cd test/
CONTINUE_ON_ERROR_FLAG=""
if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
CONTINUE_ON_ERROR_FLAG="--continue-on-error"
fi
python3 run_suite.py --hw cuda --suite stage-b-test-large-1-gpu-performance --auto-partition-id ${{ matrix.partition }} --auto-partition-size 2 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

stage-b-test-large-2-gpu-performance:
needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
if: |
always() &&
(
(inputs.target_stage == 'stage-b-test-large-2-gpu-performance') ||
(
!inputs.target_stage &&
((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
runs-on: 2-gpu-runner
timeout-minutes: 240
env:
RUNNER_LABELS: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
timeout-minutes: 10
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

- name: Run test
timeout-minutes: 30
run: |
cd test/
CONTINUE_ON_ERROR_FLAG=""
if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
CONTINUE_ON_ERROR_FLAG="--continue-on-error"
fi
python3 run_suite.py --hw cuda --suite stage-b-test-large-2-gpu-performance $CONTINUE_ON_ERROR_FLAG

stage-b-test-small-1-gpu-accuracy:
needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
if: |
always() &&
(
(inputs.target_stage == 'stage-b-test-small-1-gpu-accuracy') ||
(
!inputs.target_stage &&
((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
runs-on: 1-gpu-5090
timeout-minutes: 240
env:
RUNNER_LABELS: 1-gpu-5090
IS_BLACKWELL: "1"
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
timeout-minutes: 10
run: |
source /etc/profile.d/sglang-ci.sh
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
git clone https://github.com/merrymercy/human-eval.git
cd human-eval
pip install -e .

- name: Run test
timeout-minutes: 25
run: |
source /etc/profile.d/sglang-ci.sh
cd test/
CONTINUE_ON_ERROR_FLAG=""
if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
CONTINUE_ON_ERROR_FLAG="--continue-on-error"
fi
python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu-accuracy $CONTINUE_ON_ERROR_FLAG

stage-b-test-large-2-gpu-accuracy:
needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
if: |
always() &&
(
(inputs.target_stage == 'stage-b-test-large-2-gpu-accuracy') ||
(
!inputs.target_stage &&
((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
runs-on: 2-gpu-runner
timeout-minutes: 240
env:
RUNNER_LABELS: 2-gpu-runner
partition: [0, 1, 2, 3]
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand All @@ -1185,14 +945,14 @@ jobs:
pip install -e .

- name: Run test
timeout-minutes: 25
timeout-minutes: 30
run: |
cd test/
CONTINUE_ON_ERROR_FLAG=""
if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
CONTINUE_ON_ERROR_FLAG="--continue-on-error"
fi
python3 run_suite.py --hw cuda --suite stage-b-test-large-2-gpu-accuracy $CONTINUE_ON_ERROR_FLAG
python3 run_suite.py --hw cuda --suite stage-b-test-large-2-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

stage-b-test-4-gpu-b200:
needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
Expand Down Expand Up @@ -1829,11 +1589,6 @@ jobs:
stage-b-test-small-1-gpu,
stage-b-test-large-1-gpu,
stage-b-test-large-2-gpu,
stage-b-test-small-1-gpu-performance,
stage-b-test-large-1-gpu-performance,
stage-b-test-large-2-gpu-performance,
stage-b-test-small-1-gpu-accuracy,
stage-b-test-large-2-gpu-accuracy,
stage-c-test-large-4-gpu,
stage-b-test-4-gpu-b200,
unit-test-backend-4-gpu,
Expand Down
10 changes: 10 additions & 0 deletions python/sglang/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1495,6 +1495,10 @@ def run_bench_one_batch(model, other_args):
command += ["--model-path", model]
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

prefill_latency = None
decode_throughput = None
decode_latency = None

try:
stdout, stderr = process.communicate()
output = stdout.decode(errors="backslashreplace")
Expand All @@ -1517,6 +1521,12 @@ def run_bench_one_batch(model, other_args):
finally:
kill_process_tree(process.pid)

if prefill_latency is None or decode_throughput is None or decode_latency is None:
raise RuntimeError(
f"Failed to parse benchmark output. "
f"prefill_latency={prefill_latency}, decode_throughput={decode_throughput}, decode_latency={decode_latency}"
)

return prefill_latency, decode_throughput, decode_latency


Expand Down
4 changes: 2 additions & 2 deletions test/registered/eval/test_eval_accuracy_large.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
write_github_step_summary,
)

register_cuda_ci(est_time=300, suite="stage-b-test-small-1-gpu-accuracy")
register_amd_ci(est_time=300, suite="stage-b-test-small-1-gpu-accuracy-amd")
register_cuda_ci(est_time=300, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=300, suite="stage-b-test-small-1-gpu-amd")


class TestEvalAccuracyLarge(CustomTestCase):
Expand Down
4 changes: 2 additions & 2 deletions test/registered/eval/test_moe_eval_accuracy_large.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
write_github_step_summary,
)

register_cuda_ci(est_time=500, suite="stage-b-test-large-2-gpu-accuracy")
register_amd_ci(est_time=500, suite="stage-b-test-large-2-gpu-accuracy-amd")
register_cuda_ci(est_time=500, suite="stage-b-test-large-2-gpu")
register_amd_ci(est_time=500, suite="stage-b-test-large-2-gpu-amd")


class TestMoEEvalAccuracyLarge(CustomTestCase):
Expand Down
4 changes: 2 additions & 2 deletions test/registered/perf/test_bench_one_batch_1gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
write_github_step_summary,
)

register_cuda_ci(est_time=120, suite="stage-b-test-large-1-gpu-performance")
register_amd_ci(est_time=120, suite="stage-b-test-large-1-gpu-performance-amd")
register_cuda_ci(est_time=120, suite="stage-b-test-large-1-gpu")
register_amd_ci(est_time=120, suite="stage-b-test-large-1-gpu-amd")


class TestBenchOneBatch1GPU(CustomTestCase):
Expand Down
4 changes: 2 additions & 2 deletions test/registered/perf/test_bench_one_batch_2gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
write_github_step_summary,
)

register_cuda_ci(est_time=180, suite="stage-b-test-large-2-gpu-performance")
register_amd_ci(est_time=630, suite="stage-b-test-large-2-gpu-performance-amd")
register_cuda_ci(est_time=180, suite="stage-b-test-large-2-gpu")
register_amd_ci(est_time=630, suite="stage-b-test-large-2-gpu-amd")


class TestBenchOneBatch2GPU(CustomTestCase):
Expand Down
Loading
Loading