Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
7b18bf6
Migrate performance, accuracy, and quantization tests to CI registry
alisonshao Jan 15, 2026
e4bde31
Merge branch 'main' into ci/migrate-perf-accuracy-quant-tests
Kangyan-Zhou Jan 15, 2026
c27b60b
Move bench_serving tests to test/registered/perf/
alisonshao Jan 15, 2026
a95fdef
Migrate quantization and accuracy tests to test/registered/
alisonshao Jan 15, 2026
71c9af8
Remove quantization-test job from workflow
alisonshao Jan 15, 2026
9c79802
Remove performance and accuracy jobs, migrate test_bench_one_batch
alisonshao Jan 16, 2026
24d0d6a
Add separate performance and accuracy workflow jobs
alisonshao Jan 16, 2026
31d1478
Update AMD workflow to use new test locations
alisonshao Jan 16, 2026
4f8eed5
Add 1-hour job timeout and move test_bench_serving_1gpu.py to large r…
alisonshao Jan 16, 2026
4909c40
Merge branch 'main' into ci/migrate-perf-accuracy-quant-tests
alisonshao Jan 16, 2026
1abb383
Fix small-1-gpu performance/accuracy jobs to use 5090 runners
alisonshao Jan 16, 2026
5248623
Move test_bench_one_batch_1gpu.py to large-1-gpu-performance suite
alisonshao Jan 16, 2026
d64db41
Add VLM performance tests for 5090 GPU
alisonshao Jan 16, 2026
b70b03b
Move failing quantization tests from small-1-gpu to large-1-gpu suite
alisonshao Jan 16, 2026
e06dcab
Fix runner label for stage-b-test-large-1-gpu-performance
alisonshao Jan 16, 2026
cfcb601
Move test_eagle_infer_beta.py to large-1-gpu suite
alisonshao Jan 16, 2026
8ac2029
Move test_quantization.py to large-1-gpu suite
alisonshao Jan 16, 2026
065c581
Revert test_eagle_infer_beta.py back to small-1-gpu suite
alisonshao Jan 16, 2026
daaf60d
Increase large-1-gpu-performance timeout and add suites to registry
alisonshao Jan 17, 2026
847598e
Increase timeout for large-1-gpu-performance tests
alisonshao Jan 17, 2026
c9f9299
Merge branch 'main' into ci/migrate-perf-accuracy-quant-tests
alisonshao Jan 17, 2026
b777082
Split test_bench_serving_1gpu.py into 2 parts and add 2 partitions
alisonshao Jan 17, 2026
920c727
Fix AMD workflow paths for renamed test files
alisonshao Jan 17, 2026
7733b62
Merge branch 'main' into ci/migrate-perf-accuracy-quant-tests
alisonshao Jan 17, 2026
3e90af3
Merge branch 'main' into ci/migrate-perf-accuracy-quant-tests
alisonshao Jan 17, 2026
4a3d1a6
Merge branch 'main' into ci/migrate-perf-accuracy-quant-tests
alisonshao Jan 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions .github/workflows/pr-test-amd.yml
Original file line number Diff line number Diff line change
@@ -713,23 +713,23 @@ jobs:
- name: Benchmark single latency
timeout-minutes: 20
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_1gpu.TestBenchOneBatch1GPU.test_bs1_small
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_1gpu.TestBenchOneBatch1GPU.test_bs1_default

- name: Benchmark online latency
timeout-minutes: 15
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_online_latency_default

- name: Benchmark offline throughput
timeout-minutes: 15
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_default

- name: Benchmark offline throughput (Non-streaming, small batch size)
timeout-minutes: 15
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_non_stream_small_batch_size

performance-test-1-gpu-part-2-amd:
needs: [check-changes, stage-a-test-1-amd]
@@ -768,17 +768,17 @@ jobs:
- name: Benchmark offline throughput (w/o RadixAttention)
timeout-minutes: 15
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_without_radix_cache

- name: Benchmark offline throughput (w/ Triton)
timeout-minutes: 15
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_with_triton_attention_backend

- name: Benchmark offline throughput (w/ FP8)
timeout-minutes: 15
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_large.TestBenchServing1GPULarge.test_offline_throughput_default_fp8

performance-test-2-gpu-amd:
needs: [check-changes, stage-a-test-1-amd]
@@ -822,32 +822,32 @@ jobs:
- name: Benchmark single latency (TP=2)
timeout-minutes: 25
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_2gpu.TestBenchOneBatch2GPU.test_moe_tp2_bs1

- name: Benchmark single latency + torch.compile (TP=2)
timeout-minutes: 25
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_2gpu.TestBenchOneBatch2GPU.test_torch_compile_tp2_bs1

- name: Benchmark offline throughput (TP=2)
timeout-minutes: 25
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_moe_offline_throughput_default

- name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
timeout-minutes: 25
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_moe_offline_throughput_without_radix_cache

- name: Benchmark offline PP decode throughput (PP=2)
timeout-minutes: 10
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_pp_offline_throughput_default_decode
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_pp_offline_throughput_default_decode

- name: Benchmark offline PP prefill throughput (PP=2)
timeout-minutes: 10
run: |
-bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_pp_long_context_prefill

accuracy-test-1-gpu-amd:
needs: [check-changes, stage-a-test-1-amd]
@@ -886,7 +886,7 @@ jobs:
- name: Evaluate Accuracy
timeout-minutes: 30
run: |
-bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/eval -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py

accuracy-test-2-gpu-amd:
needs: [check-changes, accuracy-test-1-gpu-amd]
@@ -926,7 +926,7 @@ jobs:
- name: Evaluate accuracy (TP=2)
timeout-minutes: 30
run: |
-bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER_AR=0 -e SGLANG_USE_AITER=0 -e HF_HUB_ENABLE_HF_TRANSFER=0 python3 test_moe_eval_accuracy_large.py
+bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/eval -e SGLANG_USE_AITER_AR=0 -e SGLANG_USE_AITER=0 -e HF_HUB_ENABLE_HF_TRANSFER=0 python3 test_moe_eval_accuracy_large.py

pr-test-amd-finish:
needs:
Expand Down
Loading
Loading