Skip to content
129 changes: 44 additions & 85 deletions .github/workflows/pr-test-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,46 @@ jobs:
- name: Run test
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 12
bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12

# Stage-B "small" test suite on AMD MI35x hardware (single-GPU runner).
# New job added by this PR, mirroring the MI300-series stage-b job above but
# pinned to the mi35x-specific suite and runner pool.
stage-b-test-small-1-gpu-amd-mi35x:
needs: [check-changes, stage-a-test-1-amd]
# always() keeps the job schedulable even when upstream jobs are skipped
# (manual re-run via inputs.target_stage); the inner branch still bails
# out on failure/cancellation for the default (non-targeted) path, and
# only runs when the main package or sgl-kernel actually changed.
if: |
always() &&
(
(inputs.target_stage == 'stage-b-test-small-1-gpu-amd-mi35x') ||
(
!inputs.target_stage &&
(!failure() && !cancelled()) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
strategy:
# Let every matrix entry finish even if one runner fails.
fail-fast: false
matrix:
# Single-entry matrix: the dedicated 1-GPU MI35x runner pool.
runner: [linux-mi35x-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
# Prefer the PR head SHA when dispatched for a PR; fall back to an
# explicit ref, then to the triggering commit.
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

# Kill leftover processes / reset GPU memory from previous CI runs.
- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm

- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}

- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh

- name: Run test
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd-mi35x

stage-b-test-large-2-gpu-amd:
needs: [check-changes, stage-a-test-1-amd]
Expand Down Expand Up @@ -545,87 +584,8 @@ jobs:
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

unit-test-backend-1-gpu-amd-mi35x:
needs: [check-changes, stage-a-test-1-amd]
if: |
always() &&
(
(inputs.target_stage == 'unit-test-backend-1-gpu-amd-mi35x') ||
(
!inputs.target_stage &&
(!failure() && !cancelled()) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
strategy:
fail-fast: false
matrix:
runner: [linux-mi35x-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm

- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}

- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh

- name: Run test
timeout-minutes: 15
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x

unit-test-backend-2-gpu-amd:
needs: [check-changes, stage-a-test-1-amd]
if: |
always() &&
(
(inputs.target_stage == 'unit-test-backend-2-gpu-amd') ||
(
!inputs.target_stage &&
(!failure() && !cancelled()) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
strategy:
fail-fast: false
matrix:
runner: [linux-mi325-gpu-2]
part: [0, 1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm

- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}

- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh

- name: Run test
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

unit-test-backend-8-gpu-amd:
needs: [check-changes, unit-test-backend-2-gpu-amd]
needs: [check-changes, stage-a-test-1-amd]
if: |
always() &&
(
Expand Down Expand Up @@ -673,7 +633,7 @@ jobs:
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 3600

unit-test-backend-8-gpu-amd-mi35x:
needs: [check-changes, unit-test-backend-2-gpu-amd]
needs: [check-changes, stage-a-test-1-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
Expand Down Expand Up @@ -806,7 +766,7 @@ jobs:
bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8

performance-test-2-gpu-amd:
needs: [check-changes, unit-test-backend-2-gpu-amd]
needs: [check-changes, stage-a-test-1-amd]
if: |
always() &&
(
Expand Down Expand Up @@ -965,10 +925,9 @@ jobs:

stage-a-test-1-amd,
stage-b-test-small-1-gpu-amd,
stage-b-test-small-1-gpu-amd-mi35x,
stage-b-test-large-2-gpu-amd,
unit-test-backend-1-gpu-amd,
unit-test-backend-1-gpu-amd-mi35x,
unit-test-backend-2-gpu-amd,
unit-test-backend-8-gpu-amd,
unit-test-backend-8-gpu-amd-mi35x,
performance-test-1-gpu-part-1-amd,
Expand Down
1 change: 1 addition & 0 deletions scripts/ci/slash_command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def handle_rerun_stage(
"sgl-kernel-unit-test-amd",
"stage-a-test-1-amd",
"stage-b-test-small-1-gpu-amd",
"stage-b-test-small-1-gpu-amd-mi35x",
"stage-b-test-large-2-gpu-amd",
"unit-test-backend-1-gpu-amd",
"unit-test-backend-2-gpu-amd",
Expand Down
2 changes: 1 addition & 1 deletion test/registered/attention/test_create_kvindices.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

# Triton kernel unit test for KV indices creation
register_cuda_ci(est_time=10, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu-amd")


class TestCreateKvIndices(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/attention/test_radix_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# RadixAttention server integration tests
register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd")


class TestRadixCacheFCFS(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/attention/test_swa_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci

register_cuda_ci(est_time=8, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu-amd")


class TestSWA(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

# Torch native attention backend integration test with MMLU eval
register_cuda_ci(est_time=150, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu-amd")


class TestTorchNativeAttnBackend(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/attention/test_triton_attention_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

# Triton attention backend integration test with latency benchmark and MMLU eval
register_cuda_ci(est_time=200, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=1110, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=1110, suite="stage-b-test-small-1-gpu-amd")


class TestTritonAttnBackend(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/attention/test_triton_attention_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
register_cuda_ci(est_time=30, suite="stage-b-test-small-1-gpu")
register_amd_ci(
est_time=30,
suite="stage-b-test-small-1-gpu",
suite="stage-b-test-small-1-gpu-amd",
disabled="test was never enabled for AMD CI, needs validation",
)

Expand Down
2 changes: 1 addition & 1 deletion test/registered/attention/test_triton_sliding_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

# Sliding window attention with Triton backend (Gemma-3 model)
register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd")


class TestSlidingWindowAttentionTriton(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/backends/test_torch_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)

register_cuda_ci(est_time=190, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=1100, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=1100, suite="stage-b-test-small-1-gpu-amd")


class TestTorchCompile(CustomTestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci

register_cuda_ci(est_time=111, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=179, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=179, suite="stage-b-test-small-1-gpu-amd")

import unittest

Expand Down
2 changes: 1 addition & 1 deletion test/registered/core/test_gpt_oss_1gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sglang.test.gpt_oss_common import BaseTestGptOss

register_cuda_ci(est_time=402, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=750, suite="stage-b-test-small-1-gpu-amd")
register_amd_ci(est_time=750, suite="stage-b-test-small-1-gpu-amd-mi35x")


class TestGptOss1Gpu(BaseTestGptOss):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci

register_cuda_ci(est_time=9, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=15, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=15, suite="stage-b-test-small-1-gpu-amd")

import unittest

Expand Down
2 changes: 1 addition & 1 deletion test/registered/dllm/test_llada2_mini_amd.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from sglang.test.ci.ci_register import register_amd_ci

register_amd_ci(est_time=520, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=520, suite="stage-b-test-small-1-gpu-amd")

"""
Test LLaDA2 (Diffusion Language Model) on AMD GPUs.
Expand Down
2 changes: 1 addition & 1 deletion test/registered/lora/test_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from sglang.test.test_utils import CustomTestCase, is_in_ci

register_cuda_ci(est_time=82, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=82, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=82, suite="stage-b-test-small-1-gpu-amd")


class TestLoRA(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/lora/test_lora_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
register_cuda_ci(est_time=200, suite="stage-b-test-small-1-gpu")
register_amd_ci(
est_time=200,
suite="stage-b-test-small-1-gpu",
suite="stage-b-test-small-1-gpu-amd",
disabled="see https://github.com/sgl-project/sglang/issues/13107",
)

Expand Down
2 changes: 1 addition & 1 deletion test/registered/lora/test_lora_eviction.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sglang.test.test_utils import CustomTestCase

register_cuda_ci(est_time=224, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=224, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=224, suite="stage-b-test-small-1-gpu-amd")

PROMPTS = [
"AI is a field of computer science focused on",
Expand Down
2 changes: 1 addition & 1 deletion test/registered/lora/test_multi_lora_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from sglang.test.test_utils import CustomTestCase, is_in_ci

register_cuda_ci(est_time=60, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu-amd")

# All prompts are used at once in a batch.
PROMPTS = [
Expand Down
2 changes: 1 addition & 1 deletion test/registered/metrics/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci

register_cuda_ci(est_time=32, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=32, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=32, suite="stage-b-test-small-1-gpu-amd")
from prometheus_client.parser import text_string_to_metric_families
from prometheus_client.samples import Sample

Expand Down
2 changes: 1 addition & 1 deletion test/registered/mla/test_mla.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
register_cuda_ci(est_time=194, suite="stage-b-test-small-1-gpu")
register_amd_ci(
est_time=242,
suite="stage-a-test-1",
suite="stage-b-test-small-1-gpu-amd",
disabled="see https://github.com/sgl-project/sglang/issues/13107",
)

Expand Down
2 changes: 1 addition & 1 deletion test/registered/mla/test_mla_deepseek_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
register_cuda_ci(est_time=442, suite="stage-b-test-small-1-gpu")
register_amd_ci(
est_time=221,
suite="stage-a-test-1",
suite="stage-b-test-small-1-gpu-amd",
disabled="see https://github.com/sgl-project/sglang/issues/12574",
)

Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_compressed_tensors_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Model tests for compressed tensors (FP8)
register_cuda_ci(est_time=42, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=42, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=42, suite="stage-b-test-small-1-gpu-amd")

import unittest
from types import SimpleNamespace
Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_cross_encoder_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Cross encoder model tests
register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu-amd")

import multiprocessing as mp
import random
Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_embedding_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
register_cuda_ci(est_time=73, suite="stage-b-test-small-1-gpu")
register_amd_ci(
est_time=73,
suite="stage-b-test-small-1-gpu",
suite="stage-b-test-small-1-gpu-amd",
disabled="see https://github.com/sgl-project/sglang/issues/11127",
)

Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_qwen_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Qwen model tests
register_cuda_ci(est_time=90, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=130, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=130, suite="stage-b-test-small-1-gpu-amd")

import unittest
from types import SimpleNamespace
Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_reward_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Reward model tests
register_cuda_ci(est_time=103, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=132, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=132, suite="stage-b-test-small-1-gpu-amd")

# Copyright 2023-2024 SGLang Team
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_transformers_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Transformers fallback model tests
register_cuda_ci(est_time=245, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=320, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=320, suite="stage-b-test-small-1-gpu-amd")

import dataclasses
import multiprocessing as mp
Expand Down
Loading
Loading