Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pr-test-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ jobs:
- name: Run test
timeout-minutes: 30
run: |
bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12
bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12 --timeout-per-file 1800

stage-b-test-small-1-gpu-amd-mi35x:
needs: [check-changes, stage-a-test-1-amd]
Expand Down
30 changes: 18 additions & 12 deletions test/registered/attention/test_triton_attention_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,11 @@
)
from sglang.srt.utils import get_device
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
from sglang.test.test_utils import CustomTestCase
from sglang.test.test_utils import CustomTestCase, is_in_amd_ci

# Triton attention kernel unit tests (decode, extend, prefill)
register_cuda_ci(est_time=30, suite="stage-b-test-small-1-gpu")
register_amd_ci(
est_time=30,
suite="stage-b-test-small-1-gpu-amd",
disabled="test was never enabled for AMD CI, needs validation",
)
register_amd_ci(est_time=30, suite="stage-b-test-small-1-gpu-amd")


def extend_attention_fwd_torch(
Expand Down Expand Up @@ -627,7 +623,10 @@ def _test_grouped_decode_attention_once(self, B, S, H_Q, H_KV, D, D_V):
)
print(cos_sim.item())
self.assertTrue(cos_sim.item() > 0.99)
self.assertTrue(torch.allclose(o, o_grouped, atol=3e-2))
if is_in_amd_ci():
self.assertTrue(torch.allclose(o, o_grouped, atol=5e-2))
else:
self.assertTrue(torch.allclose(o, o_grouped, atol=3e-2))

def test_grouped_decode_attention(self):
seq_lens = [5, 100, 128, 500]
Expand Down Expand Up @@ -764,11 +763,18 @@ def _test_extend_attention_unified_vs_regular_once(self, B, N_CTX, H_Q, H_KV, D)
)

# Compare results
self.assertTrue(
torch.allclose(o_regular, o_unified, rtol=0.15, atol=0.15),
f"Unified kernel output differs from 2-stage kernel. "
f"Max diff: {(o_regular - o_unified).abs().max()}",
)
if is_in_amd_ci():
self.assertTrue(
torch.allclose(o_regular, o_unified, rtol=0.15, atol=0.17),
f"Unified kernel output differs from 2-stage kernel. "
f"Max diff: {(o_regular - o_unified).abs().max()}",
)
else:
self.assertTrue(
torch.allclose(o_regular, o_unified, rtol=0.15, atol=0.15),
f"Unified kernel output differs from 2-stage kernel. "
f"Max diff: {(o_regular - o_unified).abs().max()}",
)

def test_extend_attention_unified_vs_regular(self):
"""Test unified kernel matches 2-stage kernel across different configs."""
Expand Down
7 changes: 6 additions & 1 deletion test/registered/lora/test_lora_qwen3.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,18 @@
import multiprocessing as mp
import unittest

from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
from sglang.test.lora_utils import (
LORA_MODELS_QWEN3,
run_lora_multiple_batch_on_model_cases,
)

register_cuda_ci(est_time=97, suite="nightly-1-gpu", nightly=True)
register_amd_ci(
est_time=30,
suite="stage-b-test-small-1-gpu-amd",
disabled="see https://github.com/sgl-project/sglang/issues/13107",
)

from sglang.test.test_utils import CustomTestCase

Expand Down
3 changes: 2 additions & 1 deletion test/registered/moe/test_torch_compile_moe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci

register_cuda_ci(est_time=210, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=1400, suite="stage-b-test-small-1-gpu-amd")

import time
import unittest
Expand Down
2 changes: 1 addition & 1 deletion test/srt/test_deepseek_r1_mxfp4_8gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
)

DEEPSEEK_R1_MODEL_PATH = "amd/DeepSeek-R1-MXFP4-Preview"
SERVER_LAUNCH_TIMEOUT = 1000
SERVER_LAUNCH_TIMEOUT = 1200


class TestDeepseekR1MXFP4(CustomTestCase):
Expand Down
Loading