sgl-project · bingxche · Jan 5, 2026 · Jan 5, 2026 · gemini-code-assist · Jan 5, 2026
diff --git a/test/registered/attention/test_create_kvindices.py b/test/registered/attention/test_create_kvindices.py
@@ -4,11 +4,12 @@
 import torch
 
 from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
-from sglang.test.ci.ci_register import register_cuda_ci
+from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
 from sglang.test.test_utils import CustomTestCase
 
 # Triton kernel unit test for KV indices creation
 register_cuda_ci(est_time=10, suite="stage-b-test-small-1-gpu")
+register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu")
 
 
 class TestCreateKvIndices(CustomTestCase):

diff --git a/test/registered/attention/test_radix_attention.py b/test/registered/attention/test_radix_attention.py
@@ -1,7 +1,7 @@
 import unittest
 
 from sglang.srt.environ import envs
-from sglang.test.ci.ci_register import register_cuda_ci
+from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
 from sglang.test.kits.radix_cache_server_kit import run_radix_attention_test
 from sglang.test.test_utils import (
     DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
@@ -15,6 +15,7 @@
 
 # RadixAttention server integration tests
 register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu")
+register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu")
 
 
 class TestRadixCacheFCFS(CustomTestCase):

diff --git a/test/registered/attention/test_torch_native_attention_backend.py b/test/registered/attention/test_torch_native_attention_backend.py
@@ -7,7 +7,7 @@
 from types import SimpleNamespace
 
 from sglang.srt.utils import kill_process_tree
-from sglang.test.ci.ci_register import register_cuda_ci
+from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
 from sglang.test.run_eval import run_eval
 from sglang.test.test_utils import (
     DEFAULT_MODEL_NAME_FOR_TEST,
@@ -19,6 +19,7 @@
 
 # Torch native attention backend integration test with MMLU eval
 register_cuda_ci(est_time=150, suite="stage-b-test-small-1-gpu")
+register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu")
 
 
 class TestTorchNativeAttnBackend(CustomTestCase):

diff --git a/test/registered/attention/test_triton_attention_backend.py b/test/registered/attention/test_triton_attention_backend.py
@@ -7,7 +7,7 @@
 from types import SimpleNamespace
 
 from sglang.srt.utils import kill_process_tree
-from sglang.test.ci.ci_register import register_cuda_ci
+from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
 from sglang.test.run_eval import run_eval
 from sglang.test.test_utils import (
     DEFAULT_MODEL_NAME_FOR_TEST,
@@ -21,6 +21,7 @@
 
 # Triton attention backend integration test with latency benchmark and MMLU eval
 register_cuda_ci(est_time=200, suite="stage-b-test-small-1-gpu")
+register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu")
 
 
 class TestTritonAttnBackend(CustomTestCase):

diff --git a/test/registered/attention/test_triton_attention_kernels.py b/test/registered/attention/test_triton_attention_kernels.py
@@ -19,11 +19,12 @@
     context_attention_fwd,
 )
 from sglang.srt.utils import get_device
-from sglang.test.ci.ci_register import register_cuda_ci
+from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
 from sglang.test.test_utils import CustomTestCase
 
 # Triton attention kernel unit tests (decode, extend, prefill)
 register_cuda_ci(est_time=30, suite="stage-b-test-small-1-gpu")
+register_amd_ci(est_time=30, suite="stage-b-test-small-1-gpu", disabled="Triton kernels not yet supported on AMD")
 
 
 def extend_attention_fwd_torch(

diff --git a/test/registered/attention/test_triton_sliding_window.py b/test/registered/attention/test_triton_sliding_window.py
@@ -4,7 +4,7 @@
 import requests
 
 from sglang.srt.utils import kill_process_tree
-from sglang.test.ci.ci_register import register_cuda_ci
+from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
 from sglang.test.run_eval import run_eval
 from sglang.test.test_utils import (
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -16,6 +16,7 @@
 
 # Sliding window attention with Triton backend (Gemma-3 model)
 register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu")
+register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu")
 
 
 class TestSlidingWindowAttentionTriton(CustomTestCase):