diff --git a/test/registered/attention/test_create_kvindices.py b/test/registered/attention/test_create_kvindices.py index 0642aa29cca5..f5e9be46441e 100644 --- a/test/registered/attention/test_create_kvindices.py +++ b/test/registered/attention/test_create_kvindices.py @@ -4,11 +4,12 @@ import torch from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton -from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase # Triton kernel unit test for KV indices creation register_cuda_ci(est_time=10, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu") class TestCreateKvIndices(CustomTestCase): diff --git a/test/registered/attention/test_radix_attention.py b/test/registered/attention/test_radix_attention.py index c173d75bdb09..e72e4fa618f8 100644 --- a/test/registered/attention/test_radix_attention.py +++ b/test/registered/attention/test_radix_attention.py @@ -1,7 +1,7 @@ import unittest from sglang.srt.environ import envs -from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.kits.radix_cache_server_kit import run_radix_attention_test from sglang.test.test_utils import ( DEFAULT_SMALL_MODEL_NAME_FOR_TEST, @@ -15,6 +15,7 @@ # RadixAttention server integration tests register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu") class TestRadixCacheFCFS(CustomTestCase): diff --git a/test/registered/attention/test_torch_native_attention_backend.py b/test/registered/attention/test_torch_native_attention_backend.py index e6c6a95468ac..c7da08a0395e 100644 --- a/test/registered/attention/test_torch_native_attention_backend.py +++ b/test/registered/attention/test_torch_native_attention_backend.py @@ -7,7 +7,7 @@ from types import 
SimpleNamespace from sglang.srt.utils import kill_process_tree -from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, @@ -19,6 +19,7 @@ # Torch native attention backend integration test with MMLU eval register_cuda_ci(est_time=150, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu") class TestTorchNativeAttnBackend(CustomTestCase): diff --git a/test/registered/attention/test_triton_attention_backend.py b/test/registered/attention/test_triton_attention_backend.py index d19bb61286be..cb963d194e4c 100644 --- a/test/registered/attention/test_triton_attention_backend.py +++ b/test/registered/attention/test_triton_attention_backend.py @@ -7,7 +7,7 @@ from types import SimpleNamespace from sglang.srt.utils import kill_process_tree -from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, @@ -21,6 +21,7 @@ # Triton attention backend integration test with latency benchmark and MMLU eval register_cuda_ci(est_time=200, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu") class TestTritonAttnBackend(CustomTestCase): diff --git a/test/registered/attention/test_triton_attention_kernels.py b/test/registered/attention/test_triton_attention_kernels.py index e2210ba2d7b2..6e28132a88ec 100644 --- a/test/registered/attention/test_triton_attention_kernels.py +++ b/test/registered/attention/test_triton_attention_kernels.py @@ -19,11 +19,12 @@ context_attention_fwd, ) from sglang.srt.utils import get_device -from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci 
from sglang.test.test_utils import CustomTestCase # Triton attention kernel unit tests (decode, extend, prefill) register_cuda_ci(est_time=30, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=30, suite="stage-b-test-small-1-gpu", disabled="TODO: verify Triton attention kernel unit tests on AMD before enabling") def extend_attention_fwd_torch( diff --git a/test/registered/attention/test_triton_sliding_window.py b/test/registered/attention/test_triton_sliding_window.py index 9c43b0cd5267..439b220f0564 100644 --- a/test/registered/attention/test_triton_sliding_window.py +++ b/test/registered/attention/test_triton_sliding_window.py @@ -4,7 +4,7 @@ import requests from sglang.srt.utils import kill_process_tree -from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, @@ -16,6 +16,7 @@ # Sliding window attention with Triton backend (Gemma-3 model) register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu") class TestSlidingWindowAttentionTriton(CustomTestCase):