From 5bd319985069b9ea4d286db0c54d958e6448c737 Mon Sep 17 00:00:00 2001
From: michaelzhang-ai <michaelzhang-ai@users.noreply.github.com>
Date: Sat, 21 Feb 2026 01:41:45 -0600
Subject: [PATCH 01/10] [AMD] Fix pre-existing AMD CI test failures

- Relax LoRA multi-batch ROUGE-L tolerance from 1.0 to 0.95 to account
  for minor numerical non-determinism on ROCm
- Fix aiter attention backend crashing on hybrid Mamba+attention models
  (e.g. LFM2-MoE): use get_v_head_dim() for hybrid KV pools instead of
  hardcoded get_value_buffer(0) which fails when layer 0 is not an
  attention layer
- Skip TestToolChoiceLfm2Moe on AMD: sgl_kernel ROCm build lacks
  causal_conv1d_update op needed by Mamba layers
---
 python/sglang/srt/layers/attention/aiter_backend.py        | 7 ++++++-
 python/sglang/test/lora_utils.py                           | 1 +
 .../openai_server/function_call/test_tool_choice.py        | 3 +++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/attention/aiter_backend.py b/python/sglang/srt/layers/attention/aiter_backend.py
index d851040cfc37..dd4879bba3df 100644
--- a/python/sglang/srt/layers/attention/aiter_backend.py
+++ b/python/sglang/srt/layers/attention/aiter_backend.py
@@ -123,7 +123,12 @@ def __init__(
             model_runner.model_config.num_attention_heads // get_attention_tp_size()
         )
         self.head_dim = model_runner.model_config.head_dim
-        self.v_head_dim = model_runner.token_to_kv_pool.get_value_buffer(0).shape[-1]
+        if hasattr(model_runner.token_to_kv_pool, "get_v_head_dim"):
+            self.v_head_dim = model_runner.token_to_kv_pool.get_v_head_dim()
+        else:
+            self.v_head_dim = model_runner.token_to_kv_pool.get_value_buffer(0).shape[
+                -1
+            ]
         self.num_kv_head = model_runner.model_config.get_num_kv_heads(
             get_attention_tp_size()
         )
diff --git a/python/sglang/test/lora_utils.py b/python/sglang/test/lora_utils.py
index b1181f248c5b..28eb6fff8068 100644
--- a/python/sglang/test/lora_utils.py
+++ b/python/sglang/test/lora_utils.py
@@ -95,6 +95,7 @@ def __post_init__(self):
                 prefill_tolerance=3e-1,
             ),
         ],
+        rouge_l_tolerance=0.95,
         max_loras_per_batch=2,
         max_loaded_loras=4,
     ),
diff --git a/test/registered/openai_server/function_call/test_tool_choice.py b/test/registered/openai_server/function_call/test_tool_choice.py
index fd6039b3dcc9..d8aec8349cc2 100644
--- a/test/registered/openai_server/function_call/test_tool_choice.py
+++ b/test/registered/openai_server/function_call/test_tool_choice.py
@@ -889,6 +889,9 @@ def setUpClass(cls):
         cls.tokenizer = get_tokenizer(cls.model)
 
 
+@unittest.skipIf(
+    is_hip(), "sgl_kernel ROCm build lacks causal_conv1d_update for Mamba layers"
+)
 class TestToolChoiceLfm2Moe(TestToolChoiceLlama32):
     """Test tool_choice functionality with LiquidAI LFM2-MoE model"""
 

From 7f950076853925136599fbb33d51a1b786d77652 Mon Sep 17 00:00:00 2001
From: bingxche <Bingxu.Chen@amd.com>
Date: Wed, 25 Feb 2026 08:50:25 +0000
Subject: [PATCH 02/10] Fix LFM2 (Mamba) model on ROCm by falling back to
 Triton kernels when sgl_kernel causal_conv1d ops are unavailable

---
 .../layers/attention/mamba/causal_conv1d.py   | 46 ++++++++++++++++++-
 .../function_call/test_tool_choice.py         |  6 +--
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/python/sglang/srt/layers/attention/mamba/causal_conv1d.py b/python/sglang/srt/layers/attention/mamba/causal_conv1d.py
index 071a0ee6f749..e7beb75b0051 100644
--- a/python/sglang/srt/layers/attention/mamba/causal_conv1d.py
+++ b/python/sglang/srt/layers/attention/mamba/causal_conv1d.py
@@ -7,11 +7,23 @@
 from typing import Optional
 
 import torch
-from sgl_kernel import causal_conv1d_fwd
-from sgl_kernel import causal_conv1d_update as causal_conv1d_update_kernel
 
 from .causal_conv1d_triton import PAD_SLOT_ID
 
+try:
+    from sgl_kernel import causal_conv1d_fwd
+    from sgl_kernel import causal_conv1d_update as causal_conv1d_update_kernel
+
+    torch.ops.sgl_kernel.causal_conv1d_update
+    _USE_TRITON = False
+except (ImportError, AttributeError):
+    from .causal_conv1d_triton import causal_conv1d_fn as _causal_conv1d_fn_triton
+    from .causal_conv1d_triton import (
+        causal_conv1d_update as _causal_conv1d_update_triton,
+    )
+
+    _USE_TRITON = True
+
 
 def causal_conv1d_fn(
     x: torch.Tensor,
@@ -54,6 +66,25 @@ def causal_conv1d_fn(
 
     out: (batch, dim, seqlen)
     """
+    if _USE_TRITON:
+        seq_lens_cpu = (
+            (query_start_loc[1:] - query_start_loc[:-1]).cpu().tolist()
+            if query_start_loc is not None
+            else [x.shape[-1]]
+        )
+        return _causal_conv1d_fn_triton(
+            x,
+            weight,
+            bias,
+            conv_states=conv_states,
+            query_start_loc=query_start_loc,
+            seq_lens_cpu=seq_lens_cpu,
+            cache_indices=cache_indices,
+            has_initial_state=has_initial_state,
+            activation=activation,
+            pad_slot_id=pad_slot_id,
+            **kwargs,
+        )
     if activation not in [None, "silu", "swish"]:
         raise NotImplementedError("activation must be None, silu, or swish")
     if x.stride(-1) != 1:
@@ -106,6 +137,17 @@ def causal_conv1d_update(
             indices 0 and 3
     out: (batch, dim) or (batch, dim, seqlen)
     """
+    if _USE_TRITON:
+        return _causal_conv1d_update_triton(
+            x,
+            conv_state,
+            weight,
+            bias=bias,
+            activation=activation,
+            cache_seqlens=cache_seqlens,
+            conv_state_indices=conv_state_indices,
+            pad_slot_id=pad_slot_id,
+        )
     if activation not in [None, "silu", "swish"]:
         raise NotImplementedError(
             f"activation must be None, silu, or swish, actual: {activation}"
diff --git a/test/registered/openai_server/function_call/test_tool_choice.py b/test/registered/openai_server/function_call/test_tool_choice.py
index d8aec8349cc2..d463e8bbf076 100644
--- a/test/registered/openai_server/function_call/test_tool_choice.py
+++ b/test/registered/openai_server/function_call/test_tool_choice.py
@@ -12,7 +12,7 @@
 
 import openai
 
-from sglang.srt.utils import is_hip, kill_process_tree
+from sglang.srt.utils import kill_process_tree
 from sglang.srt.utils.hf_transformers_utils import get_tokenizer
 from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
 from sglang.test.test_utils import (
@@ -860,7 +860,6 @@ def test_complex_parameters_required_non_streaming(self):
 #         cls.tokenizer = get_tokenizer(cls.model)
 
 
-@unittest.skipIf(is_hip(), "Disabled for AMD")
 class TestToolChoiceLfm2(TestToolChoiceLlama32):
     """Test tool_choice functionality with LiquidAI LFM2 model"""
 
@@ -889,9 +888,6 @@ def setUpClass(cls):
         cls.tokenizer = get_tokenizer(cls.model)
 
 
-@unittest.skipIf(
-    is_hip(), "sgl_kernel ROCm build lacks causal_conv1d_update for Mamba layers"
-)
 class TestToolChoiceLfm2Moe(TestToolChoiceLlama32):
     """Test tool_choice functionality with LiquidAI LFM2-MoE model"""
 

From 2722b9e366ea4f9f6f3153c8df0689ac37fbc39b Mon Sep 17 00:00:00 2001
From: yctseng0211 <yctseng@amd.com>
Date: Wed, 25 Feb 2026 03:59:18 -0600
Subject: [PATCH 03/10] fix(amd-ci): retry near-miss ROUGE-L flaky failures in
 lora test

---
 .../lora/test_multi_lora_backend.py           | 37 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/test/registered/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py
index f34b9e622aa6..4f7444674457 100644
--- a/test/registered/lora/test_multi_lora_backend.py
+++ b/test/registered/lora/test_multi_lora_backend.py
@@ -14,6 +14,7 @@
 
 import multiprocessing as mp
 import os
+import re
 import unittest
 
 from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
@@ -23,13 +24,45 @@
     run_lora_batch_splitting_equivalence_test,
     run_lora_multiple_batch_on_model_cases,
 )
-from sglang.test.test_utils import CustomTestCase, is_in_ci
+from sglang.test.test_utils import CustomTestCase, is_in_amd_ci, is_in_ci
 
 register_cuda_ci(est_time=100, suite="stage-b-test-large-1-gpu")
-register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd")
+register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd")
 
 
 class TestMultiLoRABackend(CustomTestCase):
+    def _callTestMethod(self, method):
+        from sglang.srt.environ import envs
+        from sglang.srt.utils.common import retry
+
+        max_retry = envs.SGLANG_TEST_MAX_RETRY.get()
+        if max_retry is None:
+            max_retry = 1 if is_in_ci() else 0
+
+        if is_in_amd_ci():
+            attempt_count = [0]
+
+            def should_retry(e):
+                attempt_count[0] += 1
+                match = re.search(r"ROUGE-L score ([\d.]+)", str(e))
+                if match:
+                    score = float(match.group(1))
+                    if score < 0.977:
+                        return False
+                    return True
+                return attempt_count[0] <= max_retry
+
+            retry(
+                lambda: super(CustomTestCase, self)._callTestMethod(method),
+                max_retry=max_retry + 2,
+                should_retry=should_retry,
+            )
+        else:
+            retry(
+                lambda: super(CustomTestCase, self)._callTestMethod(method),
+                max_retry=max_retry,
+            )
+
     def test_ci_lora_models_batch_splitting(self):
         run_lora_batch_splitting_equivalence_test(CI_MULTI_LORA_MODELS)
 

From 0b31772e333120a7160a0ff8e923d0582be33313 Mon Sep 17 00:00:00 2001
From: YC Tseng <yctseng@amd.com>
Date: Wed, 25 Feb 2026 18:01:52 +0800
Subject: [PATCH 04/10] Revert the threshold change in lora_utils.py

---
 python/sglang/test/lora_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/sglang/test/lora_utils.py b/python/sglang/test/lora_utils.py
index 28eb6fff8068..b1181f248c5b 100644
--- a/python/sglang/test/lora_utils.py
+++ b/python/sglang/test/lora_utils.py
@@ -95,7 +95,6 @@ def __post_init__(self):
                 prefill_tolerance=3e-1,
             ),
         ],
-        rouge_l_tolerance=0.95,
         max_loras_per_batch=2,
         max_loaded_loras=4,
     ),

From 39d1391e7f2e668389f581651d9d24cbe9844636 Mon Sep 17 00:00:00 2001
From: yctseng0211 <yctseng@amd.com>
Date: Wed, 25 Feb 2026 09:31:03 -0600
Subject: [PATCH 05/10] increase the additional retry limit

---
 test/registered/lora/test_multi_lora_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/registered/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py
index 4f7444674457..7de87966a698 100644
--- a/test/registered/lora/test_multi_lora_backend.py
+++ b/test/registered/lora/test_multi_lora_backend.py
@@ -54,7 +54,7 @@ def should_retry(e):
 
             retry(
                 lambda: super(CustomTestCase, self)._callTestMethod(method),
-                max_retry=max_retry + 2,
+                max_retry=max_retry + 3,
                 should_retry=should_retry,
             )
         else:

From 23f8e58a863266a474a202ebf0727d195880280f Mon Sep 17 00:00:00 2001
From: michaelzhang-ai <michaelzhang-ai@users.noreply.github.com>
Date: Wed, 25 Feb 2026 16:20:48 -0600
Subject: [PATCH 06/10] [AMD] Disable aiter RoPE in LoRA test for deterministic
 outputs

The aiter RoPE backend has lower precision (as warned by apex), causing
consistent single-token differences between SRT and HF reference outputs
(ROUGE-L 0.9774 vs required 1.0). Disable it for the LoRA multi-batch
test to produce exact matches.
---
 test/registered/lora/test_multi_lora_backend.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/registered/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py
index 7de87966a698..f6dc0ba5bb17 100644
--- a/test/registered/lora/test_multi_lora_backend.py
+++ b/test/registered/lora/test_multi_lora_backend.py
@@ -17,7 +17,11 @@
 import re
 import unittest
 
+from sglang.srt.utils import is_hip
 from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
+
+if is_hip():
+    os.environ.setdefault("USE_ROCM_AITER_ROPE_BACKEND", "0")
 from sglang.test.lora_utils import (
     ALL_OTHER_MULTI_LORA_MODELS,
     CI_MULTI_LORA_MODELS,

From 094f05daf32c27401d755fa6dbd39b00a26d5efe Mon Sep 17 00:00:00 2001
From: michaelzhang-ai <michaelzhang-ai@users.noreply.github.com>
Date: Wed, 25 Feb 2026 20:20:37 -0600
Subject: [PATCH 07/10] [AMD] Fix aiter backend v_head_dim for hybrid
 Mamba+attention models

The existing check only covers hybrid_gdn_config and kimi_linear_config,
but LFM2 models use HybridLinearKVPool without either config. Use
hasattr(get_v_head_dim) to cover all hybrid KV pool types, matching
triton_backend.py.
---
 python/sglang/srt/layers/attention/aiter_backend.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/python/sglang/srt/layers/attention/aiter_backend.py b/python/sglang/srt/layers/attention/aiter_backend.py
index 0062a55737fa..e86a178b1c90 100755
--- a/python/sglang/srt/layers/attention/aiter_backend.py
+++ b/python/sglang/srt/layers/attention/aiter_backend.py
@@ -139,11 +139,9 @@ def __init__(
         if self.use_mla:
             # For MLA models, get v_head_dim from model config
             self.v_head_dim = model_runner.model_config.v_head_dim
-        elif (
-            model_runner.hybrid_gdn_config is not None
-            or model_runner.kimi_linear_config is not None
-        ):
-            # For hybrid linear models, layer_id = 0 may not be full attention
+        elif hasattr(model_runner.token_to_kv_pool, "get_v_head_dim"):
+            # For hybrid models (Mamba+attention, GDN, Kimi linear),
+            # layer_id=0 may not be a full attention layer
             self.v_head_dim = model_runner.token_to_kv_pool.get_v_head_dim()
         else:
             self.v_head_dim = model_runner.token_to_kv_pool.get_value_buffer(0).shape[

From eba80754e7ef340820d3537bb53a86da684d18c8 Mon Sep 17 00:00:00 2001
From: yctseng0211 <yctseng@amd.com>
Date: Wed, 25 Feb 2026 22:36:42 -0600
Subject: [PATCH 08/10] Default SGLANG_USE_AITER to 0 in the multi-LoRA test on ROCm

---
 test/registered/lora/test_multi_lora_backend.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/registered/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py
index f6dc0ba5bb17..cc8efde6d953 100644
--- a/test/registered/lora/test_multi_lora_backend.py
+++ b/test/registered/lora/test_multi_lora_backend.py
@@ -21,7 +21,8 @@
 from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
 
 if is_hip():
-    os.environ.setdefault("USE_ROCM_AITER_ROPE_BACKEND", "0")
+    os.environ.setdefault("SGLANG_USE_AITER", "0")
+
 from sglang.test.lora_utils import (
     ALL_OTHER_MULTI_LORA_MODELS,
     CI_MULTI_LORA_MODELS,

From 46de4f4f2b9ed5e64dd23eca9fd3b89eabd9b2af Mon Sep 17 00:00:00 2001
From: yctseng0211 <yctseng@amd.com>
Date: Thu, 26 Feb 2026 20:58:14 -0600
Subject: [PATCH 09/10] create new amd stage-b job

---
 .github/workflows/pr-test-amd-rocm720.yml     | 40 +++++++++++++++++++
 .github/workflows/pr-test-amd.yml             | 40 +++++++++++++++++++
 scripts/ci/utils/slash_command_handler.py     |  1 +
 .../lora/test_multi_lora_backend.py           |  2 +-
 test/run_suite.py                             |  1 +
 5 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pr-test-amd-rocm720.yml b/.github/workflows/pr-test-amd-rocm720.yml
index 42bcad325a07..1c032c6d8712 100644
--- a/.github/workflows/pr-test-amd-rocm720.yml
+++ b/.github/workflows/pr-test-amd-rocm720.yml
@@ -321,6 +321,45 @@ jobs:
         run: |
           bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 13 --timeout-per-file 1800 --continue-on-error
 
+  stage-b-test-small-1-gpu-amd-nondeterministic:
+    needs: [check-changes]
+    if: |
+      always() &&
+      (
+        (inputs.target_stage == 'stage-b-test-small-1-gpu-amd-nondeterministic') ||
+        (
+          !inputs.target_stage &&
+          (!failure() && !cancelled()) &&
+          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
+        )
+      )
+    strategy:
+      fail-fast: false
+      matrix:
+        runner: [linux-mi325-gpu-1]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
+
+      - name: Ensure VRAM is clear
+        run: bash scripts/ensure_vram_clear.sh rocm
+
+      - name: Start CI container
+        run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}
+
+      - name: Install dependencies
+        run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build
+
+      - name: Run test
+        timeout-minutes: 30
+        run: |
+          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd-nondeterministic --timeout-per-file 1800 --continue-on-error
+
   stage-b-test-small-1-gpu-amd-mi35x:
     needs: [check-changes]
     if: |
@@ -801,6 +840,7 @@ jobs:
         stage-a-test-1-amd,
         jit-kernel-unit-test-amd,
         stage-b-test-small-1-gpu-amd,
+        stage-b-test-small-1-gpu-amd-nondeterministic,
         stage-b-test-small-1-gpu-amd-mi35x,
         stage-b-test-large-1-gpu-amd,
         stage-b-test-large-2-gpu-amd,
diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml
index 454397c783b8..c19985433d47 100644
--- a/.github/workflows/pr-test-amd.yml
+++ b/.github/workflows/pr-test-amd.yml
@@ -318,6 +318,45 @@ jobs:
         run: |
           bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 14 --timeout-per-file 1800
 
+  stage-b-test-small-1-gpu-amd-nondeterministic:
+    needs: [check-changes, stage-a-test-1-amd]
+    if: |
+      always() &&
+      (
+        (inputs.target_stage == 'stage-b-test-small-1-gpu-amd-nondeterministic') ||
+        (
+          !inputs.target_stage &&
+          (!failure() && !cancelled()) &&
+          ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
+        )
+      )
+    strategy:
+      fail-fast: false
+      matrix:
+        runner: [linux-mi325-gpu-1]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
+
+      - name: Ensure VRAM is clear
+        run: bash scripts/ensure_vram_clear.sh rocm
+
+      - name: Start CI container
+        run: bash scripts/ci/amd/amd_ci_start_container.sh
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}
+
+      - name: Install dependencies
+        run: bash scripts/ci/amd/amd_ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 30
+        run: |
+          bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd-nondeterministic --timeout-per-file 1800 --continue-on-error
+
   stage-b-test-small-1-gpu-amd-mi35x:
     needs: [check-changes, stage-a-test-1-amd]
     if: |
@@ -890,6 +929,7 @@ jobs:
         stage-a-test-1-amd,
         jit-kernel-unit-test-amd,
         stage-b-test-small-1-gpu-amd,
+        stage-b-test-small-1-gpu-amd-nondeterministic,
         stage-b-test-small-1-gpu-amd-mi35x,
         stage-b-test-large-1-gpu-amd,
         stage-b-test-large-2-gpu-amd,
diff --git a/scripts/ci/utils/slash_command_handler.py b/scripts/ci/utils/slash_command_handler.py
index 9e7f98f87c7c..6d2973f066ee 100644
--- a/scripts/ci/utils/slash_command_handler.py
+++ b/scripts/ci/utils/slash_command_handler.py
@@ -274,6 +274,7 @@ def handle_rerun_stage(
         "sgl-kernel-unit-test-2-gpu-amd",
         "stage-a-test-1-amd",
         "stage-b-test-small-1-gpu-amd",
+        "stage-b-test-small-1-gpu-amd-nondeterministic",
         "stage-b-test-small-1-gpu-amd-mi35x",
         "stage-b-test-large-1-gpu-amd",
         "stage-b-test-large-2-gpu-amd",
diff --git a/test/registered/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py
index cc8efde6d953..971b9882578f 100644
--- a/test/registered/lora/test_multi_lora_backend.py
+++ b/test/registered/lora/test_multi_lora_backend.py
@@ -32,7 +32,7 @@
 from sglang.test.test_utils import CustomTestCase, is_in_amd_ci, is_in_ci
 
 register_cuda_ci(est_time=100, suite="stage-b-test-large-1-gpu")
-register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd")
+register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd-nondeterministic")
 
 
 class TestMultiLoRABackend(CustomTestCase):
diff --git a/test/run_suite.py b/test/run_suite.py
index 3f2dcc44bb75..2f45522aa9b0 100644
--- a/test/run_suite.py
+++ b/test/run_suite.py
@@ -21,6 +21,7 @@
     HWBackend.AMD: [
         "stage-a-test-1-amd",
         "stage-b-test-small-1-gpu-amd",
+        "stage-b-test-small-1-gpu-amd-nondeterministic",
         "stage-b-test-small-1-gpu-amd-mi35x",
         "stage-b-test-large-8-gpu-35x-disaggregation-amd",
         "stage-b-test-large-1-gpu-amd",

From c5a0e6364815292646ab28001db747f9c59874a8 Mon Sep 17 00:00:00 2001
From: YC Tseng <yctseng@amd.com>
Date: Fri, 27 Feb 2026 11:27:44 +0800
Subject: [PATCH 10/10] Update test_multi_lora_backend.py for AMD CI

---
 .../lora/test_multi_lora_backend.py           | 42 +------------------
 1 file changed, 2 insertions(+), 40 deletions(-)

diff --git a/test/registered/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py
index 971b9882578f..9a7465d45dbe 100644
--- a/test/registered/lora/test_multi_lora_backend.py
+++ b/test/registered/lora/test_multi_lora_backend.py
@@ -14,60 +14,22 @@
 
 import multiprocessing as mp
 import os
-import re
 import unittest
 
-from sglang.srt.utils import is_hip
 from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
-
-if is_hip():
-    os.environ.setdefault("SGLANG_USE_AITER", "0")
-
 from sglang.test.lora_utils import (
     ALL_OTHER_MULTI_LORA_MODELS,
     CI_MULTI_LORA_MODELS,
     run_lora_batch_splitting_equivalence_test,
     run_lora_multiple_batch_on_model_cases,
 )
-from sglang.test.test_utils import CustomTestCase, is_in_amd_ci, is_in_ci
+from sglang.test.test_utils import CustomTestCase, is_in_ci
 
 register_cuda_ci(est_time=100, suite="stage-b-test-large-1-gpu")
-register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd-nondeterministic")
+register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd-nondeterministic")
 
 
 class TestMultiLoRABackend(CustomTestCase):
-    def _callTestMethod(self, method):
-        from sglang.srt.environ import envs
-        from sglang.srt.utils.common import retry
-
-        max_retry = envs.SGLANG_TEST_MAX_RETRY.get()
-        if max_retry is None:
-            max_retry = 1 if is_in_ci() else 0
-
-        if is_in_amd_ci():
-            attempt_count = [0]
-
-            def should_retry(e):
-                attempt_count[0] += 1
-                match = re.search(r"ROUGE-L score ([\d.]+)", str(e))
-                if match:
-                    score = float(match.group(1))
-                    if score < 0.977:
-                        return False
-                    return True
-                return attempt_count[0] <= max_retry
-
-            retry(
-                lambda: super(CustomTestCase, self)._callTestMethod(method),
-                max_retry=max_retry + 3,
-                should_retry=should_retry,
-            )
-        else:
-            retry(
-                lambda: super(CustomTestCase, self)._callTestMethod(method),
-                max_retry=max_retry,
-            )
-
     def test_ci_lora_models_batch_splitting(self):
         run_lora_batch_splitting_equivalence_test(CI_MULTI_LORA_MODELS)