sgl-project · HaiShaw · Jan 19, 2026 · Jan 19, 2026 · Jan 19, 2026 · gemini-code-assist
@@ -208,7 +208,7 @@ jobs:
       fail-fast: false
       matrix:
         runner: [linux-mi325-gpu-1]
-        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
@@ -230,7 +230,7 @@ jobs:
       - name: Run test
         timeout-minutes: 30
         run: |
-          bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12 --timeout-per-file 1800
+          bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 13 --timeout-per-file 1800
 
   stage-b-test-small-1-gpu-amd-mi35x:
     needs: [check-changes, stage-a-test-1-amd]

diff --git a/test/registered/amd/test_deepseek_v32_basic.py b/test/registered/amd/test_deepseek_v32_basic.py
@@ -32,7 +32,7 @@ def setUpClass(cls):
             "8",
             "--enable-dp-attention",
             "--model-loader-extra-config",
-            '{"enable_multithread_load": true, "num_threads": 64}',
+            '{"enable_multithread_load": true}',
         ]
         if is_in_amd_ci():
             other_args += [
@@ -101,7 +101,7 @@ def setUpClass(cls):
             "--tp",
             "8",
             "--model-loader-extra-config",
-            '{"enable_multithread_load": true, "num_threads": 64}',
+            '{"enable_multithread_load": true}',
         ]
         if is_in_amd_ci():
             other_args += [

diff --git a/test/registered/amd/test_deepseek_v32_mtp.py b/test/registered/amd/test_deepseek_v32_mtp.py
@@ -44,7 +44,7 @@ def setUpClass(cls):
             "--mem-frac",
             "0.7",
             "--model-loader-extra-config",
-            '{"enable_multithread_load": true, "num_threads": 64}',
+            '{"enable_multithread_load": true}',
         ]
         if is_in_amd_ci():
             other_args += [
@@ -137,7 +137,7 @@ def setUpClass(cls):
             "--mem-frac",
             "0.7",
             "--model-loader-extra-config",
-            '{"enable_multithread_load": true, "num_threads": 64}',
+            '{"enable_multithread_load": true}',
         ]
         if is_in_amd_ci():
             other_args += [

diff --git a/test/registered/attention/test_triton_sliding_window.py b/test/registered/attention/test_triton_sliding_window.py
@@ -16,7 +16,7 @@
 
 # Sliding window attention with Triton backend (Gemma-3 model)
 register_cuda_ci(est_time=100, suite="stage-b-test-large-1-gpu")
-register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd")
+register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd")  # Increased to 200s to prevent timeouts on AMD CI.
 
 
 class TestSlidingWindowAttentionTriton(CustomTestCase):

diff --git a/test/registered/openai_server/basic/test_openai_server.py b/test/registered/openai_server/basic/test_openai_server.py
@@ -29,7 +29,7 @@
 )
 
 register_cuda_ci(est_time=184, suite="stage-b-test-small-1-gpu")
-register_amd_ci(est_time=149, suite="stage-b-test-small-1-gpu-amd")
+register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd")
 
 
 class TestOpenAIServer(CustomTestCase):