diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml index 55c00d532349..762ede69d845 100644 --- a/.github/workflows/pr-test-amd.yml +++ b/.github/workflows/pr-test-amd.yml @@ -208,7 +208,7 @@ jobs: fail-fast: false matrix: runner: [linux-mi325-gpu-1] - part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] runs-on: ${{matrix.runner}} steps: - name: Checkout code @@ -230,7 +230,7 @@ jobs: - name: Run test timeout-minutes: 30 run: | - bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12 --timeout-per-file 1800 + bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 13 --timeout-per-file 1800 stage-b-test-small-1-gpu-amd-mi35x: needs: [check-changes, stage-a-test-1-amd] diff --git a/test/registered/amd/test_deepseek_v32_basic.py b/test/registered/amd/test_deepseek_v32_basic.py index 4f0be204695e..48bc52615238 100644 --- a/test/registered/amd/test_deepseek_v32_basic.py +++ b/test/registered/amd/test_deepseek_v32_basic.py @@ -32,7 +32,7 @@ def setUpClass(cls): "8", "--enable-dp-attention", "--model-loader-extra-config", - '{"enable_multithread_load": true, "num_threads": 64}', + '{"enable_multithread_load": true}', ] if is_in_amd_ci(): other_args += [ @@ -101,7 +101,7 @@ def setUpClass(cls): "--tp", "8", "--model-loader-extra-config", - '{"enable_multithread_load": true, "num_threads": 64}', + '{"enable_multithread_load": true}', ] if is_in_amd_ci(): other_args += [ diff --git a/test/registered/amd/test_deepseek_v32_mtp.py b/test/registered/amd/test_deepseek_v32_mtp.py index 22ff040850ae..04c0c7be23e8 100644 --- a/test/registered/amd/test_deepseek_v32_mtp.py +++ b/test/registered/amd/test_deepseek_v32_mtp.py @@ -44,7 +44,7 @@ def setUpClass(cls): "--mem-frac", "0.7", "--model-loader-extra-config", - '{"enable_multithread_load": true, "num_threads": 64}', + '{"enable_multithread_load": true}', ] if is_in_amd_ci(): other_args += [ @@ -137,7 +137,7 @@ def setUpClass(cls): "--mem-frac", "0.7", "--model-loader-extra-config", - '{"enable_multithread_load": true, "num_threads": 64}', + '{"enable_multithread_load": true}', ] if is_in_amd_ci(): other_args += [ diff --git a/test/registered/attention/test_triton_sliding_window.py b/test/registered/attention/test_triton_sliding_window.py index 628ef93b6bac..afad309c72ff 100644 --- a/test/registered/attention/test_triton_sliding_window.py +++ b/test/registered/attention/test_triton_sliding_window.py @@ -16,7 +16,7 @@ # Sliding window attention with Triton backend (Gemma-3 model) register_cuda_ci(est_time=100, suite="stage-b-test-large-1-gpu") -register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd") +register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd") class TestSlidingWindowAttentionTriton(CustomTestCase): diff --git a/test/registered/openai_server/basic/test_openai_server.py b/test/registered/openai_server/basic/test_openai_server.py index 3f27fb500e38..04b6ec84e395 100644 --- a/test/registered/openai_server/basic/test_openai_server.py +++ b/test/registered/openai_server/basic/test_openai_server.py @@ -29,7 +29,7 @@ ) register_cuda_ci(est_time=184, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=149, suite="stage-b-test-small-1-gpu-amd") +register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd") class TestOpenAIServer(CustomTestCase):