diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml index a741b9f79c76..187aa4dc5b47 100644 --- a/.github/workflows/pr-test-amd.yml +++ b/.github/workflows/pr-test-amd.yml @@ -224,7 +224,46 @@ jobs: - name: Run test timeout-minutes: 30 run: | - bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 12 + bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12 + + stage-b-test-small-1-gpu-amd-mi35x: + needs: [check-changes, stage-a-test-1-amd] + if: | + always() && + ( + (inputs.target_stage == 'stage-b-test-small-1-gpu-amd-mi35x') || + ( + !inputs.target_stage && + (!failure() && !cancelled()) && + ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) + ) + ) + strategy: + fail-fast: false + matrix: + runner: [linux-mi35x-gpu-1] + runs-on: ${{matrix.runner}} + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} + + - name: Ensure VRAM is clear + run: bash scripts/ensure_vram_clear.sh rocm + + - name: Start CI container + run: bash scripts/ci/amd_ci_start_container.sh + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Install dependencies + run: bash scripts/ci/amd_ci_install_dependency.sh + + - name: Run test + timeout-minutes: 30 + run: | + bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd-mi35x stage-b-test-large-2-gpu-amd: needs: [check-changes, stage-a-test-1-amd] @@ -545,87 +584,8 @@ jobs: run: | bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 - unit-test-backend-1-gpu-amd-mi35x: - needs: [check-changes, stage-a-test-1-amd] - if: | - always() && - ( - (inputs.target_stage == 'unit-test-backend-1-gpu-amd-mi35x') || - ( - !inputs.target_stage && - (!failure() && !cancelled()) && - ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) - ) - ) - strategy: - fail-fast: false - matrix: - runner: [linux-mi35x-gpu-1] - runs-on: ${{matrix.runner}} - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} - - - name: Ensure VRAM is clear - run: bash scripts/ensure_vram_clear.sh rocm - - - name: Start CI container - run: bash scripts/ci/amd_ci_start_container.sh - env: - GITHUB_WORKSPACE: ${{ github.workspace }} - - - name: Install dependencies - run: bash scripts/ci/amd_ci_install_dependency.sh - - - name: Run test - timeout-minutes: 15 - run: | - bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x - - unit-test-backend-2-gpu-amd: - needs: [check-changes, stage-a-test-1-amd] - if: | - always() && - ( - (inputs.target_stage == 'unit-test-backend-2-gpu-amd') || - ( - !inputs.target_stage && - (!failure() && !cancelled()) && - ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) - ) - ) - strategy: - fail-fast: false - matrix: - runner: [linux-mi325-gpu-2] - part: [0, 1] - runs-on: ${{matrix.runner}} - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} - - - name: Ensure VRAM is clear - run: bash scripts/ensure_vram_clear.sh rocm - - - name: Start CI container - run: bash scripts/ci/amd_ci_start_container.sh - env: - GITHUB_WORKSPACE: ${{ github.workspace }} - - - name: Install dependencies - run: bash scripts/ci/amd_ci_install_dependency.sh - - - name: Run test - timeout-minutes: 30 - run: | - bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 - unit-test-backend-8-gpu-amd: - needs: [check-changes, unit-test-backend-2-gpu-amd] + needs: [check-changes, stage-a-test-1-amd] if: | always() && ( @@ -673,7 +633,7 @@ jobs: bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 3600 unit-test-backend-8-gpu-amd-mi35x: - needs: [check-changes, unit-test-backend-2-gpu-amd] + needs: [check-changes, stage-a-test-1-amd] if: always() && !failure() && !cancelled() && ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) strategy: @@ -806,7 +766,7 @@ jobs: bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8 performance-test-2-gpu-amd: - needs: [check-changes, unit-test-backend-2-gpu-amd] + needs: [check-changes, stage-a-test-1-amd] if: | always() && ( @@ -965,10 +925,9 @@ jobs: stage-a-test-1-amd, stage-b-test-small-1-gpu-amd, + stage-b-test-small-1-gpu-amd-mi35x, stage-b-test-large-2-gpu-amd, unit-test-backend-1-gpu-amd, - unit-test-backend-1-gpu-amd-mi35x, - unit-test-backend-2-gpu-amd, unit-test-backend-8-gpu-amd, unit-test-backend-8-gpu-amd-mi35x, performance-test-1-gpu-part-1-amd, diff --git a/scripts/ci/slash_command_handler.py b/scripts/ci/slash_command_handler.py index d1cc5f9c2caa..9743e8843e85 100644 --- a/scripts/ci/slash_command_handler.py +++ b/scripts/ci/slash_command_handler.py @@ -177,6 +177,7 @@ def handle_rerun_stage( "sgl-kernel-unit-test-amd", "stage-a-test-1-amd", "stage-b-test-small-1-gpu-amd", + "stage-b-test-small-1-gpu-amd-mi35x", "stage-b-test-large-2-gpu-amd", "unit-test-backend-1-gpu-amd", "unit-test-backend-2-gpu-amd", diff --git a/test/registered/attention/test_create_kvindices.py b/test/registered/attention/test_create_kvindices.py index f5e9be46441e..881e68d6e1f5 100644 --- a/test/registered/attention/test_create_kvindices.py +++ b/test/registered/attention/test_create_kvindices.py @@ -9,7 +9,7 @@ # Triton kernel unit test for KV indices creation register_cuda_ci(est_time=10, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu-amd") class TestCreateKvIndices(CustomTestCase): diff --git a/test/registered/attention/test_radix_attention.py b/test/registered/attention/test_radix_attention.py index e72e4fa618f8..5931a0921709 100644 --- a/test/registered/attention/test_radix_attention.py +++ b/test/registered/attention/test_radix_attention.py @@ -15,7 +15,7 @@ # RadixAttention server integration tests register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd") class TestRadixCacheFCFS(CustomTestCase): diff --git a/test/registered/attention/test_swa_unittest.py b/test/registered/attention/test_swa_unittest.py index be3667fffb2d..1a808417c1ed 100644 --- a/test/registered/attention/test_swa_unittest.py +++ b/test/registered/attention/test_swa_unittest.py @@ -10,7 +10,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=8, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu-amd") class TestSWA(unittest.TestCase): diff --git a/test/registered/attention/test_torch_native_attention_backend.py b/test/registered/attention/test_torch_native_attention_backend.py index c7da08a0395e..36bb27d4d532 100644 --- a/test/registered/attention/test_torch_native_attention_backend.py +++ b/test/registered/attention/test_torch_native_attention_backend.py @@ -19,7 +19,7 @@ # Torch native attention backend integration test with MMLU eval register_cuda_ci(est_time=150, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu-amd") class TestTorchNativeAttnBackend(CustomTestCase): diff --git a/test/registered/attention/test_triton_attention_backend.py b/test/registered/attention/test_triton_attention_backend.py index 59cda72daf09..8f4c748d6fdf 100644 --- a/test/registered/attention/test_triton_attention_backend.py +++ b/test/registered/attention/test_triton_attention_backend.py @@ -21,7 +21,7 @@ # Triton attention backend integration test with latency benchmark and MMLU eval register_cuda_ci(est_time=200, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=1110, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=1110, suite="stage-b-test-small-1-gpu-amd") class TestTritonAttnBackend(CustomTestCase): diff --git a/test/registered/attention/test_triton_attention_kernels.py b/test/registered/attention/test_triton_attention_kernels.py index 61c8a68edb5c..fede8457b44e 100644 --- a/test/registered/attention/test_triton_attention_kernels.py +++ b/test/registered/attention/test_triton_attention_kernels.py @@ -26,7 +26,7 @@ register_cuda_ci(est_time=30, suite="stage-b-test-small-1-gpu") register_amd_ci( est_time=30, - suite="stage-b-test-small-1-gpu", + suite="stage-b-test-small-1-gpu-amd", disabled="test was never enabled for AMD CI, needs validation", ) diff --git a/test/registered/attention/test_triton_sliding_window.py b/test/registered/attention/test_triton_sliding_window.py index 439b220f0564..73e8dadfea25 100644 --- a/test/registered/attention/test_triton_sliding_window.py +++ b/test/registered/attention/test_triton_sliding_window.py @@ -16,7 +16,7 @@ # Sliding window attention with Triton backend (Gemma-3 model) register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd") class TestSlidingWindowAttentionTriton(CustomTestCase): diff --git a/test/registered/backends/test_torch_compile.py b/test/registered/backends/test_torch_compile.py index 4588af347bb7..f9075cb3ce2d 100644 --- a/test/registered/backends/test_torch_compile.py +++ b/test/registered/backends/test_torch_compile.py @@ -17,7 +17,7 @@ ) register_cuda_ci(est_time=190, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=1100, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=1100, suite="stage-b-test-small-1-gpu-amd") class TestTorchCompile(CustomTestCase): diff --git a/test/registered/constrained_decoding/test_constrained_decoding.py b/test/registered/constrained_decoding/test_constrained_decoding.py index 8992e7074a1b..d83f0d3e2947 100644 --- a/test/registered/constrained_decoding/test_constrained_decoding.py +++ b/test/registered/constrained_decoding/test_constrained_decoding.py @@ -1,7 +1,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=111, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=179, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=179, suite="stage-b-test-small-1-gpu-amd") import unittest diff --git a/test/registered/core/test_gpt_oss_1gpu.py b/test/registered/core/test_gpt_oss_1gpu.py index 50325765205e..a7ae8dc42026 100644 --- a/test/registered/core/test_gpt_oss_1gpu.py +++ b/test/registered/core/test_gpt_oss_1gpu.py @@ -4,7 +4,7 @@ from sglang.test.gpt_oss_common import BaseTestGptOss register_cuda_ci(est_time=402, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=750, suite="stage-b-test-small-1-gpu-amd") +register_amd_ci(est_time=750, suite="stage-b-test-small-1-gpu-amd-mi35x") class TestGptOss1Gpu(BaseTestGptOss): diff --git a/test/registered/debug_utils/test_tensor_dump_forward_hook.py b/test/registered/debug_utils/test_tensor_dump_forward_hook.py index 418f1c7a6ffc..0cbc436277f6 100644 --- a/test/registered/debug_utils/test_tensor_dump_forward_hook.py +++ b/test/registered/debug_utils/test_tensor_dump_forward_hook.py @@ -1,7 +1,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=9, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=15, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=15, suite="stage-b-test-small-1-gpu-amd") import unittest diff --git a/test/registered/dllm/test_llada2_mini_amd.py b/test/registered/dllm/test_llada2_mini_amd.py index 50f378229822..8a4858711de5 100644 --- a/test/registered/dllm/test_llada2_mini_amd.py +++ b/test/registered/dllm/test_llada2_mini_amd.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_amd_ci -register_amd_ci(est_time=520, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=520, suite="stage-b-test-small-1-gpu-amd") """ Test LLaDA2 (Diffusion Language Model) on AMD GPUs. diff --git a/test/registered/lora/test_lora.py b/test/registered/lora/test_lora.py index 25ace89f0b0a..b9e1559faa5f 100644 --- a/test/registered/lora/test_lora.py +++ b/test/registered/lora/test_lora.py @@ -25,7 +25,7 @@ from sglang.test.test_utils import CustomTestCase, is_in_ci register_cuda_ci(est_time=82, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=82, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=82, suite="stage-b-test-small-1-gpu-amd") class TestLoRA(CustomTestCase): diff --git a/test/registered/lora/test_lora_backend.py b/test/registered/lora/test_lora_backend.py index 0057c2a7f4a1..0cc2aca9b86d 100644 --- a/test/registered/lora/test_lora_backend.py +++ b/test/registered/lora/test_lora_backend.py @@ -32,7 +32,7 @@ register_cuda_ci(est_time=200, suite="stage-b-test-small-1-gpu") register_amd_ci( est_time=200, - suite="stage-b-test-small-1-gpu", + suite="stage-b-test-small-1-gpu-amd", disabled="see https://github.com/sgl-project/sglang/issues/13107", ) diff --git a/test/registered/lora/test_lora_eviction.py b/test/registered/lora/test_lora_eviction.py index 05ed1dee3068..4404d11ec5e1 100644 --- a/test/registered/lora/test_lora_eviction.py +++ b/test/registered/lora/test_lora_eviction.py @@ -24,7 +24,7 @@ from sglang.test.test_utils import CustomTestCase register_cuda_ci(est_time=224, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=224, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=224, suite="stage-b-test-small-1-gpu-amd") PROMPTS = [ "AI is a field of computer science focused on", diff --git a/test/registered/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py index 1a43a13f29bd..7180bebfe47b 100644 --- a/test/registered/lora/test_multi_lora_backend.py +++ b/test/registered/lora/test_multi_lora_backend.py @@ -25,7 +25,7 @@ from sglang.test.test_utils import CustomTestCase, is_in_ci register_cuda_ci(est_time=60, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu-amd") # All prompts are used at once in a batch. PROMPTS = [ diff --git a/test/registered/metrics/test_metrics.py b/test/registered/metrics/test_metrics.py index 1d280fec8237..f7c18ea2da77 100644 --- a/test/registered/metrics/test_metrics.py +++ b/test/registered/metrics/test_metrics.py @@ -6,7 +6,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=32, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=32, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=32, suite="stage-b-test-small-1-gpu-amd") from prometheus_client.parser import text_string_to_metric_families from prometheus_client.samples import Sample diff --git a/test/registered/mla/test_mla.py b/test/registered/mla/test_mla.py index 197025dcbc7f..2df93449e423 100644 --- a/test/registered/mla/test_mla.py +++ b/test/registered/mla/test_mla.py @@ -16,7 +16,7 @@ register_cuda_ci(est_time=194, suite="stage-b-test-small-1-gpu") register_amd_ci( est_time=242, - suite="stage-a-test-1", + suite="stage-b-test-small-1-gpu-amd", disabled="see https://github.com/sgl-project/sglang/issues/13107", ) diff --git a/test/registered/mla/test_mla_deepseek_v3.py b/test/registered/mla/test_mla_deepseek_v3.py index 50f25317745d..f3c46213dcba 100644 --- a/test/registered/mla/test_mla_deepseek_v3.py +++ b/test/registered/mla/test_mla_deepseek_v3.py @@ -19,7 +19,7 @@ register_cuda_ci(est_time=442, suite="stage-b-test-small-1-gpu") register_amd_ci( est_time=221, - suite="stage-a-test-1", + suite="stage-b-test-small-1-gpu-amd", disabled="see https://github.com/sgl-project/sglang/issues/12574", ) diff --git a/test/registered/models/test_compressed_tensors_models.py b/test/registered/models/test_compressed_tensors_models.py index e86b74b65adc..0a9d82778490 100644 --- a/test/registered/models/test_compressed_tensors_models.py +++ b/test/registered/models/test_compressed_tensors_models.py @@ -2,7 +2,7 @@ # Model tests for compressed tensors (FP8) register_cuda_ci(est_time=42, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=42, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=42, suite="stage-b-test-small-1-gpu-amd") import unittest from types import SimpleNamespace diff --git a/test/registered/models/test_cross_encoder_models.py b/test/registered/models/test_cross_encoder_models.py index 7fc5f465922a..cc60d5d4ae99 100644 --- a/test/registered/models/test_cross_encoder_models.py +++ b/test/registered/models/test_cross_encoder_models.py @@ -2,7 +2,7 @@ # Cross encoder model tests register_cuda_ci(est_time=100, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=150, suite="stage-b-test-small-1-gpu-amd") import multiprocessing as mp import random diff --git a/test/registered/models/test_embedding_models.py b/test/registered/models/test_embedding_models.py index 440ae57a3dc3..03ebe8a12ea3 100644 --- a/test/registered/models/test_embedding_models.py +++ b/test/registered/models/test_embedding_models.py @@ -4,7 +4,7 @@ register_cuda_ci(est_time=73, suite="stage-b-test-small-1-gpu") register_amd_ci( est_time=73, - suite="stage-b-test-small-1-gpu", + suite="stage-b-test-small-1-gpu-amd", disabled="see https://github.com/sgl-project/sglang/issues/11127", ) diff --git a/test/registered/models/test_qwen_models.py b/test/registered/models/test_qwen_models.py index beba81b7f604..cdfa4c06f00d 100644 --- a/test/registered/models/test_qwen_models.py +++ b/test/registered/models/test_qwen_models.py @@ -2,7 +2,7 @@ # Qwen model tests register_cuda_ci(est_time=90, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=130, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=130, suite="stage-b-test-small-1-gpu-amd") import unittest from types import SimpleNamespace diff --git a/test/registered/models/test_reward_models.py b/test/registered/models/test_reward_models.py index 3a197c421b53..2f615d272f71 100644 --- a/test/registered/models/test_reward_models.py +++ b/test/registered/models/test_reward_models.py @@ -2,7 +2,7 @@ # Reward model tests register_cuda_ci(est_time=103, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=132, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=132, suite="stage-b-test-small-1-gpu-amd") # Copyright 2023-2024 SGLang Team # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/test/registered/models/test_transformers_models.py b/test/registered/models/test_transformers_models.py index 782b7a6c6267..cf570a14b6b3 100644 --- a/test/registered/models/test_transformers_models.py +++ b/test/registered/models/test_transformers_models.py @@ -2,7 +2,7 @@ # Transformers fallback model tests register_cuda_ci(est_time=245, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=320, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=320, suite="stage-b-test-small-1-gpu-amd") import dataclasses import multiprocessing as mp diff --git a/test/registered/models/test_vlm_models.py b/test/registered/models/test_vlm_models.py index 4afd39a00102..c05875ebf72c 100644 --- a/test/registered/models/test_vlm_models.py +++ b/test/registered/models/test_vlm_models.py @@ -2,7 +2,7 @@ # VLM (Vision Language Model) tests register_cuda_ci(est_time=270, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=420, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=420, suite="stage-b-test-small-1-gpu-amd") import argparse import random diff --git a/test/registered/moe/test_fused_moe.py b/test/registered/moe/test_fused_moe.py index 96a2418f7dca..91e61aaab953 100644 --- a/test/registered/moe/test_fused_moe.py +++ b/test/registered/moe/test_fused_moe.py @@ -1,7 +1,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=80, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=30, suite="stage-a-test-1") +register_amd_ci(est_time=30, suite="stage-b-test-small-1-gpu-amd") import unittest diff --git a/test/registered/openai_server/basic/test_openai_embedding.py b/test/registered/openai_server/basic/test_openai_embedding.py index 11e5249c0300..1432a5307b3c 100644 --- a/test/registered/openai_server/basic/test_openai_embedding.py +++ b/test/registered/openai_server/basic/test_openai_embedding.py @@ -14,7 +14,7 @@ ) register_cuda_ci(est_time=70, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=141, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=141, suite="stage-b-test-small-1-gpu-amd") class TestOpenAIEmbedding(CustomTestCase): diff --git a/test/registered/openai_server/basic/test_openai_server.py b/test/registered/openai_server/basic/test_openai_server.py index 1d515663ca5f..3f27fb500e38 100644 --- a/test/registered/openai_server/basic/test_openai_server.py +++ b/test/registered/openai_server/basic/test_openai_server.py @@ -29,7 +29,7 @@ ) register_cuda_ci(est_time=184, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=149, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=149, suite="stage-b-test-small-1-gpu-amd") class TestOpenAIServer(CustomTestCase): diff --git a/test/registered/openai_server/basic/test_protocol.py b/test/registered/openai_server/basic/test_protocol.py index d40b10e78e89..47bf563816ef 100644 --- a/test/registered/openai_server/basic/test_protocol.py +++ b/test/registered/openai_server/basic/test_protocol.py @@ -31,7 +31,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=3, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu-amd") class TestModelCard(unittest.TestCase): diff --git a/test/registered/openai_server/basic/test_serving_chat.py b/test/registered/openai_server/basic/test_serving_chat.py index 796d1ac48d18..d81f2efb051f 100644 --- a/test/registered/openai_server/basic/test_serving_chat.py +++ b/test/registered/openai_server/basic/test_serving_chat.py @@ -24,7 +24,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=10, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu-amd") class _MockTokenizerManager: diff --git a/test/registered/openai_server/basic/test_serving_completions.py b/test/registered/openai_server/basic/test_serving_completions.py index 504ee235838a..34c262e6050e 100644 --- a/test/registered/openai_server/basic/test_serving_completions.py +++ b/test/registered/openai_server/basic/test_serving_completions.py @@ -14,7 +14,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=10, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu-amd") class _MockTemplateManager: diff --git a/test/registered/openai_server/basic/test_serving_embedding.py b/test/registered/openai_server/basic/test_serving_embedding.py index 2f7f8d8cdf62..8f5b45022f94 100644 --- a/test/registered/openai_server/basic/test_serving_embedding.py +++ b/test/registered/openai_server/basic/test_serving_embedding.py @@ -17,7 +17,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=10, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=10, suite="stage-b-test-small-1-gpu-amd") # Mock TokenizerManager for embedding tests diff --git a/test/registered/openai_server/features/test_enable_thinking.py b/test/registered/openai_server/features/test_enable_thinking.py index d9e4ae83a589..f723036dd80a 100644 --- a/test/registered/openai_server/features/test_enable_thinking.py +++ b/test/registered/openai_server/features/test_enable_thinking.py @@ -22,7 +22,7 @@ ) register_cuda_ci(est_time=70, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd") class TestEnableThinking(CustomTestCase): diff --git a/test/registered/openai_server/features/test_json_mode.py b/test/registered/openai_server/features/test_json_mode.py index 38395d1c9120..946d8e0ffb8a 100644 --- a/test/registered/openai_server/features/test_json_mode.py +++ b/test/registered/openai_server/features/test_json_mode.py @@ -14,7 +14,7 @@ ) register_cuda_ci(est_time=109, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=180, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=180, suite="stage-b-test-small-1-gpu-amd") class TestJSONModeMixin: diff --git a/test/registered/openai_server/features/test_openai_server_ebnf.py b/test/registered/openai_server/features/test_openai_server_ebnf.py index 8618b90ef538..165bbcf78a7e 100644 --- a/test/registered/openai_server/features/test_openai_server_ebnf.py +++ b/test/registered/openai_server/features/test_openai_server_ebnf.py @@ -14,7 +14,7 @@ ) register_cuda_ci(est_time=7, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=20, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=20, suite="stage-b-test-small-1-gpu-amd") # ------------------------------------------------------------------------- diff --git a/test/registered/openai_server/features/test_openai_server_hidden_states.py b/test/registered/openai_server/features/test_openai_server_hidden_states.py index edd058088e96..0ac3d3265ee9 100644 --- a/test/registered/openai_server/features/test_openai_server_hidden_states.py +++ b/test/registered/openai_server/features/test_openai_server_hidden_states.py @@ -19,7 +19,7 @@ register_cuda_ci(est_time=186, suite="stage-b-test-small-1-gpu") register_amd_ci( est_time=186, - suite="stage-b-test-small-1-gpu", + suite="stage-b-test-small-1-gpu-amd", disabled="see https://github.com/sgl-project/sglang/issues/11127", ) diff --git a/test/registered/openai_server/features/test_reasoning_content.py b/test/registered/openai_server/features/test_reasoning_content.py index fd94c0e642e3..2c59921fe3e1 100644 --- a/test/registered/openai_server/features/test_reasoning_content.py +++ b/test/registered/openai_server/features/test_reasoning_content.py @@ -24,7 +24,7 @@ ) register_cuda_ci(est_time=89, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=89, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=89, suite="stage-b-test-small-1-gpu-amd") class TestReasoningContentAPI(CustomTestCase): diff --git a/test/registered/openai_server/function_call/test_openai_function_calling.py b/test/registered/openai_server/function_call/test_openai_function_calling.py index 25233dd412cc..d3fdd68239c0 100644 --- a/test/registered/openai_server/function_call/test_openai_function_calling.py +++ b/test/registered/openai_server/function_call/test_openai_function_calling.py @@ -15,7 +15,7 @@ ) register_cuda_ci(est_time=60, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=73, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=73, suite="stage-b-test-small-1-gpu-amd") class TestOpenAIServerFunctionCalling(CustomTestCase): diff --git a/test/registered/openai_server/function_call/test_tool_choice.py b/test/registered/openai_server/function_call/test_tool_choice.py index daf070dc67fb..b12cd70d0ced 100644 --- a/test/registered/openai_server/function_call/test_tool_choice.py +++ b/test/registered/openai_server/function_call/test_tool_choice.py @@ -23,7 +23,7 @@ ) register_cuda_ci(est_time=120, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=258, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=258, suite="stage-b-test-small-1-gpu-amd") class TestToolChoiceLlama32(CustomTestCase): diff --git a/test/registered/openai_server/validation/test_large_max_new_tokens.py b/test/registered/openai_server/validation/test_large_max_new_tokens.py index 37c701625f83..8b4fb9f7d4ca 100644 --- a/test/registered/openai_server/validation/test_large_max_new_tokens.py +++ b/test/registered/openai_server/validation/test_large_max_new_tokens.py @@ -23,7 +23,7 @@ ) register_cuda_ci(est_time=41, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=41, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=41, suite="stage-b-test-small-1-gpu-amd") class TestLargeMaxNewTokens(CustomTestCase): diff --git a/test/registered/openai_server/validation/test_matched_stop.py b/test/registered/openai_server/validation/test_matched_stop.py index ff218ecea860..aa890e363165 100644 --- a/test/registered/openai_server/validation/test_matched_stop.py +++ b/test/registered/openai_server/validation/test_matched_stop.py @@ -12,7 +12,7 @@ ) register_cuda_ci(est_time=40, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu-amd") class TestMatchedStop(CustomTestCase, MatchedStopMixin): diff --git a/test/registered/openai_server/validation/test_openai_server_ignore_eos.py b/test/registered/openai_server/validation/test_openai_server_ignore_eos.py index d3342ffed3de..bbf696d41ac3 100644 --- a/test/registered/openai_server/validation/test_openai_server_ignore_eos.py +++ b/test/registered/openai_server/validation/test_openai_server_ignore_eos.py @@ -12,7 +12,7 @@ ) register_cuda_ci(est_time=6, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=47, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=47, suite="stage-b-test-small-1-gpu-amd") class TestOpenAIServerIgnoreEOS(CustomTestCase): diff --git a/test/registered/openai_server/validation/test_request_length_validation.py b/test/registered/openai_server/validation/test_request_length_validation.py index dc23ec45241e..4053728631c9 100644 --- a/test/registered/openai_server/validation/test_request_length_validation.py +++ b/test/registered/openai_server/validation/test_request_length_validation.py @@ -13,7 +13,7 @@ ) register_cuda_ci(est_time=38, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=31, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=31, suite="stage-b-test-small-1-gpu-amd") class TestRequestLengthValidation(CustomTestCase): diff --git a/test/registered/ops/test_repeat_interleave.py b/test/registered/ops/test_repeat_interleave.py index 1eace460afdb..528766ba1bba 100644 --- a/test/registered/ops/test_repeat_interleave.py +++ b/test/registered/ops/test_repeat_interleave.py @@ -2,7 +2,7 @@ # Ops - Repeat Interleave tests (1-GPU) register_cuda_ci(est_time=60, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=75, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=75, suite="stage-b-test-small-1-gpu-amd") import time from typing import Tuple diff --git a/test/registered/profiling/test_start_profile.py b/test/registered/profiling/test_start_profile.py index 1c1acc6cceb3..ddd19b6bb8a9 100644 --- a/test/registered/profiling/test_start_profile.py +++ b/test/registered/profiling/test_start_profile.py @@ -30,7 +30,7 @@ ) register_cuda_ci(est_time=41, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu-amd") OUTPUT_DIR = "./profiler_dir" diff --git a/test/registered/quant/test_block_int8.py b/test/registered/quant/test_block_int8.py index b3787315fc80..91f2cd47411f 100644 --- a/test/registered/quant/test_block_int8.py +++ b/test/registered/quant/test_block_int8.py @@ -11,7 +11,7 @@ from sglang.test.test_utils import CustomTestCase register_cuda_ci(est_time=44, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=22, suite="stage-a-test-1") +register_amd_ci(est_time=22, suite="stage-b-test-small-1-gpu-amd") # For test diff --git a/test/registered/quant/test_eval_fp8_accuracy.py b/test/registered/quant/test_eval_fp8_accuracy.py index f92ca4977a7b..0c8e6cec780d 100644 --- a/test/registered/quant/test_eval_fp8_accuracy.py +++ b/test/registered/quant/test_eval_fp8_accuracy.py @@ -15,7 +15,7 @@ ) register_cuda_ci(est_time=250, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=303, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=303, suite="stage-b-test-small-1-gpu-amd") class TestEvalFP8Accuracy(CustomTestCase): diff --git a/test/registered/quant/test_triton_scaled_mm.py b/test/registered/quant/test_triton_scaled_mm.py index aac9acc269ad..35a30d710ae9 100644 --- a/test/registered/quant/test_triton_scaled_mm.py +++ b/test/registered/quant/test_triton_scaled_mm.py @@ -9,7 +9,7 @@ from sglang.test.test_utils import CustomTestCase register_cuda_ci(est_time=8, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=12, suite="stage-a-test-1") +register_amd_ci(est_time=12, suite="stage-b-test-small-1-gpu-amd") def torch_scaled_mm( diff --git a/test/registered/rl/test_fp32_lm_head.py b/test/registered/rl/test_fp32_lm_head.py index 44740eda998b..4d4acee9a822 100644 --- a/test/registered/rl/test_fp32_lm_head.py +++ b/test/registered/rl/test_fp32_lm_head.py @@ -1,7 +1,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=9, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=15, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=15, suite="stage-b-test-small-1-gpu-amd") import unittest from types import SimpleNamespace diff --git a/test/registered/rl/test_update_weights_from_disk.py b/test/registered/rl/test_update_weights_from_disk.py index dc9535525675..9376735ac849 100644 --- a/test/registered/rl/test_update_weights_from_disk.py +++ b/test/registered/rl/test_update_weights_from_disk.py @@ -1,7 +1,9 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=210, suite="stage-b-test-small-1-gpu", disabled="see #14021") -register_amd_ci(est_time=210, suite="stage-b-test-small-1-gpu", disabled="see #14021") +register_amd_ci( + est_time=210, suite="stage-b-test-small-1-gpu-amd", disabled="see #14021" +) import json import random diff --git a/test/registered/rotary/test_mrope.py b/test/registered/rotary/test_mrope.py index 463e01dd0088..217197d7f580 100644 --- a/test/registered/rotary/test_mrope.py +++ b/test/registered/rotary/test_mrope.py @@ -2,7 +2,7 @@ # Rotary Embedding - MRoPE tests (1-GPU) register_cuda_ci(est_time=10, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=15, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=15, suite="stage-b-test-small-1-gpu-amd") from typing import NamedTuple diff --git a/test/registered/sampling/test_original_logprobs.py b/test/registered/sampling/test_original_logprobs.py index f024a172e7a0..9093dea5106c 100644 --- a/test/registered/sampling/test_original_logprobs.py +++ b/test/registered/sampling/test_original_logprobs.py @@ -18,7 +18,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=41, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=60, suite="stage-b-test-small-1-gpu-amd") import random import unittest diff --git a/test/registered/sampling/test_penalty.py b/test/registered/sampling/test_penalty.py index 921d34cc1e5b..d73b5d62a4e7 100644 --- a/test/registered/sampling/test_penalty.py +++ b/test/registered/sampling/test_penalty.py @@ -10,7 +10,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=82, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=82, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=82, suite="stage-b-test-small-1-gpu-amd") from sglang.test.test_utils import ( DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, diff --git a/test/registered/sampling/test_pytorch_sampling_backend.py b/test/registered/sampling/test_pytorch_sampling_backend.py index 058708b7656b..51bf254a6971 100644 --- a/test/registered/sampling/test_pytorch_sampling_backend.py +++ b/test/registered/sampling/test_pytorch_sampling_backend.py @@ -7,7 +7,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci register_cuda_ci(est_time=66, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=66, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=66, suite="stage-b-test-small-1-gpu-amd") from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, diff --git a/test/registered/tokenizer/test_multi_tokenizer.py b/test/registered/tokenizer/test_multi_tokenizer.py index a293727627f3..e214dd1b6a8e 100644 --- a/test/registered/tokenizer/test_multi_tokenizer.py +++ b/test/registered/tokenizer/test_multi_tokenizer.py @@ -18,7 +18,7 @@ ) register_cuda_ci(est_time=230, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=345, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=345, suite="stage-b-test-small-1-gpu-amd") class TestMultiTokenizer(CustomTestCase): diff --git a/test/registered/tokenizer/test_skip_tokenizer_init.py b/test/registered/tokenizer/test_skip_tokenizer_init.py index 7303fdb0422a..7d95c19cf48f 100644 --- a/test/registered/tokenizer/test_skip_tokenizer_init.py +++ b/test/registered/tokenizer/test_skip_tokenizer_init.py @@ -24,7 +24,7 @@ ) register_cuda_ci(est_time=77, suite="stage-b-test-small-1-gpu") -register_amd_ci(est_time=117, suite="stage-b-test-small-1-gpu") +register_amd_ci(est_time=117, suite="stage-b-test-small-1-gpu-amd") class TestSkipTokenizerInit(CustomTestCase): diff --git a/test/run_suite.py b/test/run_suite.py index 7d465f326130..e2944781cb84 100644 --- a/test/run_suite.py +++ b/test/run_suite.py @@ -20,7 +20,8 @@ HWBackend.CPU: ["default", "stage-a-cpu-only"], HWBackend.AMD: [ "stage-a-test-1", - "stage-b-test-small-1-gpu", + "stage-b-test-small-1-gpu-amd", + "stage-b-test-small-1-gpu-amd-mi35x", "stage-b-test-large-2-gpu-amd", ], HWBackend.CUDA: [