diff --git a/python/sglang/test/ci/ci_register.py b/python/sglang/test/ci/ci_register.py index 014c408dd5e7..7c62349407d0 100644 --- a/python/sglang/test/ci/ci_register.py +++ b/python/sglang/test/ci/ci_register.py @@ -16,7 +16,12 @@ "ut_parse_one_file", ] +# `suite` stays in positional slot 2 for backward compat with existing +# `register_cpu_ci(5, "stage-a-test-cpu")` style positional calls. New fields +# (`stage`, `runner_config`) are kwarg-only. _PARAM_ORDER = ("est_time", "suite", "nightly", "disabled") +_KWARG_ONLY = ("stage", "runner_config") +_ALL_PARAMS = _PARAM_ORDER + _KWARG_ONLY _UNSET = object() @@ -31,25 +36,43 @@ class HWBackend(Enum): class CIRegistry: backend: HWBackend filename: str - # Estimated time to run the test in seconds. est_time: float - # The suite this test is registered in. - suite: str - # Whether the test is a nightly test. + stage: Optional[str] = None + runner_config: Optional[str] = None + # Legacy single-string suite; kept for nightly/stress/weekly + AMD/CPU/NPU + # suites whose names don't follow `{stage}-test-{runner_config}` shape. + suite: Optional[str] = None nightly: bool = False - # Reason for disabling the test. None = enabled, string = disabled with reason. disabled: Optional[str] = None + @property + def effective_suite(self) -> Optional[str]: + if self.stage is not None and self.runner_config is not None: + return f"{self.stage}-test-{self.runner_config}" + return self.suite + def register_cpu_ci( - est_time: float, suite: str, nightly: bool = False, disabled: Optional[str] = None + est_time: float, + suite: Optional[str] = None, + nightly: bool = False, + disabled: Optional[str] = None, + *, + stage: Optional[str] = None, + runner_config: Optional[str] = None, ): """Marker for CPU CI registration (parsed via AST; runtime no-op).""" return None def register_cuda_ci( - est_time: float, suite: str, nightly: bool = False, disabled: Optional[str] = None + est_time: float, + suite: Optional[str] = None, + nightly: bool = False, + disabled: Optional[str] = None, + *, + stage: Optional[str] = None, + runner_config: Optional[str] = None, ): """Marker for CUDA CI registration (parsed via AST; runtime no-op).""" return None @@ -57,9 +80,12 @@ def register_cuda_ci( def register_amd_ci( est_time: float, - suite: str, + suite: Optional[str] = None, nightly: bool = False, disabled: Optional[str] = None, + *, + stage: Optional[str] = None, + runner_config: Optional[str] = None, ): """Marker for AMD CI registration (parsed via AST; runtime no-op).""" return None @@ -67,9 +93,12 @@ def register_amd_ci( def register_npu_ci( est_time: float, - suite: str, + suite: Optional[str] = None, nightly: bool = False, disabled: Optional[str] = None, + *, + stage: Optional[str] = None, + runner_config: Optional[str] = None, ): """Marker for NPU CI registration (parsed via AST; runtime no-op).""" return None @@ -94,10 +123,8 @@ def _constant_value(self, node: ast.AST) -> object: return node.value return _UNSET - def _parse_call_args( - self, func_call: ast.Call - ) -> tuple[float, str, bool, Optional[str]]: - args = {name: _UNSET for name in _PARAM_ORDER} + def _parse_call_args(self, func_call: ast.Call) -> dict: + args = {name: _UNSET for name in _ALL_PARAMS} seen = set() if any(isinstance(arg, ast.Starred) for arg in func_call.args): @@ -129,23 +156,49 @@ def _parse_call_args( seen.add(kw.arg) args[kw.arg] = self._constant_value(kw.value) - if args["est_time"] is _UNSET or args["suite"] is _UNSET: + if args["est_time"] is _UNSET: raise ValueError( - f"{self.filename}: est_time and suite are required constants in {func_call.func.id}()" + f"{self.filename}: est_time is a required constant in {func_call.func.id}()" ) - est_time, suite = args["est_time"], args["suite"] - nightly_value = args["nightly"] + # The only valid (stage, runner_config, suite) shapes are: + # (set, set, unset) -> new-style pair + # (unset, unset, set) -> legacy single-string + # Any other combination is rejected with the actual triple in the error. + stage_set = args["stage"] is not _UNSET + runner_set = args["runner_config"] is not _UNSET + suite_set = args["suite"] is not _UNSET + valid_shape = (stage_set and runner_set and not suite_set) or ( + not stage_set and not runner_set and suite_set + ) + if not valid_shape: + raise ValueError( + f"{self.filename}: {func_call.func.id}() must specify exactly one of " + f"(stage, runner_config) pair or suite; got stage={stage_set}, " + f"runner_config={runner_set}, suite={suite_set}" + ) + est_time = args["est_time"] if not isinstance(est_time, (int, float)): raise ValueError( f"{self.filename}: est_time must be a number in {func_call.func.id}()" ) - if not isinstance(suite, str): + + suite = args["suite"] if suite_set else None + if suite is not None and not isinstance(suite, str): raise ValueError( f"{self.filename}: suite must be a string in {func_call.func.id}()" ) + stage = args["stage"] if stage_set else None + runner_config = args["runner_config"] if runner_set else None + for name, value in (("stage", stage), ("runner_config", runner_config)): + if value is not None and not isinstance(value, str): + raise ValueError( + f"{self.filename}: {name} must be a string in {func_call.func.id}()" + ) + + nightly_value = args["nightly"] if nightly_value is _UNSET: nightly = False elif isinstance(nightly_value, bool): @@ -161,7 +214,14 @@ def _parse_call_args( f"{self.filename}: disabled must be a string in {func_call.func.id}()" ) - return float(est_time), suite, nightly, disabled + return { + "est_time": float(est_time), + "stage": stage, + "runner_config": runner_config, + "suite": suite, + "nightly": nightly, + "disabled": disabled, + } def _collect_ci_registry(self, func_call: ast.Call): if not isinstance(func_call.func, ast.Name): @@ -171,14 +231,11 @@ def _collect_ci_registry(self, func_call: ast.Call): if backend is None: return None - est_time, suite, nightly, disabled = self._parse_call_args(func_call) + parsed = self._parse_call_args(func_call) return CIRegistry( backend=backend, filename=self.filename, - est_time=est_time, - suite=suite, - nightly=nightly, - disabled=disabled, + **parsed, ) @staticmethod diff --git a/scripts/ci/utils/ci_coverage_report.py b/scripts/ci/utils/ci_coverage_report.py index 717b801fa07e..80a5f9239615 100755 --- a/scripts/ci/utils/ci_coverage_report.py +++ b/scripts/ci/utils/ci_coverage_report.py @@ -155,7 +155,9 @@ def generate_summary_section(data: dict) -> str: for t in sorted(disabled_tests, key=lambda x: (x.backend.name, x.filename)): test_name = get_test_basename(t.filename) reason = t.disabled[:50] + "..." if len(t.disabled) > 50 else t.disabled - lines.append(f"| `{test_name}` | {t.backend.name} | {t.suite} | {reason} |") + lines.append( + f"| `{test_name}` | {t.backend.name} | {t.effective_suite} | {reason} |" + ) lines.append("\n\n") return "\n".join(lines) @@ -197,7 +199,7 @@ def generate_by_folder_section(data: dict) -> str: else ("Nightly" if t.nightly else "Per-Commit") ) lines.append( - f"| `{test_name}` | {t.suite} | {t.est_time:.0f}s | {status} |" + f"| `{test_name}` | {t.effective_suite} | {t.est_time:.0f}s | {status} |" ) lines.append("") @@ -231,7 +233,7 @@ def generate_by_suite_section(data: dict) -> str: # Group by suite within backend backend_suites = defaultdict(list) for t in backend_tests: - backend_suites[t.suite].append(t) + backend_suites[t.effective_suite].append(t) for suite in sorted(backend_suites.keys()): suite_tests = backend_suites[suite] @@ -336,7 +338,7 @@ def generate_json_report(tests: list[CIRegistry]) -> str: data["tests_by_folder"][folder]["backends"][backend] = [ { "filename": get_test_basename(t.filename), - "suite": t.suite, + "suite": t.effective_suite, "est_time": t.est_time, "status": ( "disabled" @@ -355,7 +357,7 @@ def generate_json_report(tests: list[CIRegistry]) -> str: backend_suites = defaultdict(list) for t in backend_tests: - backend_suites[t.suite].append(t) + backend_suites[t.effective_suite].append(t) data["tests_by_suite"][backend] = { "total": len(backend_tests), @@ -423,7 +425,7 @@ def generate_json_report(tests: list[CIRegistry]) -> str: { "filename": get_test_basename(t.filename), "backend": t.backend.name, - "suite": t.suite, + "suite": t.effective_suite, "reason": t.disabled, } ) diff --git a/scripts/ci/utils/compute_partitions.py b/scripts/ci/utils/compute_partitions.py index c86a6d9c6a32..88754c464c5e 100644 --- a/scripts/ci/utils/compute_partitions.py +++ b/scripts/ci/utils/compute_partitions.py @@ -88,7 +88,7 @@ def compute_partitions(tests, full_parallel=False): continue if t.nightly or t.disabled is not None: continue - suite_tests[t.suite].append(t) + suite_tests[t.effective_suite].append(t) result = {} for suite, group in suite_tests.items(): diff --git a/test/registered/4-gpu-models/test_deepseek_v3_cutedsl_4gpu.py b/test/registered/4-gpu-models/test_deepseek_v3_cutedsl_4gpu.py index 6f89bdfaacc3..1effb77f32ed 100644 --- a/test/registered/4-gpu-models/test_deepseek_v3_cutedsl_4gpu.py +++ b/test/registered/4-gpu-models/test_deepseek_v3_cutedsl_4gpu.py @@ -14,7 +14,7 @@ try_cached_model, ) -register_cuda_ci(est_time=1800, suite="stage-c-test-4-gpu-gb200") +register_cuda_ci(est_time=1800, stage="stage-c", runner_config="4-gpu-gb200") class TestDeepseekR1Nvfp4CuteDSLDeepEP(CustomTestCase): diff --git a/test/registered/4-gpu-models/test_gpt_oss_4gpu.py b/test/registered/4-gpu-models/test_gpt_oss_4gpu.py index e2e9ac475238..2d13560e6e6c 100644 --- a/test/registered/4-gpu-models/test_gpt_oss_4gpu.py +++ b/test/registered/4-gpu-models/test_gpt_oss_4gpu.py @@ -3,8 +3,8 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.gpt_oss_common import BaseTestGptOss -register_cuda_ci(est_time=392, suite="stage-c-test-4-gpu-h100") -register_cuda_ci(est_time=740, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=392, stage="stage-c", runner_config="4-gpu-h100") +register_cuda_ci(est_time=740, stage="stage-c", runner_config="4-gpu-b200") class TestGptOss4Gpu(BaseTestGptOss): diff --git a/test/registered/4-gpu-models/test_nvidia_nemotron_3_super_nvfp4.py b/test/registered/4-gpu-models/test_nvidia_nemotron_3_super_nvfp4.py index 38c1395bbcc5..96361446b615 100644 --- a/test/registered/4-gpu-models/test_nvidia_nemotron_3_super_nvfp4.py +++ b/test/registered/4-gpu-models/test_nvidia_nemotron_3_super_nvfp4.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=710, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=710, stage="stage-c", runner_config="4-gpu-b200") NEMOTRON_3_SUPER_NVFP4_MODEL = "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4" diff --git a/test/registered/4-gpu-models/test_qwen35_fp4_mtp_v2.py b/test/registered/4-gpu-models/test_qwen35_fp4_mtp_v2.py index 669ed41f45b5..d9ee7586a12a 100644 --- a/test/registered/4-gpu-models/test_qwen35_fp4_mtp_v2.py +++ b/test/registered/4-gpu-models/test_qwen35_fp4_mtp_v2.py @@ -15,7 +15,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=540, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=540, stage="stage-c", runner_config="4-gpu-b200") QWEN35_FP4_MODEL = "nvidia/Qwen3.5-397B-A17B-NVFP4" ACC_THRESHOLDS = {QWEN35_FP4_MODEL: {"gsm8k": 0.95}} diff --git a/test/registered/4-gpu-models/test_qwen35_hicache.py b/test/registered/4-gpu-models/test_qwen35_hicache.py index 5dc05062e1e5..696adfb7b7e9 100644 --- a/test/registered/4-gpu-models/test_qwen35_hicache.py +++ b/test/registered/4-gpu-models/test_qwen35_hicache.py @@ -22,7 +22,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=540, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=540, stage="stage-c", runner_config="4-gpu-h100") QWEN35_27B_MODEL = "Qwen/Qwen3.5-27B" ACC_THRESHOLDS = {QWEN35_27B_MODEL: {"gsm8k": 0.8}} diff --git a/test/registered/4-gpu-models/test_qwen35_models.py b/test/registered/4-gpu-models/test_qwen35_models.py index 7fc8a42adc7f..695d25c30188 100644 --- a/test/registered/4-gpu-models/test_qwen35_models.py +++ b/test/registered/4-gpu-models/test_qwen35_models.py @@ -17,7 +17,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=260, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=260, stage="stage-c", runner_config="4-gpu-b200") QWEN35_FP4_MODEL = "nvidia/Qwen3.5-397B-A17B-NVFP4" ACC_THRESHOLDS = {QWEN35_FP4_MODEL: {"gsm8k": 0.95}} diff --git a/test/registered/4-gpu-models/test_qwen3_30b.py b/test/registered/4-gpu-models/test_qwen3_30b.py index d2461da927aa..238564599b0a 100644 --- a/test/registered/4-gpu-models/test_qwen3_30b.py +++ b/test/registered/4-gpu-models/test_qwen3_30b.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=261, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=261, stage="stage-c", runner_config="4-gpu-h100") QWEN3_30B_MODEL_PATH = "Qwen/Qwen3-30B-A3B-FP8" diff --git a/test/registered/4-gpu-models/test_qwen3_next_models_mtp.py b/test/registered/4-gpu-models/test_qwen3_next_models_mtp.py index 23d1a8f26b6f..1955dedc6344 100644 --- a/test/registered/4-gpu-models/test_qwen3_next_models_mtp.py +++ b/test/registered/4-gpu-models/test_qwen3_next_models_mtp.py @@ -6,7 +6,7 @@ from sglang.test.kits.prefix_cache_branching_kit import PrefixCacheBranchingMixin from sglang.test.server_fixtures.default_fixture import DefaultServerBase -register_cuda_ci(est_time=290, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=290, stage="stage-c", runner_config="4-gpu-h100") QWEN3_NEXT_MODEL = "Qwen/Qwen3-Next-80B-A3B-Instruct" diff --git a/test/registered/8-gpu-models/test_deepseek_v32_indexcache.py b/test/registered/8-gpu-models/test_deepseek_v32_indexcache.py index e9b3d863314f..6e861c74d8d5 100644 --- a/test/registered/8-gpu-models/test_deepseek_v32_indexcache.py +++ b/test/registered/8-gpu-models/test_deepseek_v32_indexcache.py @@ -13,7 +13,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=492, suite="stage-c-test-8-gpu-h200") +register_cuda_ci(est_time=492, stage="stage-c", runner_config="8-gpu-h200") DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2" diff --git a/test/registered/8-gpu-models/test_deepseek_v3_mtp.py b/test/registered/8-gpu-models/test_deepseek_v3_mtp.py index 8089d1167cb0..dc80d67822bd 100644 --- a/test/registered/8-gpu-models/test_deepseek_v3_mtp.py +++ b/test/registered/8-gpu-models/test_deepseek_v3_mtp.py @@ -17,7 +17,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=309, suite="stage-c-test-8-gpu-h200") +register_cuda_ci(est_time=309, stage="stage-c", runner_config="8-gpu-h200") FULL_DEEPSEEK_V3_MODEL_PATH = "deepseek-ai/DeepSeek-V3-0324" diff --git a/test/registered/8-gpu-models/test_dsa_models_hisparse.py b/test/registered/8-gpu-models/test_dsa_models_hisparse.py index 4b2bf068598c..73287134fa03 100644 --- a/test/registered/8-gpu-models/test_dsa_models_hisparse.py +++ b/test/registered/8-gpu-models/test_dsa_models_hisparse.py @@ -12,7 +12,9 @@ write_github_step_summary, ) -register_cuda_ci(est_time=720, suite="stage-c-test-8-gpu-h200", nightly=True) +register_cuda_ci( + est_time=720, stage="stage-c", runner_config="8-gpu-h200", nightly=True +) GLM5_MODEL_PATH = "zai-org/GLM-5-FP8" diff --git a/test/registered/8-gpu-models/test_dsa_models_mtp.py b/test/registered/8-gpu-models/test_dsa_models_mtp.py index bf7dcae03743..acc24d60c5fb 100644 --- a/test/registered/8-gpu-models/test_dsa_models_mtp.py +++ b/test/registered/8-gpu-models/test_dsa_models_mtp.py @@ -18,7 +18,8 @@ register_cuda_ci( est_time=1048, - suite="stage-c-test-8-gpu-h200", + stage="stage-c", + runner_config="8-gpu-h200", ) FULL_DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2" diff --git a/test/registered/8-gpu-models/test_mimo_models.py b/test/registered/8-gpu-models/test_mimo_models.py index cc266bbe20e3..99f229b99090 100644 --- a/test/registered/8-gpu-models/test_mimo_models.py +++ b/test/registered/8-gpu-models/test_mimo_models.py @@ -6,7 +6,7 @@ from sglang.test.server_fixtures.default_fixture import DefaultServerBase from sglang.test.server_fixtures.mmmu_fixture import MMMUServerBase -register_cuda_ci(est_time=610, suite="stage-c-test-8-gpu-h200") +register_cuda_ci(est_time=610, stage="stage-c", runner_config="8-gpu-h200") class TestMiMoV2Flash(GSM8KMixin, SpecDecodingMixin, DefaultServerBase): diff --git a/test/registered/8-gpu-models/test_minimax_m25_basic.py b/test/registered/8-gpu-models/test_minimax_m25_basic.py index 894dfe89baa0..9b2af51bc390 100644 --- a/test/registered/8-gpu-models/test_minimax_m25_basic.py +++ b/test/registered/8-gpu-models/test_minimax_m25_basic.py @@ -14,7 +14,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=307, suite="stage-c-test-8-gpu-h200") +register_cuda_ci(est_time=307, stage="stage-c", runner_config="8-gpu-h200") MINIMAX_M25_MODEL_PATH = "MiniMaxAI/MiniMax-M2.5" diff --git a/test/registered/8-gpu-models/test_nvidia_nemotron_3_super_bf16.py b/test/registered/8-gpu-models/test_nvidia_nemotron_3_super_bf16.py index 0e35999b38d7..79fa002f73fc 100644 --- a/test/registered/8-gpu-models/test_nvidia_nemotron_3_super_bf16.py +++ b/test/registered/8-gpu-models/test_nvidia_nemotron_3_super_bf16.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=376, suite="stage-c-test-8-gpu-h200") +register_cuda_ci(est_time=376, stage="stage-c", runner_config="8-gpu-h200") NEMOTRON_3_SUPER_BF16_MODEL = "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" diff --git a/test/registered/8-gpu-models/test_return_indexer_topk.py b/test/registered/8-gpu-models/test_return_indexer_topk.py index c686c7c752fb..a854dcb89d63 100644 --- a/test/registered/8-gpu-models/test_return_indexer_topk.py +++ b/test/registered/8-gpu-models/test_return_indexer_topk.py @@ -17,7 +17,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=600, suite="stage-c-test-8-gpu-h200") +register_cuda_ci(est_time=600, stage="stage-c", runner_config="8-gpu-h200") DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2" diff --git a/test/registered/8-gpu-models/test_step3p5_flash_chain_mtp.py b/test/registered/8-gpu-models/test_step3p5_flash_chain_mtp.py index 84a1f6b0fa8f..c5a825882fe3 100644 --- a/test/registered/8-gpu-models/test_step3p5_flash_chain_mtp.py +++ b/test/registered/8-gpu-models/test_step3p5_flash_chain_mtp.py @@ -17,7 +17,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=663, suite="stage-c-test-8-gpu-h200") +register_cuda_ci(est_time=663, stage="stage-c", runner_config="8-gpu-h200") STEP3P5_FLASH_MODEL_PATH = "stepfun-ai/Step-3.5-Flash" diff --git a/test/registered/attention/test_chunk_gated_delta_rule.py b/test/registered/attention/test_chunk_gated_delta_rule.py index e9f9c7ad51b1..f28a720e8d78 100644 --- a/test/registered/attention/test_chunk_gated_delta_rule.py +++ b/test/registered/attention/test_chunk_gated_delta_rule.py @@ -8,7 +8,7 @@ ) from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=11, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=11, stage="stage-b", runner_config="1-gpu-large") @unittest.skipIf(not torch.cuda.is_available(), "Test requires CUDA") diff --git a/test/registered/attention/test_create_kvindices.py b/test/registered/attention/test_create_kvindices.py index c1bd28cac2d2..c52581c41873 100644 --- a/test/registered/attention/test_create_kvindices.py +++ b/test/registered/attention/test_create_kvindices.py @@ -9,7 +9,7 @@ from sglang.test.test_utils import CustomTestCase # Triton kernel unit test for KV indices creation -register_cuda_ci(est_time=7, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=7, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=10, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/attention/test_flash_attention_4.py b/test/registered/attention/test_flash_attention_4.py index 22909630718c..0d2e4aeb324f 100644 --- a/test/registered/attention/test_flash_attention_4.py +++ b/test/registered/attention/test_flash_attention_4.py @@ -13,7 +13,7 @@ ) # FlashAttention4 integration test (requires SM 100+ / Blackwell B200) -register_cuda_ci(est_time=265, suite="stage-b-test-4-gpu-b200") +register_cuda_ci(est_time=265, stage="stage-b", runner_config="4-gpu-b200") @unittest.skipIf(get_device_sm() < 100, "Test requires CUDA SM 100 or higher") diff --git a/test/registered/attention/test_gdn_noncontiguous_stride.py b/test/registered/attention/test_gdn_noncontiguous_stride.py index af807978bef8..2ba172952d6f 100644 --- a/test/registered/attention/test_gdn_noncontiguous_stride.py +++ b/test/registered/attention/test_gdn_noncontiguous_stride.py @@ -14,7 +14,7 @@ ) from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=7, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=7, stage="stage-b", runner_config="1-gpu-large") def _make_noncontiguous_ab(batch, num_heads, dtype=torch.bfloat16, device="cuda"): diff --git a/test/registered/attention/test_hybrid_attn_backend.py b/test/registered/attention/test_hybrid_attn_backend.py index d999438429dc..3c6ec10ca2a6 100644 --- a/test/registered/attention/test_hybrid_attn_backend.py +++ b/test/registered/attention/test_hybrid_attn_backend.py @@ -20,7 +20,7 @@ # Hybrid attention backend tests (FA3 prefill + FlashInfer decode, requires SM 90+ / H100) # Multiple test classes: base, MLA, TorchCompile, SpecDecode variants -register_cuda_ci(est_time=407, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=407, stage="stage-b", runner_config="1-gpu-large") GSM_DATASET_PATH = None diff --git a/test/registered/attention/test_kda_kernels.py b/test/registered/attention/test_kda_kernels.py index 0c11e236e221..b44ef7f017db 100644 --- a/test/registered/attention/test_kda_kernels.py +++ b/test/registered/attention/test_kda_kernels.py @@ -14,7 +14,7 @@ from sglang.srt.utils.common import get_device from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=12, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=12, stage="stage-b", runner_config="1-gpu-large") @unittest.skipIf( diff --git a/test/registered/attention/test_normal_decode_set_metadata.py b/test/registered/attention/test_normal_decode_set_metadata.py index b4c6e5c120ae..4deb2a4185f2 100644 --- a/test/registered/attention/test_normal_decode_set_metadata.py +++ b/test/registered/attention/test_normal_decode_set_metadata.py @@ -21,7 +21,7 @@ from sglang.test.test_utils import CustomTestCase # Register this test for CUDA CI in stage-b (fast attention/kernel tests) -register_cuda_ci(est_time=11, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=11, stage="stage-b", runner_config="1-gpu-large") def reference_normal_decode_set_metadata( diff --git a/test/registered/attention/test_torch_native_attention_backend.py b/test/registered/attention/test_torch_native_attention_backend.py index eba008c64c93..67f76ef4707f 100644 --- a/test/registered/attention/test_torch_native_attention_backend.py +++ b/test/registered/attention/test_torch_native_attention_backend.py @@ -18,7 +18,7 @@ ) # Torch native attention backend integration test with MMLU eval -register_cuda_ci(est_time=140, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=140, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=150, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/attention/test_triton_attention_backend.py b/test/registered/attention/test_triton_attention_backend.py index e8558d13c5c4..b2aac8dda42b 100644 --- a/test/registered/attention/test_triton_attention_backend.py +++ b/test/registered/attention/test_triton_attention_backend.py @@ -20,7 +20,7 @@ ) # Triton attention backend integration test with latency benchmark and MMLU eval -register_cuda_ci(est_time=177, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=177, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=1400, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/attention/test_triton_attention_kernels.py b/test/registered/attention/test_triton_attention_kernels.py index 9dde8c16d034..8002df396f8c 100644 --- a/test/registered/attention/test_triton_attention_kernels.py +++ b/test/registered/attention/test_triton_attention_kernels.py @@ -23,7 +23,7 @@ from sglang.test.test_utils import CustomTestCase, is_in_amd_ci # Triton attention kernel unit tests (decode, extend, prefill) -register_cuda_ci(est_time=19, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=19, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=30, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/attention/test_triton_sliding_window.py b/test/registered/attention/test_triton_sliding_window.py index 42aa770e30eb..0f03f6f11060 100644 --- a/test/registered/attention/test_triton_sliding_window.py +++ b/test/registered/attention/test_triton_sliding_window.py @@ -16,7 +16,7 @@ ) # Sliding window attention with Triton backend (Gemma-3 model) -register_cuda_ci(est_time=93, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=93, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=200, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/backends/test_torch_compile.py b/test/registered/backends/test_torch_compile.py index d884631f936c..68a9eb36fbe6 100644 --- a/test/registered/backends/test_torch_compile.py +++ b/test/registered/backends/test_torch_compile.py @@ -15,7 +15,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=126, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=126, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=1100, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/breakable_cuda_graph/test_breakable_cuda_graph.py b/test/registered/breakable_cuda_graph/test_breakable_cuda_graph.py index e69674dd7924..aeb391769dad 100644 --- a/test/registered/breakable_cuda_graph/test_breakable_cuda_graph.py +++ b/test/registered/breakable_cuda_graph/test_breakable_cuda_graph.py @@ -23,7 +23,7 @@ ) # CI Registration — large suite to fit the integration test's server startup. -register_cuda_ci(est_time=79, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=79, stage="stage-b", runner_config="1-gpu-large") def _skip_if_no_cuda(test_func): diff --git a/test/registered/constrained_decoding/test_constrained_decoding.py b/test/registered/constrained_decoding/test_constrained_decoding.py index 9a71f75fd4cf..54c9e01a1205 100644 --- a/test/registered/constrained_decoding/test_constrained_decoding.py +++ b/test/registered/constrained_decoding/test_constrained_decoding.py @@ -13,7 +13,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=120, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=179, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/core/test_deterministic.py b/test/registered/core/test_deterministic.py index f28db8d0b5f2..97d9c5c8da07 100644 --- a/test/registered/core/test_deterministic.py +++ b/test/registered/core/test_deterministic.py @@ -16,7 +16,7 @@ ) from sglang.test.test_utils import is_in_amd_ci -register_cuda_ci(est_time=207, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=207, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=278, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/core/test_engine_child_pids.py b/test/registered/core/test_engine_child_pids.py index eaa103e4057a..e9bbf0a9233e 100644 --- a/test/registered/core/test_engine_child_pids.py +++ b/test/registered/core/test_engine_child_pids.py @@ -20,7 +20,7 @@ CustomTestCase, ) -register_cuda_ci(est_time=77, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=77, stage="stage-b", runner_config="1-gpu-small") class TestEngineChildPids(CustomTestCase): diff --git a/test/registered/core/test_gemma4_moe_deterministic.py b/test/registered/core/test_gemma4_moe_deterministic.py index 7869b4cf31d2..109af8243328 100644 --- a/test/registered/core/test_gemma4_moe_deterministic.py +++ b/test/registered/core/test_gemma4_moe_deterministic.py @@ -23,7 +23,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=107, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=107, stage="stage-b", runner_config="2-gpu-large") PROMPT = ( diff --git a/test/registered/core/test_gpt_oss_sm120.py b/test/registered/core/test_gpt_oss_sm120.py index d453d3dab01e..16815bde6ea4 100644 --- a/test/registered/core/test_gpt_oss_sm120.py +++ b/test/registered/core/test_gpt_oss_sm120.py @@ -5,7 +5,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.gpt_oss_common import BaseTestGptOss -register_cuda_ci(est_time=345, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=345, stage="stage-b", runner_config="1-gpu-small") @unittest.skipIf(not torch.cuda.is_available(), "CUDA is not available") diff --git a/test/registered/core/test_hidden_states.py b/test/registered/core/test_hidden_states.py index 2f83fea90071..454393d116b4 100644 --- a/test/registered/core/test_hidden_states.py +++ b/test/registered/core/test_hidden_states.py @@ -8,7 +8,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase -register_cuda_ci(est_time=45, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=45, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=55, suite="stage-b-test-1-gpu-small-amd") _is_hip = is_hip() diff --git a/test/registered/core/test_mm_process_config.py b/test/registered/core/test_mm_process_config.py index aef4fd3c5f78..6fbdb295df5f 100644 --- a/test/registered/core/test_mm_process_config.py +++ b/test/registered/core/test_mm_process_config.py @@ -4,7 +4,7 @@ from sglang.srt.server_args import ServerArgs from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=1, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/core/test_request_queue_validation.py b/test/registered/core/test_request_queue_validation.py index 82147c41f38b..9fb096c7f930 100644 --- a/test/registered/core/test_request_queue_validation.py +++ b/test/registered/core/test_request_queue_validation.py @@ -17,7 +17,7 @@ send_generate_requests, ) -register_cuda_ci(est_time=53, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=53, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=70, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/core/test_srt_endpoint.py b/test/registered/core/test_srt_endpoint.py index 061bded0eb83..1108a005b0ee 100644 --- a/test/registered/core/test_srt_endpoint.py +++ b/test/registered/core/test_srt_endpoint.py @@ -28,7 +28,7 @@ run_logprob_check, ) -register_cuda_ci(est_time=134, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=134, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=130, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/core/test_srt_engine.py b/test/registered/core/test_srt_engine.py index 2d769631a4c0..86d1ecc10204 100644 --- a/test/registered/core/test_srt_engine.py +++ b/test/registered/core/test_srt_engine.py @@ -22,7 +22,7 @@ CustomTestCase, ) -register_cuda_ci(est_time=387, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=387, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=261, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/cp/test_deepseek_v32_cp_single_node.py b/test/registered/cp/test_deepseek_v32_cp_single_node.py index 8bb1c632874e..85c48c8e44e4 100644 --- a/test/registered/cp/test_deepseek_v32_cp_single_node.py +++ b/test/registered/cp/test_deepseek_v32_cp_single_node.py @@ -15,7 +15,8 @@ register_cuda_ci( est_time=616, - suite="stage-c-test-deepep-8-gpu-h200", + stage="stage-c", + runner_config="deepep-8-gpu-h200", ) DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2" diff --git a/test/registered/debug_utils/test_cuda_coredump.py b/test/registered/debug_utils/test_cuda_coredump.py index 2354175fefaf..21e2becef48a 100644 --- a/test/registered/debug_utils/test_cuda_coredump.py +++ b/test/registered/debug_utils/test_cuda_coredump.py @@ -12,7 +12,8 @@ register_cuda_ci( est_time=10, - suite="stage-a-test-1-gpu-small", + stage="stage-a", + runner_config="1-gpu-small", disabled="Manual only: triggers intentional CUDA crash for coredump verification", ) diff --git a/test/registered/debug_utils/test_tensor_dump_forward_hook.py b/test/registered/debug_utils/test_tensor_dump_forward_hook.py index a4176eecdba8..df8f7527f964 100644 --- a/test/registered/debug_utils/test_tensor_dump_forward_hook.py +++ b/test/registered/debug_utils/test_tensor_dump_forward_hook.py @@ -19,7 +19,8 @@ register_cuda_ci( est_time=9, - suite="stage-b-test-1-gpu-small", + stage="stage-b", + runner_config="1-gpu-small", disabled="Test uses pytest-style function without TestCase class - see #17145", ) register_amd_ci( diff --git a/test/registered/disaggregation/test_disaggregation_basic.py b/test/registered/disaggregation/test_disaggregation_basic.py index fdd62ce90ad2..f14cc17561fd 100644 --- a/test/registered/disaggregation/test_disaggregation_basic.py +++ b/test/registered/disaggregation/test_disaggregation_basic.py @@ -21,7 +21,7 @@ DEFAULT_TARGET_MODEL_EAGLE3, ) -register_cuda_ci(est_time=509, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=509, stage="stage-b", runner_config="2-gpu-large") class TestDisaggregationAccuracy(PauseResumeInPlaceMixin, PDDisaggregationServerBase): diff --git a/test/registered/disaggregation/test_disaggregation_decode_offload.py b/test/registered/disaggregation/test_disaggregation_decode_offload.py index 3bc2108031a2..6891758975de 100644 --- a/test/registered/disaggregation/test_disaggregation_decode_offload.py +++ b/test/registered/disaggregation/test_disaggregation_decode_offload.py @@ -19,7 +19,8 @@ # Increasing estimated time since we run evaluation twice register_cuda_ci( est_time=600, - suite="stage-b-test-2-gpu-large", + stage="stage-b", + runner_config="2-gpu-large", disabled="Temporarily disable the flaky test.", ) diff --git a/test/registered/disaggregation/test_disaggregation_xpu.py b/test/registered/disaggregation/test_disaggregation_xpu.py index 3b42e17dcd9b..e3a5e6a941c8 100644 --- a/test/registered/disaggregation/test_disaggregation_xpu.py +++ b/test/registered/disaggregation/test_disaggregation_xpu.py @@ -26,7 +26,8 @@ register_cuda_ci( est_time=300, - suite="stage-a-test-1-gpu-small", + stage="stage-a", + runner_config="1-gpu-small", disabled="Intel XPU only — not available in standard CUDA CI", ) diff --git a/test/registered/disaggregation/test_specv2_kvcache_offloading.py b/test/registered/disaggregation/test_specv2_kvcache_offloading.py index 639d5b44fbc2..8d70919d1893 100644 --- a/test/registered/disaggregation/test_specv2_kvcache_offloading.py +++ b/test/registered/disaggregation/test_specv2_kvcache_offloading.py @@ -17,7 +17,7 @@ ) from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=8, stage="stage-b", runner_config="1-gpu-small") def _make_mock_req( diff --git a/test/registered/distributed/test_data_parallelism.py b/test/registered/distributed/test_data_parallelism.py index fa7ac217091f..f79b8eae77be 100644 --- a/test/registered/distributed/test_data_parallelism.py +++ b/test/registered/distributed/test_data_parallelism.py @@ -14,7 +14,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=91, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=91, stage="stage-b", runner_config="2-gpu-large") register_amd_ci(est_time=73, suite="stage-b-test-2-gpu-large-amd") diff --git a/test/registered/distributed/test_disaggregation_aarch64.py b/test/registered/distributed/test_disaggregation_aarch64.py index c5a6f3e93c8a..850343569b88 100644 --- a/test/registered/distributed/test_disaggregation_aarch64.py +++ b/test/registered/distributed/test_disaggregation_aarch64.py @@ -13,7 +13,7 @@ popen_launch_pd_server, ) -register_cuda_ci(est_time=300, suite="stage-c-test-4-gpu-gb200") +register_cuda_ci(est_time=300, stage="stage-c", runner_config="4-gpu-gb200") class TestDisaggregationMooncakeAARCH64Accuracy(PDDisaggregationServerBase): diff --git a/test/registered/distributed/test_disaggregation_decode_radix_cache.py b/test/registered/distributed/test_disaggregation_decode_radix_cache.py index f1ecc9080203..57a15bc3df0a 100644 --- a/test/registered/distributed/test_disaggregation_decode_radix_cache.py +++ b/test/registered/distributed/test_disaggregation_decode_radix_cache.py @@ -16,7 +16,7 @@ try_cached_model, ) -register_cuda_ci(est_time=300, suite="stage-c-test-8-gpu-h20") +register_cuda_ci(est_time=300, stage="stage-c", runner_config="8-gpu-h20") def _has_nixl(): diff --git a/test/registered/distributed/test_disaggregation_different_tp.py b/test/registered/distributed/test_disaggregation_different_tp.py index fc6215685853..c9d725b87252 100644 --- a/test/registered/distributed/test_disaggregation_different_tp.py +++ b/test/registered/distributed/test_disaggregation_different_tp.py @@ -16,7 +16,7 @@ try_cached_model, ) -register_cuda_ci(est_time=375, suite="stage-c-test-8-gpu-h20") +register_cuda_ci(est_time=375, stage="stage-c", runner_config="8-gpu-h20") class TestDisaggregationMooncakePrefillLargerTP(PDDisaggregationServerBase): diff --git a/test/registered/distributed/test_disaggregation_dp_attention.py b/test/registered/distributed/test_disaggregation_dp_attention.py index e6348f83be88..b64a5559b3da 100644 --- a/test/registered/distributed/test_disaggregation_dp_attention.py +++ b/test/registered/distributed/test_disaggregation_dp_attention.py @@ -16,7 +16,7 @@ try_cached_model, ) -register_cuda_ci(est_time=443, suite="stage-c-test-8-gpu-h20") +register_cuda_ci(est_time=443, stage="stage-c", runner_config="8-gpu-h20") class TestDisaggregationDPAttention(PDDisaggregationServerBase): diff --git a/test/registered/distributed/test_disaggregation_dsv4.py b/test/registered/distributed/test_disaggregation_dsv4.py index d36844e2d6b8..392996508728 100644 --- a/test/registered/distributed/test_disaggregation_dsv4.py +++ b/test/registered/distributed/test_disaggregation_dsv4.py @@ -12,7 +12,7 @@ try_cached_model, ) -register_cuda_ci(est_time=250, suite="stage-c-test-dsv4-8-gpu-h200") +register_cuda_ci(est_time=250, stage="stage-c", runner_config="dsv4-8-gpu-h200") DSV4_FLASH_MODEL = "sgl-project/DeepSeek-V4-Flash-FP8" diff --git a/test/registered/distributed/test_disaggregation_hybrid_attention.py b/test/registered/distributed/test_disaggregation_hybrid_attention.py index 439201d0e774..984431209ee1 100644 --- a/test/registered/distributed/test_disaggregation_hybrid_attention.py +++ b/test/registered/distributed/test_disaggregation_hybrid_attention.py @@ -12,7 +12,7 @@ popen_launch_pd_server, ) -register_cuda_ci(est_time=695, suite="stage-c-test-8-gpu-h200") +register_cuda_ci(est_time=695, stage="stage-c", runner_config="8-gpu-h200") @unittest.skipIf(is_in_ci(), "Temporarily disable the flaky test.") diff --git a/test/registered/distributed/test_disaggregation_pp.py b/test/registered/distributed/test_disaggregation_pp.py index 072b034addd0..e04c7b271d30 100644 --- a/test/registered/distributed/test_disaggregation_pp.py +++ b/test/registered/distributed/test_disaggregation_pp.py @@ -14,7 +14,7 @@ try_cached_model, ) -register_cuda_ci(est_time=216, suite="stage-c-test-8-gpu-h20") +register_cuda_ci(est_time=216, stage="stage-c", runner_config="8-gpu-h20") class TestDisaggregationPrefillPPAccuracy(PDDisaggregationServerBase): diff --git a/test/registered/distributed/test_dp_attention.py b/test/registered/distributed/test_dp_attention.py index 01e8f6565d19..cae258138b25 100644 --- a/test/registered/distributed/test_dp_attention.py +++ b/test/registered/distributed/test_dp_attention.py @@ -21,7 +21,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=420, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=420, stage="stage-b", runner_config="2-gpu-large") class TestDPAttentionDP2TP2( diff --git a/test/registered/distributed/test_flashinfer_fusion_preflight.py b/test/registered/distributed/test_flashinfer_fusion_preflight.py index 82b00e12d07f..118a0b55496a 100644 --- a/test/registered/distributed/test_flashinfer_fusion_preflight.py +++ b/test/registered/distributed/test_flashinfer_fusion_preflight.py @@ -11,7 +11,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=30, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=30, stage="stage-b", runner_config="2-gpu-large") WORLD_SIZE = 2 diff --git a/test/registered/distributed/test_load_weights_from_remote_instance.py b/test/registered/distributed/test_load_weights_from_remote_instance.py index e48377f97937..50a659244b14 100644 --- a/test/registered/distributed/test_load_weights_from_remote_instance.py +++ b/test/registered/distributed/test_load_weights_from_remote_instance.py @@ -38,7 +38,7 @@ mp.set_start_method("spawn", force=True) -register_cuda_ci(est_time=145, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=145, stage="stage-b", runner_config="2-gpu-large") register_amd_ci(est_time=72, suite="stage-b-test-2-gpu-large-amd") diff --git a/test/registered/distributed/test_parallel_state.py b/test/registered/distributed/test_parallel_state.py index 0c3f7eb620af..1a3994c4998d 100644 --- a/test/registered/distributed/test_parallel_state.py +++ b/test/registered/distributed/test_parallel_state.py @@ -43,7 +43,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=8, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=8, suite="stage-b-test-1-gpu-small-amd") # Import the actual parallel_state module diff --git a/test/registered/distributed/test_pp_single_node.py b/test/registered/distributed/test_pp_single_node.py index 13ed425092f9..519bb5573f32 100644 --- a/test/registered/distributed/test_pp_single_node.py +++ b/test/registered/distributed/test_pp_single_node.py @@ -32,7 +32,7 @@ run_bench_one_batch_server, ) -register_cuda_ci(est_time=554, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=554, stage="stage-c", runner_config="4-gpu-h100") register_amd_ci(est_time=650, suite="stage-c-test-4-gpu-amd") diff --git a/test/registered/dllm/test_llada2_mini.py b/test/registered/dllm/test_llada2_mini.py index e56d3dcbd984..33ed3c861e5e 100644 --- a/test/registered/dllm/test_llada2_mini.py +++ b/test/registered/dllm/test_llada2_mini.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=139, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=139, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=330, suite="stage-b-test-1-gpu-small-amd") import unittest diff --git a/test/registered/dsv4/test_deepseek_v4_flash_fp4_b200.py b/test/registered/dsv4/test_deepseek_v4_flash_fp4_b200.py index 78ba68c7b0c4..a19ef8720272 100644 --- a/test/registered/dsv4/test_deepseek_v4_flash_fp4_b200.py +++ b/test/registered/dsv4/test_deepseek_v4_flash_fp4_b200.py @@ -21,7 +21,7 @@ try_cached_model, ) -register_cuda_ci(est_time=1800, suite="stage-c-test-dsv4-4-gpu-b200") +register_cuda_ci(est_time=1800, stage="stage-c", runner_config="dsv4-4-gpu-b200") MODEL = "deepseek-ai/DeepSeek-V4-Flash" SERVER_LAUNCH_TIMEOUT = 3600 diff --git a/test/registered/dsv4/test_deepseek_v4_flash_fp4_h200.py b/test/registered/dsv4/test_deepseek_v4_flash_fp4_h200.py index c5c7dbc178a5..e556a09e4374 100644 --- a/test/registered/dsv4/test_deepseek_v4_flash_fp4_h200.py +++ b/test/registered/dsv4/test_deepseek_v4_flash_fp4_h200.py @@ -21,7 +21,7 @@ try_cached_model, ) -register_cuda_ci(est_time=1800, suite="stage-c-test-dsv4-8-gpu-h200") +register_cuda_ci(est_time=1800, stage="stage-c", runner_config="dsv4-8-gpu-h200") def _flashinfer_has_sm90_cutlass_mxfp4() -> bool: diff --git a/test/registered/dsv4/test_deepseek_v4_flash_fp8_h200.py b/test/registered/dsv4/test_deepseek_v4_flash_fp8_h200.py index 790985e08e4e..38e04e70a141 100644 --- a/test/registered/dsv4/test_deepseek_v4_flash_fp8_h200.py +++ b/test/registered/dsv4/test_deepseek_v4_flash_fp8_h200.py @@ -22,7 +22,7 @@ try_cached_model, ) -register_cuda_ci(est_time=900, suite="stage-c-test-dsv4-8-gpu-h200") +register_cuda_ci(est_time=900, stage="stage-c", runner_config="dsv4-8-gpu-h200") MODEL_FP8 = "sgl-project/DeepSeek-V4-Flash-FP8" SERVER_LAUNCH_TIMEOUT = 3600 diff --git a/test/registered/ep/test_deepep_large.py b/test/registered/ep/test_deepep_large.py index a400ae73d105..b032dc4abb4f 100644 --- a/test/registered/ep/test_deepep_large.py +++ b/test/registered/ep/test_deepep_large.py @@ -16,7 +16,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=528, suite="stage-c-test-deepep-8-gpu-h200") +register_cuda_ci(est_time=528, stage="stage-c", runner_config="deepep-8-gpu-h200") DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2" diff --git a/test/registered/ep/test_deepep_small.py b/test/registered/ep/test_deepep_small.py index 0ed244774126..abc672cfc971 100644 --- a/test/registered/ep/test_deepep_small.py +++ b/test/registered/ep/test_deepep_small.py @@ -16,7 +16,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=478, suite="stage-c-test-deepep-4-gpu-h100") +register_cuda_ci(est_time=478, stage="stage-c", runner_config="deepep-4-gpu-h100") class TestPureDP(CustomTestCase): diff --git a/test/registered/ep/test_mooncake_ep_small.py b/test/registered/ep/test_mooncake_ep_small.py index 61e0cd04e3e1..3fac6eb6959c 100644 --- a/test/registered/ep/test_mooncake_ep_small.py +++ b/test/registered/ep/test_mooncake_ep_small.py @@ -15,7 +15,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=82, suite="stage-c-test-deepep-4-gpu-h100") +register_cuda_ci(est_time=82, stage="stage-c", runner_config="deepep-4-gpu-h100") ib_devices = get_rdma_devices_args() diff --git a/test/registered/hicache/test_hicache_spec_file_storage.py b/test/registered/hicache/test_hicache_spec_file_storage.py index 55816767d276..602530764fb3 100644 --- a/test/registered/hicache/test_hicache_spec_file_storage.py +++ b/test/registered/hicache/test_hicache_spec_file_storage.py @@ -30,7 +30,7 @@ ) from sglang.utils import wait_for_http_ready -register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=200, stage="stage-b", runner_config="1-gpu-large") @unittest.skipIf(is_hip(), "HiCache + EAGLE3 file-storage loadback e2e is CUDA-only.") diff --git a/test/registered/hicache/test_hicache_storage.py b/test/registered/hicache/test_hicache_storage.py index 112178a7611d..1ae355593b2b 100644 --- a/test/registered/hicache/test_hicache_storage.py +++ b/test/registered/hicache/test_hicache_storage.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=99, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=99, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=300, suite="stage-b-test-1-gpu-small-amd") import time diff --git a/test/registered/hicache/test_hicache_storage_3fs_backend.py b/test/registered/hicache/test_hicache_storage_3fs_backend.py index 8bf69623d44e..124b43d83720 100644 --- a/test/registered/hicache/test_hicache_storage_3fs_backend.py +++ b/test/registered/hicache/test_hicache_storage_3fs_backend.py @@ -13,7 +13,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=150, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=150, stage="stage-b", runner_config="2-gpu-large") register_amd_ci(est_time=300, suite="stage-b-test-2-gpu-large") diff --git a/test/registered/hicache/test_hicache_storage_file_backend.py b/test/registered/hicache/test_hicache_storage_file_backend.py index 99fd26b4036b..163a9fe7b2c1 100644 --- a/test/registered/hicache/test_hicache_storage_file_backend.py +++ b/test/registered/hicache/test_hicache_storage_file_backend.py @@ -31,7 +31,7 @@ ) from sglang.utils import wait_for_http_ready -register_cuda_ci(est_time=148, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=148, stage="stage-b", runner_config="2-gpu-large") register_amd_ci(est_time=526, suite="stage-b-test-2-gpu-large-amd") diff --git a/test/registered/hicache/test_hicache_storage_mooncake_backend.py b/test/registered/hicache/test_hicache_storage_mooncake_backend.py index 15d84a240dc8..15ae0f05b05a 100644 --- a/test/registered/hicache/test_hicache_storage_mooncake_backend.py +++ b/test/registered/hicache/test_hicache_storage_mooncake_backend.py @@ -21,7 +21,7 @@ is_in_ci, ) -register_cuda_ci(est_time=236, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=236, stage="stage-b", runner_config="2-gpu-large") class HiCacheStorageMooncakeBackendBaseMixin(HiCacheStorageBaseMixin): diff --git a/test/registered/hicache/test_hicache_storage_runtime_attach_detach.py b/test/registered/hicache/test_hicache_storage_runtime_attach_detach.py index b46ca93f377b..cf74ba4e8108 100644 --- a/test/registered/hicache/test_hicache_storage_runtime_attach_detach.py +++ b/test/registered/hicache/test_hicache_storage_runtime_attach_detach.py @@ -28,7 +28,7 @@ ) from sglang.utils import wait_for_http_ready -register_cuda_ci(est_time=139, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=139, stage="stage-b", runner_config="2-gpu-large") class TestHiCacheStorageRuntimeAttachDetach(CustomTestCase): diff --git a/test/registered/hicache/test_hicache_variants.py b/test/registered/hicache/test_hicache_variants.py index c769cf40d0d6..6712204462c5 100644 --- a/test/registered/hicache/test_hicache_variants.py +++ b/test/registered/hicache/test_hicache_variants.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=450, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=450, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=524, suite="stage-b-test-1-gpu-small-amd") """ Consolidated HiCache variant tests. diff --git a/test/registered/input_embedding/test_input_embeddings.py b/test/registered/input_embedding/test_input_embeddings.py index 1616fe9427a0..32736e8efe28 100644 --- a/test/registered/input_embedding/test_input_embeddings.py +++ b/test/registered/input_embedding/test_input_embeddings.py @@ -16,7 +16,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=42, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=42, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=38, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/input_embedding/test_input_embeds_chunked.py b/test/registered/input_embedding/test_input_embeds_chunked.py index f3aad1abff46..800058930aaf 100644 --- a/test/registered/input_embedding/test_input_embeds_chunked.py +++ b/test/registered/input_embedding/test_input_embeds_chunked.py @@ -30,7 +30,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=43, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=43, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=43, suite="stage-b-test-1-gpu-small-amd") CHUNKED_PREFILL_SIZE = 256 diff --git a/test/registered/kernels/test_nsa_indexer.py b/test/registered/kernels/test_nsa_indexer.py index 9aaba4568997..56958121ccff 100644 --- a/test/registered/kernels/test_nsa_indexer.py +++ b/test/registered/kernels/test_nsa_indexer.py @@ -24,7 +24,7 @@ from sglang.srt.server_args import ServerArgs, set_global_server_args_for_scheduler from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=18, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=18, stage="stage-b", runner_config="1-gpu-large") # Global configuration for all indexer tests DEFAULT_CONFIG = { diff --git a/test/registered/language/test_srt_backend.py b/test/registered/language/test_srt_backend.py index 23567f1197df..849331ae87b8 100644 --- a/test/registered/language/test_srt_backend.py +++ b/test/registered/language/test_srt_backend.py @@ -20,7 +20,7 @@ ) from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase -register_cuda_ci(est_time=79, suite="stage-a-test-1-gpu-small") +register_cuda_ci(est_time=79, stage="stage-a", runner_config="1-gpu-small") register_amd_ci(est_time=120, suite="stage-a-test-1-gpu-small-amd") diff --git a/test/registered/layers/mamba/test_causal_conv1d.py b/test/registered/layers/mamba/test_causal_conv1d.py index 66053c5b68dc..06f0958f54eb 100644 --- a/test/registered/layers/mamba/test_causal_conv1d.py +++ b/test/registered/layers/mamba/test_causal_conv1d.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=11, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=11, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=25, suite="stage-b-test-1-gpu-small-amd") # Adapted from https://github.com/vllm-project/vllm/blob/main/tests/kernels/mamba/test_causal_conv1d.py diff --git a/test/registered/layers/mamba/test_mamba2_mixer.py b/test/registered/layers/mamba/test_mamba2_mixer.py index 5e4797912e74..5fd7e62d120f 100644 --- a/test/registered/layers/mamba/test_mamba2_mixer.py +++ b/test/registered/layers/mamba/test_mamba2_mixer.py @@ -18,7 +18,7 @@ from sglang.srt.utils import get_device, get_device_count from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=32, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=32, stage="stage-b", runner_config="2-gpu-large") NUM_GPUS = 2 diff --git a/test/registered/layers/mamba/test_mamba_ssm.py b/test/registered/layers/mamba/test_mamba_ssm.py index 7c98efb8dc02..510bd8aee509 100644 --- a/test/registered/layers/mamba/test_mamba_ssm.py +++ b/test/registered/layers/mamba/test_mamba_ssm.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=10, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=20, suite="stage-b-test-1-gpu-small-amd") # Adapted from https://github.com/vllm-project/vllm/blob/633f943e30a4444d890d26b81850f7217736f840/tests/kernels/mamba/test_mamba_ssm_ssd.py diff --git a/test/registered/layers/mamba/test_mamba_ssm_ssd.py b/test/registered/layers/mamba/test_mamba_ssm_ssd.py index 1606fb99b1cd..bd2b1b255a69 100644 --- a/test/registered/layers/mamba/test_mamba_ssm_ssd.py +++ b/test/registered/layers/mamba/test_mamba_ssm_ssd.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=10, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=34, suite="stage-b-test-1-gpu-small-amd") # Adapted from https://github.com/vllm-project/vllm/blob/633f943e30a4444d890d26b81850f7217736f840/tests/kernels/mamba/test_mamba_ssm_ssd.py diff --git a/test/registered/layers/test_fla_layernorm_guard.py b/test/registered/layers/test_fla_layernorm_guard.py index 9a99efda642f..b65415f7bab0 100644 --- a/test/registered/layers/test_fla_layernorm_guard.py +++ b/test/registered/layers/test_fla_layernorm_guard.py @@ -19,7 +19,8 @@ register_cuda_ci( est_time=60, - suite="stage-b-test-2-gpu-large", + stage="stage-b", + runner_config="2-gpu-large", disabled="Temporarily disabled", ) diff --git a/test/registered/lora/test_fused_moe_lora_kernel.py b/test/registered/lora/test_fused_moe_lora_kernel.py index 91fd2f55ebb6..5fc4bc8090da 100644 --- a/test/registered/lora/test_fused_moe_lora_kernel.py +++ b/test/registered/lora/test_fused_moe_lora_kernel.py @@ -15,7 +15,7 @@ # ============================================================================== -register_cuda_ci(est_time=28, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=28, stage="stage-b", runner_config="1-gpu-large") def round_up(x, base): diff --git a/test/registered/lora/test_lora_drainer.py b/test/registered/lora/test_lora_drainer.py index 5b7d97e6d235..e1b68fa9d40d 100644 --- a/test/registered/lora/test_lora_drainer.py +++ b/test/registered/lora/test_lora_drainer.py @@ -12,7 +12,7 @@ ) from sglang.test.test_utils import is_in_ci -register_cuda_ci(est_time=100, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=100, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=100, suite="stage-b-test-1-gpu-small-amd") MOCK_START_TIME = 1000.0 diff --git a/test/registered/lora/test_lora_eviction.py b/test/registered/lora/test_lora_eviction.py index 940db7af06b1..6fad72acfeb2 100644 --- a/test/registered/lora/test_lora_eviction.py +++ b/test/registered/lora/test_lora_eviction.py @@ -23,7 +23,7 @@ from sglang.test.runners import SRTRunner from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=263, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=263, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=224, suite="stage-b-test-1-gpu-small-amd") PROMPTS = [ diff --git a/test/registered/lora/test_lora_gpt_oss_20b_logprob_diff.py b/test/registered/lora/test_lora_gpt_oss_20b_logprob_diff.py index f6d3fab97e91..c7aa9609584b 100644 --- a/test/registered/lora/test_lora_gpt_oss_20b_logprob_diff.py +++ b/test/registered/lora/test_lora_gpt_oss_20b_logprob_diff.py @@ -36,7 +36,8 @@ register_cuda_ci( est_time=300, - suite="stage-c-test-4-gpu-b200", + stage="stage-c", + runner_config="4-gpu-b200", ) BASE_MODEL = "lmsys/gpt-oss-20b-bf16" diff --git a/test/registered/lora/test_lora_hf_sgl_logprob_diff.py b/test/registered/lora/test_lora_hf_sgl_logprob_diff.py index c32c100a527b..0f17d07883cd 100644 --- a/test/registered/lora/test_lora_hf_sgl_logprob_diff.py +++ b/test/registered/lora/test_lora_hf_sgl_logprob_diff.py @@ -41,7 +41,8 @@ register_cuda_ci( est_time=150, - suite="stage-b-test-1-gpu-small", + stage="stage-b", + runner_config="1-gpu-small", ) register_amd_ci( est_time=250, diff --git a/test/registered/lora/test_lora_moe_tp_logprob_diff.py b/test/registered/lora/test_lora_moe_tp_logprob_diff.py index 05a5c7b46d70..9214a9e1efb9 100644 --- a/test/registered/lora/test_lora_moe_tp_logprob_diff.py +++ b/test/registered/lora/test_lora_moe_tp_logprob_diff.py @@ -34,7 +34,8 @@ register_cuda_ci( est_time=200, - suite="stage-b-test-2-gpu-large", + stage="stage-b", + runner_config="2-gpu-large", ) LOGPROB_THRESHOLD = 5e-04 diff --git a/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py b/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py index 7a7b47bbc76f..a2f397a5e45f 100644 --- a/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py +++ b/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py @@ -26,7 +26,8 @@ register_cuda_ci( est_time=50, - suite="stage-b-test-1-gpu-large", + stage="stage-b", + runner_config="1-gpu-large", ) # Format: [{"text": "result string", "lps": [0.1, 0.2, ...]}, ...] diff --git a/test/registered/lora/test_lora_nemotron_3_super_120b_a12b_logprob_diff.py b/test/registered/lora/test_lora_nemotron_3_super_120b_a12b_logprob_diff.py index 60f247b08f80..4fd458f48294 100644 --- a/test/registered/lora/test_lora_nemotron_3_super_120b_a12b_logprob_diff.py +++ b/test/registered/lora/test_lora_nemotron_3_super_120b_a12b_logprob_diff.py @@ -36,7 +36,8 @@ register_cuda_ci( est_time=300, - suite="stage-c-test-4-gpu-b200", + stage="stage-c", + runner_config="4-gpu-b200", ) BASE_MODEL = "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" diff --git a/test/registered/lora/test_lora_overlap_loading.py b/test/registered/lora/test_lora_overlap_loading.py index a733d4a7c8ec..36964f7f3248 100644 --- a/test/registered/lora/test_lora_overlap_loading.py +++ b/test/registered/lora/test_lora_overlap_loading.py @@ -29,7 +29,7 @@ ) from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=48, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=48, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=75, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/lora/test_lora_qwen3_30b_a3b_instruct_2507_logprob_diff.py b/test/registered/lora/test_lora_qwen3_30b_a3b_instruct_2507_logprob_diff.py index c9647f52421f..a4ea150f5164 100644 --- a/test/registered/lora/test_lora_qwen3_30b_a3b_instruct_2507_logprob_diff.py +++ b/test/registered/lora/test_lora_qwen3_30b_a3b_instruct_2507_logprob_diff.py @@ -36,7 +36,8 @@ register_cuda_ci( est_time=160, - suite="stage-c-test-4-gpu-b200", + stage="stage-c", + runner_config="4-gpu-b200", ) BASE_MODEL = "Qwen/Qwen3-30B-A3B-Instruct-2507" diff --git a/test/registered/lora/test_lora_qwen3_5_35b_a3b_logprob_diff.py b/test/registered/lora/test_lora_qwen3_5_35b_a3b_logprob_diff.py index cd4001a435a9..b11b38ec0810 100644 --- a/test/registered/lora/test_lora_qwen3_5_35b_a3b_logprob_diff.py +++ b/test/registered/lora/test_lora_qwen3_5_35b_a3b_logprob_diff.py @@ -36,7 +36,8 @@ register_cuda_ci( est_time=160, - suite="stage-c-test-4-gpu-b200", + stage="stage-c", + runner_config="4-gpu-b200", ) BASE_MODEL = "Qwen/Qwen3.5-35B-A3B" diff --git a/test/registered/lora/test_lora_qwen3_5_4b_logprob_diff.py b/test/registered/lora/test_lora_qwen3_5_4b_logprob_diff.py index 33115a877b7c..8c514da488fc 100644 --- a/test/registered/lora/test_lora_qwen3_5_4b_logprob_diff.py +++ b/test/registered/lora/test_lora_qwen3_5_4b_logprob_diff.py @@ -36,7 +36,8 @@ register_cuda_ci( est_time=90, - suite="stage-b-test-1-gpu-large", + stage="stage-b", + runner_config="1-gpu-large", ) BASE_MODEL = "Qwen/Qwen3.5-4B" diff --git a/test/registered/lora/test_lora_qwen3_8b_logprob_diff.py b/test/registered/lora/test_lora_qwen3_8b_logprob_diff.py index cd6f59fe7c3b..7f2f9a431230 100644 --- a/test/registered/lora/test_lora_qwen3_8b_logprob_diff.py +++ b/test/registered/lora/test_lora_qwen3_8b_logprob_diff.py @@ -39,7 +39,8 @@ register_cuda_ci( est_time=40, - suite="stage-b-test-1-gpu-large", + stage="stage-b", + runner_config="1-gpu-large", ) BASE_MODEL = "Qwen/Qwen3-8B" diff --git a/test/registered/lora/test_lora_qwen3_vl_30b_a3b_instruct_logprob_diff.py b/test/registered/lora/test_lora_qwen3_vl_30b_a3b_instruct_logprob_diff.py index ca52832c7d4d..63ca79ef491a 100644 --- a/test/registered/lora/test_lora_qwen3_vl_30b_a3b_instruct_logprob_diff.py +++ b/test/registered/lora/test_lora_qwen3_vl_30b_a3b_instruct_logprob_diff.py @@ -36,7 +36,8 @@ register_cuda_ci( est_time=160, - suite="stage-c-test-4-gpu-b200", + stage="stage-c", + runner_config="4-gpu-b200", ) BASE_MODEL = "Qwen/Qwen3-VL-30B-A3B-Instruct" diff --git a/test/registered/lora/test_lora_tp.py b/test/registered/lora/test_lora_tp.py index 32c4352889da..d26dfe2348f5 100644 --- a/test/registered/lora/test_lora_tp.py +++ b/test/registered/lora/test_lora_tp.py @@ -31,7 +31,8 @@ register_cuda_ci( est_time=190, - suite="stage-c-test-8-gpu-h200", + stage="stage-c", + runner_config="8-gpu-h200", ) register_amd_ci( est_time=116, diff --git a/test/registered/lora/test_lora_update.py b/test/registered/lora/test_lora_update.py index 860520dba327..ea01a512df97 100644 --- a/test/registered/lora/test_lora_update.py +++ b/test/registered/lora/test_lora_update.py @@ -36,7 +36,8 @@ register_cuda_ci( est_time=487, - suite="stage-b-test-1-gpu-large", + stage="stage-b", + runner_config="1-gpu-large", ) PROMPTS = [ diff --git a/test/registered/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py index c13acdc93fcd..9cb87da63a66 100644 --- a/test/registered/lora/test_multi_lora_backend.py +++ b/test/registered/lora/test_multi_lora_backend.py @@ -25,7 +25,7 @@ ) from sglang.test.test_utils import CustomTestCase, is_in_ci -register_cuda_ci(est_time=99, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=99, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=100, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/lora/test_virtual_experts_kernels.py b/test/registered/lora/test_virtual_experts_kernels.py index c42457f06f48..94106b1238da 100644 --- a/test/registered/lora/test_virtual_experts_kernels.py +++ b/test/registered/lora/test_virtual_experts_kernels.py @@ -30,7 +30,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=15, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=15, stage="stage-b", runner_config="1-gpu-small") from sglang.srt.lora.triton_ops.virtual_experts import ( _align_block_size_jit, diff --git a/test/registered/mla/test_flashmla.py b/test/registered/mla/test_flashmla.py index 53c5964e46e4..a730865603ee 100644 --- a/test/registered/mla/test_flashmla.py +++ b/test/registered/mla/test_flashmla.py @@ -21,7 +21,7 @@ ) # FlashMLA attention backend tests with MTP speculative decoding -register_cuda_ci(est_time=160, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=160, stage="stage-b", runner_config="1-gpu-large") class TestFlashMLAMTP(CustomTestCase): diff --git a/test/registered/mla/test_mla_flashinfer.py b/test/registered/mla/test_mla_flashinfer.py index 254d9367ea69..e57eddbb3f4c 100644 --- a/test/registered/mla/test_mla_flashinfer.py +++ b/test/registered/mla/test_mla_flashinfer.py @@ -15,7 +15,7 @@ ) # FlashInfer MLA backend tests with MTP speculative decoding -register_cuda_ci(est_time=130, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=130, stage="stage-b", runner_config="1-gpu-large") class TestFlashinferMLAMTP(CustomTestCase): diff --git a/test/registered/mla/test_mla_fp8.py b/test/registered/mla/test_mla_fp8.py index eb793a530621..97ce56e38836 100644 --- a/test/registered/mla/test_mla_fp8.py +++ b/test/registered/mla/test_mla_fp8.py @@ -13,7 +13,7 @@ ) # MLA FP8 KV cache test with MGSM evaluation -register_cuda_ci(est_time=104, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=104, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=800, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/mla/test_mla_int8_deepseek_v3.py b/test/registered/mla/test_mla_int8_deepseek_v3.py index 65fddf8fb98a..6d31368346cb 100644 --- a/test/registered/mla/test_mla_int8_deepseek_v3.py +++ b/test/registered/mla/test_mla_int8_deepseek_v3.py @@ -16,7 +16,7 @@ ) # DeepSeek-V3 INT8 quantization tests (channel and block INT8) -register_cuda_ci(est_time=160, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=160, stage="stage-b", runner_config="1-gpu-large") class TestDeepseekV3MTPChannelInt8(CustomTestCase): diff --git a/test/registered/model_loading/test_external_models.py b/test/registered/model_loading/test_external_models.py index cd546d99875c..a03bffd333d5 100644 --- a/test/registered/model_loading/test_external_models.py +++ b/test/registered/model_loading/test_external_models.py @@ -5,7 +5,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=29, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=29, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=45, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/model_loading/test_utils_update_weights.py b/test/registered/model_loading/test_utils_update_weights.py index f79b6306cd44..ff9bd3a54e40 100644 --- a/test/registered/model_loading/test_utils_update_weights.py +++ b/test/registered/model_loading/test_utils_update_weights.py @@ -12,7 +12,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST -register_cuda_ci(est_time=32, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=32, stage="stage-b", runner_config="1-gpu-large") class AsyncEngine(Engine): diff --git a/test/registered/models/test_compressed_tensors_models.py b/test/registered/models/test_compressed_tensors_models.py index 74cbe0061f84..2d4d160e5379 100644 --- a/test/registered/models/test_compressed_tensors_models.py +++ b/test/registered/models/test_compressed_tensors_models.py @@ -13,7 +13,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=65, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=65, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=42, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/models/test_dummy_grok_models.py b/test/registered/models/test_dummy_grok_models.py index f8ae27cdddbf..e251db8db54d 100644 --- a/test/registered/models/test_dummy_grok_models.py +++ b/test/registered/models/test_dummy_grok_models.py @@ -5,7 +5,8 @@ register_cuda_ci( est_time=120, - suite="stage-b-test-2-gpu-large", + stage="stage-b", + runner_config="2-gpu-large", disabled="Temporarily disabled", ) diff --git a/test/registered/models/test_gemma4_fp8_per_expert_loading.py b/test/registered/models/test_gemma4_fp8_per_expert_loading.py index 3565263fb51d..114f29c9b792 100644 --- a/test/registered/models/test_gemma4_fp8_per_expert_loading.py +++ b/test/registered/models/test_gemma4_fp8_per_expert_loading.py @@ -25,7 +25,7 @@ # Compressed-tensors per-expert FP8 MoE checkpoint that exercises the # loader path (gated repo + ~27 GB download + 4 GPUs at TP=4). -register_cuda_ci(est_time=120, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=120, stage="stage-c", runner_config="4-gpu-h100") @unittest.skipIf(get_device_sm() < 90, "Test requires CUDA SM 90 or higher") diff --git a/test/registered/models/test_generation_models.py b/test/registered/models/test_generation_models.py index 437b4d49af77..b6b2ec1c51b8 100644 --- a/test/registered/models/test_generation_models.py +++ b/test/registered/models/test_generation_models.py @@ -1,7 +1,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci # Generation model tests (CUDA only) -register_cuda_ci(est_time=150, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=150, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=106, suite="stage-b-test-1-gpu-small-amd") # Copyright 2023-2024 SGLang Team diff --git a/test/registered/models/test_kimi_linear_models.py b/test/registered/models/test_kimi_linear_models.py index 21acf3c57088..bc86d5e42e10 100644 --- a/test/registered/models/test_kimi_linear_models.py +++ b/test/registered/models/test_kimi_linear_models.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=178, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=178, stage="stage-b", runner_config="2-gpu-large") class TestKimiLinear(CustomTestCase): diff --git a/test/registered/models/test_ministral3_models.py b/test/registered/models/test_ministral3_models.py index 327b49f14aaf..d9a174c2faae 100644 --- a/test/registered/models/test_ministral3_models.py +++ b/test/registered/models/test_ministral3_models.py @@ -8,7 +8,8 @@ register_cuda_ci( est_time=200, - suite="stage-b-test-1-gpu-small", + stage="stage-b", + runner_config="1-gpu-small", disabled="Temporarily disabled", ) diff --git a/test/registered/models/test_ministral4_models.py b/test/registered/models/test_ministral4_models.py index 875e0a75e511..9697068c14b9 100644 --- a/test/registered/models/test_ministral4_models.py +++ b/test/registered/models/test_ministral4_models.py @@ -8,7 +8,8 @@ register_cuda_ci( est_time=200, - suite="stage-b-test-2-gpu-large", + stage="stage-b", + runner_config="2-gpu-large", ) MODEL = "mistralai/Mistral-Small-4-119B-2603" diff --git a/test/registered/models/test_nvidia_nemotron_3_nano.py b/test/registered/models/test_nvidia_nemotron_3_nano.py index 6fa052967a34..597eb5101cb6 100644 --- a/test/registered/models/test_nvidia_nemotron_3_nano.py +++ b/test/registered/models/test_nvidia_nemotron_3_nano.py @@ -6,7 +6,8 @@ register_cuda_ci( est_time=190, - suite="stage-b-test-2-gpu-large", + stage="stage-b", + runner_config="2-gpu-large", ) NEMOTRON_3_NANO_THINKING_ARGS = [ diff --git a/test/registered/models/test_transformers_backend_eval.py b/test/registered/models/test_transformers_backend_eval.py index 58698dc5d1ba..2a1e96085e39 100644 --- a/test/registered/models/test_transformers_backend_eval.py +++ b/test/registered/models/test_transformers_backend_eval.py @@ -7,7 +7,7 @@ from sglang.test.few_shot_gsm8k import run_eval from sglang.test.server_fixtures.default_fixture import DefaultServerBase -register_cuda_ci(est_time=48, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=48, stage="stage-b", runner_config="1-gpu-small") class TestTransformersBackendEval(DefaultServerBase): diff --git a/test/registered/models/test_transformers_models.py b/test/registered/models/test_transformers_models.py index 5fe40e1222a1..2b8095ff8a5c 100644 --- a/test/registered/models/test_transformers_models.py +++ b/test/registered/models/test_transformers_models.py @@ -21,7 +21,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=177, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=177, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=320, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/models/test_vlm_models.py b/test/registered/models/test_vlm_models.py index 17e9b146a196..e7da07f6b2ae 100644 --- a/test/registered/models/test_vlm_models.py +++ b/test/registered/models/test_vlm_models.py @@ -13,7 +13,7 @@ # VLM (Vision Language Model) tests -register_cuda_ci(est_time=317, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=317, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=850, suite="stage-b-test-1-gpu-small-amd-nondeterministic") _is_hip = is_hip() diff --git a/test/registered/moe/test_cutedsl_moe.py b/test/registered/moe/test_cutedsl_moe.py index b29a48e3d9e8..7e566b2a4cc6 100644 --- a/test/registered/moe/test_cutedsl_moe.py +++ b/test/registered/moe/test_cutedsl_moe.py @@ -16,7 +16,7 @@ CuteDslMoEWrapper = None convert_sf_to_mma_layout = None -register_cuda_ci(est_time=590, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=590, stage="stage-c", runner_config="4-gpu-b200") SKIP_TEST = torch.cuda.get_device_capability() < (10, 0) SKIP_REASON = "Nvfp4 Requires compute capability of 10 or above." diff --git a/test/registered/moe/test_fused_moe.py b/test/registered/moe/test_fused_moe.py index f93cbff99e01..75249bc7dbfb 100644 --- a/test/registered/moe/test_fused_moe.py +++ b/test/registered/moe/test_fused_moe.py @@ -13,7 +13,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase, empty_gpu_cache -register_cuda_ci(est_time=87, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=87, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=30, suite="stage-b-test-1-gpu-small-amd") _is_hip = is_hip() diff --git a/test/registered/moe/test_glm4_moe_models.py b/test/registered/moe/test_glm4_moe_models.py index e6e800ecdd64..f4faffe3bd96 100644 --- a/test/registered/moe/test_glm4_moe_models.py +++ b/test/registered/moe/test_glm4_moe_models.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=171, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=171, stage="stage-b", runner_config="2-gpu-large") class TestGLM4MoE(CustomTestCase): diff --git a/test/registered/moe/test_moe_ep.py b/test/registered/moe/test_moe_ep.py index b5e936ee6081..e4681fb7aad9 100644 --- a/test/registered/moe/test_moe_ep.py +++ b/test/registered/moe/test_moe_ep.py @@ -12,7 +12,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=279, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=279, stage="stage-b", runner_config="2-gpu-large") class TestEp(CustomTestCase): diff --git a/test/registered/moe/test_torch_compile_moe.py b/test/registered/moe/test_torch_compile_moe.py index 811ca69b457b..495ec9a5dbe3 100644 --- a/test/registered/moe/test_torch_compile_moe.py +++ b/test/registered/moe/test_torch_compile_moe.py @@ -16,7 +16,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=130, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=130, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=1400, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/moe/test_triton_fused_moe.py b/test/registered/moe/test_triton_fused_moe.py index 309ac1b3a3a9..da0a5195fa9d 100644 --- a/test/registered/moe/test_triton_fused_moe.py +++ b/test/registered/moe/test_triton_fused_moe.py @@ -12,7 +12,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=13, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=13, stage="stage-b", runner_config="1-gpu-large") class TestFusedMOE(CustomTestCase): diff --git a/test/registered/moe/test_triton_moe_channel_fp8_kernel.py b/test/registered/moe/test_triton_moe_channel_fp8_kernel.py index ee6b4522703f..2bbefdddb473 100644 --- a/test/registered/moe/test_triton_moe_channel_fp8_kernel.py +++ b/test/registered/moe/test_triton_moe_channel_fp8_kernel.py @@ -11,7 +11,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=17, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=17, stage="stage-b", runner_config="1-gpu-large") def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16): diff --git a/test/registered/observability/test_metrics.py b/test/registered/observability/test_metrics.py index 9d9f9d056ada..d6db74295d73 100644 --- a/test/registered/observability/test_metrics.py +++ b/test/registered/observability/test_metrics.py @@ -20,7 +20,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=74, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=74, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=32, suite="stage-b-test-1-gpu-small-amd") _MODEL_NAME = "Qwen/Qwen3-0.6B" diff --git a/test/registered/observability/test_priority_metrics.py b/test/registered/observability/test_priority_metrics.py index 70c56131243a..fe778293e5c8 100644 --- a/test/registered/observability/test_priority_metrics.py +++ b/test/registered/observability/test_priority_metrics.py @@ -18,7 +18,8 @@ register_cuda_ci( est_time=60, - suite="stage-b-test-1-gpu-small", + stage="stage-b", + runner_config="1-gpu-small", ) register_amd_ci(est_time=60, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/observability/test_tracing.py b/test/registered/observability/test_tracing.py index 4217a1ac03a0..ca450c33d59a 100644 --- a/test/registered/observability/test_tracing.py +++ b/test/registered/observability/test_tracing.py @@ -46,7 +46,7 @@ logger = logging.getLogger(__name__) # CI registration -register_cuda_ci(est_time=113, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=113, stage="stage-b", runner_config="1-gpu-small") # ============================================================================ diff --git a/test/registered/observability/test_tracing_disaggregation.py b/test/registered/observability/test_tracing_disaggregation.py index 3c7990df7bbd..f16b2309187f 100644 --- a/test/registered/observability/test_tracing_disaggregation.py +++ b/test/registered/observability/test_tracing_disaggregation.py @@ -33,7 +33,7 @@ logger = logging.getLogger(__name__) # CI registration - PD disaggregation requires 2 GPUs -register_cuda_ci(est_time=65, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=65, stage="stage-b", runner_config="2-gpu-large") class TestTraceDisaggregation(CustomTestCase): diff --git a/test/registered/openai_server/basic/test_anthropic_server.py b/test/registered/openai_server/basic/test_anthropic_server.py index bdfb8b91f0cd..a77cd982cbb5 100644 --- a/test/registered/openai_server/basic/test_anthropic_server.py +++ b/test/registered/openai_server/basic/test_anthropic_server.py @@ -31,7 +31,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=40, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=40, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=140, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/basic/test_http2_server.py b/test/registered/openai_server/basic/test_http2_server.py index 3f1434e8b3fc..21b222684b94 100644 --- a/test/registered/openai_server/basic/test_http2_server.py +++ b/test/registered/openai_server/basic/test_http2_server.py @@ -27,7 +27,7 @@ except ImportError: _HAS_GRANIAN = False -register_cuda_ci(est_time=52, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=52, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=52, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/basic/test_openai_server.py b/test/registered/openai_server/basic/test_openai_server.py index 4a96c100fcdb..682692022a6a 100644 --- a/test/registered/openai_server/basic/test_openai_server.py +++ b/test/registered/openai_server/basic/test_openai_server.py @@ -28,7 +28,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=182, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=182, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=200, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/basic/test_serving_transcription.py b/test/registered/openai_server/basic/test_serving_transcription.py index cf73a1aa0df1..6add726f2db0 100644 --- a/test/registered/openai_server/basic/test_serving_transcription.py +++ b/test/registered/openai_server/basic/test_serving_transcription.py @@ -21,7 +21,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=60, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=60, stage="stage-b", runner_config="1-gpu-small") WHISPER_MODEL = "openai/whisper-large-v3" AUDIO_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/audios/Trump_WEF_2018_10s.mp3" diff --git a/test/registered/openai_server/features/test_json_mode.py b/test/registered/openai_server/features/test_json_mode.py index d3aa8e68a722..a24fda4c3410 100644 --- a/test/registered/openai_server/features/test_json_mode.py +++ b/test/registered/openai_server/features/test_json_mode.py @@ -14,7 +14,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=118, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=118, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=180, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/features/test_openai_server_ebnf.py b/test/registered/openai_server/features/test_openai_server_ebnf.py index 42dfd58fde44..4188e841bc97 100644 --- a/test/registered/openai_server/features/test_openai_server_ebnf.py +++ b/test/registered/openai_server/features/test_openai_server_ebnf.py @@ -13,7 +13,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=44, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=44, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=20, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/features/test_openai_server_hidden_states.py b/test/registered/openai_server/features/test_openai_server_hidden_states.py index c1a46fd7a63d..2eb5fd7b6ef6 100644 --- a/test/registered/openai_server/features/test_openai_server_hidden_states.py +++ b/test/registered/openai_server/features/test_openai_server_hidden_states.py @@ -16,7 +16,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=222, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=222, stage="stage-b", runner_config="1-gpu-small") register_amd_ci( est_time=186, suite="stage-b-test-1-gpu-small-amd", diff --git a/test/registered/openai_server/function_call/test_anthropic_tool_use.py b/test/registered/openai_server/function_call/test_anthropic_tool_use.py index 7904b0b391e5..1a56b62c9ccd 100644 --- a/test/registered/openai_server/function_call/test_anthropic_tool_use.py +++ b/test/registered/openai_server/function_call/test_anthropic_tool_use.py @@ -25,7 +25,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=50, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=50, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=140, suite="stage-b-test-1-gpu-small-amd") # System message to guide Llama3.2 to produce proper tool call format diff --git a/test/registered/openai_server/function_call/test_openai_function_calling.py b/test/registered/openai_server/function_call/test_openai_function_calling.py index 88544c1975ad..f4e2485909e8 100644 --- a/test/registered/openai_server/function_call/test_openai_function_calling.py +++ b/test/registered/openai_server/function_call/test_openai_function_calling.py @@ -14,7 +14,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=100, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=100, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=73, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/function_call/test_tool_choice.py b/test/registered/openai_server/function_call/test_tool_choice.py index c7fef05c7cc4..a1997088e32b 100644 --- a/test/registered/openai_server/function_call/test_tool_choice.py +++ b/test/registered/openai_server/function_call/test_tool_choice.py @@ -22,7 +22,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=204, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=204, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=258, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/validation/test_large_max_new_tokens.py b/test/registered/openai_server/validation/test_large_max_new_tokens.py index 3ec576774a5e..aa4446b766e7 100644 --- a/test/registered/openai_server/validation/test_large_max_new_tokens.py +++ b/test/registered/openai_server/validation/test_large_max_new_tokens.py @@ -22,7 +22,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=58, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=58, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=41, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/validation/test_matched_stop.py b/test/registered/openai_server/validation/test_matched_stop.py index c6402ab21e6c..c9566191cfee 100644 --- a/test/registered/openai_server/validation/test_matched_stop.py +++ b/test/registered/openai_server/validation/test_matched_stop.py @@ -10,7 +10,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=52, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=52, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=60, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/validation/test_openai_server_ignore_eos.py b/test/registered/openai_server/validation/test_openai_server_ignore_eos.py index 847dd0463353..a765f8950230 100644 --- a/test/registered/openai_server/validation/test_openai_server_ignore_eos.py +++ b/test/registered/openai_server/validation/test_openai_server_ignore_eos.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=44, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=44, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=47, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/validation/test_request_length_validation.py b/test/registered/openai_server/validation/test_request_length_validation.py index 0c581bed0459..1f5ae898dd29 100644 --- a/test/registered/openai_server/validation/test_request_length_validation.py +++ b/test/registered/openai_server/validation/test_request_length_validation.py @@ -12,7 +12,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=49, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=49, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=31, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/perf/test_bench_one_batch_2gpu.py b/test/registered/perf/test_bench_one_batch_2gpu.py index 2daa9b6c4cf7..bb079075d3dd 100644 --- a/test/registered/perf/test_bench_one_batch_2gpu.py +++ b/test/registered/perf/test_bench_one_batch_2gpu.py @@ -11,7 +11,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=209, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=209, stage="stage-b", runner_config="2-gpu-large") register_amd_ci(est_time=630, suite="stage-b-test-2-gpu-large-amd") diff --git a/test/registered/perf/test_bench_serving_1gpu_large.py b/test/registered/perf/test_bench_serving_1gpu_large.py index cee6d40140c8..134c8e3ea3cd 100644 --- a/test/registered/perf/test_bench_serving_1gpu_large.py +++ b/test/registered/perf/test_bench_serving_1gpu_large.py @@ -17,7 +17,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=286, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=286, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=300, suite="stage-b-test-1-gpu-large-amd") diff --git a/test/registered/perf/test_bench_serving_1gpu_part1.py b/test/registered/perf/test_bench_serving_1gpu_part1.py index 56040ffb636a..85238e00ba43 100644 --- a/test/registered/perf/test_bench_serving_1gpu_part1.py +++ b/test/registered/perf/test_bench_serving_1gpu_part1.py @@ -19,7 +19,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=1210, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=1210, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=1100, suite="stage-b-test-1-gpu-large-amd") diff --git a/test/registered/perf/test_bench_serving_1gpu_part2.py b/test/registered/perf/test_bench_serving_1gpu_part2.py index 7743877b07e4..dc5cf99fcb57 100644 --- a/test/registered/perf/test_bench_serving_1gpu_part2.py +++ b/test/registered/perf/test_bench_serving_1gpu_part2.py @@ -19,7 +19,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=968, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=968, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=900, suite="stage-b-test-1-gpu-large-amd") diff --git a/test/registered/perf/test_bench_serving_2gpu.py b/test/registered/perf/test_bench_serving_2gpu.py index 91783b07b787..66d2bc104c50 100644 --- a/test/registered/perf/test_bench_serving_2gpu.py +++ b/test/registered/perf/test_bench_serving_2gpu.py @@ -14,7 +14,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=721, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=721, stage="stage-b", runner_config="2-gpu-large") register_amd_ci(est_time=1450, suite="stage-b-test-2-gpu-large-amd") diff --git a/test/registered/perf/test_vlm_perf_5090.py b/test/registered/perf/test_vlm_perf_5090.py index 83f1ce25a23e..3c98f8568dfb 100644 --- a/test/registered/perf/test_vlm_perf_5090.py +++ b/test/registered/perf/test_vlm_perf_5090.py @@ -13,7 +13,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=406, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=406, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=500, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/piecewise_cuda_graph/test_pcg_with_speculative_decoding.py b/test/registered/piecewise_cuda_graph/test_pcg_with_speculative_decoding.py index db8eea0949ca..0386babb26e0 100644 --- a/test/registered/piecewise_cuda_graph/test_pcg_with_speculative_decoding.py +++ b/test/registered/piecewise_cuda_graph/test_pcg_with_speculative_decoding.py @@ -18,7 +18,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=531, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=531, stage="stage-b", runner_config="2-gpu-large") class TestPCGWithMTP(unittest.TestCase): diff --git a/test/registered/piecewise_cuda_graph/test_piecewise_cuda_graph_support_1_gpu.py b/test/registered/piecewise_cuda_graph/test_piecewise_cuda_graph_support_1_gpu.py index c6b92bb86870..abc44f285671 100644 --- a/test/registered/piecewise_cuda_graph/test_piecewise_cuda_graph_support_1_gpu.py +++ b/test/registered/piecewise_cuda_graph/test_piecewise_cuda_graph_support_1_gpu.py @@ -17,7 +17,7 @@ ) # CI Registration -register_cuda_ci(est_time=180, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=180, stage="stage-b", runner_config="1-gpu-large") class TestPiecewiseCudaGraphQwen25VL(CustomTestCase): diff --git a/test/registered/prefill_only/test_embed_overrides.py b/test/registered/prefill_only/test_embed_overrides.py index 9c4a0c334bc4..f1e67d262a35 100644 --- a/test/registered/prefill_only/test_embed_overrides.py +++ b/test/registered/prefill_only/test_embed_overrides.py @@ -24,7 +24,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=9, suite="stage-b-test-1-gpu-small-amd") HIDDEN_DIM = 4 diff --git a/test/registered/prefill_only/test_embedding_models.py b/test/registered/prefill_only/test_embedding_models.py index ad95a84f2a17..f52ca0410f13 100644 --- a/test/registered/prefill_only/test_embedding_models.py +++ b/test/registered/prefill_only/test_embedding_models.py @@ -35,7 +35,7 @@ suite="stage-b-test-1-gpu-small-amd", disabled="see https://github.com/sgl-project/sglang/issues/11127", ) -register_cuda_ci(est_time=136, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=136, stage="stage-b", runner_config="1-gpu-small") MODEL_TO_CONFIG = { "Alibaba-NLP/gte-Qwen2-1.5B-instruct": (1, 1e-5), diff --git a/test/registered/prefill_only/test_multi_item_scoring.py b/test/registered/prefill_only/test_multi_item_scoring.py index fb9205cdd4b8..3a019affd434 100644 --- a/test/registered/prefill_only/test_multi_item_scoring.py +++ b/test/registered/prefill_only/test_multi_item_scoring.py @@ -27,7 +27,7 @@ CustomTestCase, ) -register_cuda_ci(est_time=211, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=211, stage="stage-b", runner_config="1-gpu-small") TEST_MODEL_NAME = os.environ.get("TEST_MODEL_NAME", DEFAULT_SMALL_MODEL_NAME_FOR_TEST) TEST_CLASSIFICATION_BASE_MODEL = os.environ.get( diff --git a/test/registered/prefill_only/test_openai_embedding.py b/test/registered/prefill_only/test_openai_embedding.py index cb088aa17f41..8455358e0010 100644 --- a/test/registered/prefill_only/test_openai_embedding.py +++ b/test/registered/prefill_only/test_openai_embedding.py @@ -13,7 +13,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=91, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=91, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=141, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/prefill_only/test_pooled_hidden_states.py b/test/registered/prefill_only/test_pooled_hidden_states.py index 0d3b7b969582..e112c13b1912 100644 --- a/test/registered/prefill_only/test_pooled_hidden_states.py +++ b/test/registered/prefill_only/test_pooled_hidden_states.py @@ -27,7 +27,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=100, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=100, stage="stage-b", runner_config="1-gpu-small") _SEQCLS_MODEL = "Qwen/Qwen3-0.6B" _CAUSAL_LM_MODEL = DEFAULT_SMALL_MODEL_NAME_FOR_TEST diff --git a/test/registered/prefill_only/test_reward_models.py b/test/registered/prefill_only/test_reward_models.py index 53eda08bb455..8647dba6aac6 100644 --- a/test/registered/prefill_only/test_reward_models.py +++ b/test/registered/prefill_only/test_reward_models.py @@ -24,7 +24,7 @@ # ============================================================================== -register_cuda_ci(est_time=166, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=166, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=132, suite="stage-b-test-1-gpu-small-amd-nondeterministic") MODELS = [ diff --git a/test/registered/prefill_only/test_score_api.py b/test/registered/prefill_only/test_score_api.py index ae06a5835019..7e6059588aee 100644 --- a/test/registered/prefill_only/test_score_api.py +++ b/test/registered/prefill_only/test_score_api.py @@ -27,7 +27,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=71, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=71, stage="stage-b", runner_config="1-gpu-small") _MODEL = os.environ.get("TEST_MODEL_NAME", DEFAULT_SMALL_MODEL_NAME_FOR_TEST) diff --git a/test/registered/prefill_only/test_score_engine.py b/test/registered/prefill_only/test_score_engine.py index 19d580f6d0cf..ff0550f2fe68 100644 --- a/test/registered/prefill_only/test_score_engine.py +++ b/test/registered/prefill_only/test_score_engine.py @@ -26,7 +26,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase -register_cuda_ci(est_time=85, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=85, stage="stage-b", runner_config="1-gpu-small") _CAUSAL_LM_MODEL = os.environ.get("TEST_MODEL_NAME", DEFAULT_SMALL_MODEL_NAME_FOR_TEST) _SEQCLS_MODEL = os.environ.get("TEST_CLASSIFICATION_BASE_MODEL", "Qwen/Qwen3-0.6B") diff --git a/test/registered/prefill_only/test_serving_rerank.py b/test/registered/prefill_only/test_serving_rerank.py index f1b10f253468..216278a99ea6 100644 --- a/test/registered/prefill_only/test_serving_rerank.py +++ b/test/registered/prefill_only/test_serving_rerank.py @@ -7,7 +7,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci # Keep consistent with other openai_server/basic unit tests. -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=10, suite="stage-b-test-1-gpu-small-amd") try: diff --git a/test/registered/profiling/test_profile_v2.py b/test/registered/profiling/test_profile_v2.py index 33452027d12c..ac9d6b2e0f58 100644 --- a/test/registered/profiling/test_profile_v2.py +++ b/test/registered/profiling/test_profile_v2.py @@ -19,7 +19,8 @@ register_cuda_ci( est_time=120, - suite="stage-b-test-1-gpu-small", + stage="stage-b", + runner_config="1-gpu-small", disabled="Temporarily disabled", ) diff --git a/test/registered/profiling/test_start_profile.py b/test/registered/profiling/test_start_profile.py index 9049f9cba462..34630c92204a 100644 --- a/test/registered/profiling/test_start_profile.py +++ b/test/registered/profiling/test_start_profile.py @@ -29,7 +29,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=42, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=42, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=60, suite="stage-b-test-1-gpu-small-amd") OUTPUT_DIR = "./profiler_dir" diff --git a/test/registered/quant/test_awq.py b/test/registered/quant/test_awq.py index 57450e66590c..fbdb69e35a33 100644 --- a/test/registered/quant/test_awq.py +++ b/test/registered/quant/test_awq.py @@ -13,7 +13,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=160, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=160, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=200, suite="stage-b-test-1-gpu-large-amd") diff --git a/test/registered/quant/test_block_int8.py b/test/registered/quant/test_block_int8.py index b7716c8dc5ae..2c1d77def549 100644 --- a/test/registered/quant/test_block_int8.py +++ b/test/registered/quant/test_block_int8.py @@ -10,7 +10,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=44, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=44, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=22, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py b/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py index ca259b2aeddd..c087c8bfd9a3 100644 --- a/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py +++ b/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py @@ -17,7 +17,8 @@ register_cuda_ci( est_time=1060, - suite="stage-c-test-4-gpu-b200", + stage="stage-c", + runner_config="4-gpu-b200", ) FULL_DEEPSEEK_V3_FP4_MODEL_PATH = "nvidia/DeepSeek-V3.2-NVFP4" diff --git a/test/registered/quant/test_deepseek_v3_fp4_4gpu.py b/test/registered/quant/test_deepseek_v3_fp4_4gpu.py index 5ce8a8fb3258..1cfd18f09e49 100644 --- a/test/registered/quant/test_deepseek_v3_fp4_4gpu.py +++ b/test/registered/quant/test_deepseek_v3_fp4_4gpu.py @@ -14,7 +14,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=1190, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=1190, stage="stage-c", runner_config="4-gpu-b200") FULL_DEEPSEEK_V3_FP4_MODEL_PATH = "nvidia/DeepSeek-V3-0324-FP4" SERVER_LAUNCH_TIMEOUT = 1200 diff --git a/test/registered/quant/test_fp8_blockwise_gemm.py b/test/registered/quant/test_fp8_blockwise_gemm.py index ae1446866d10..ebc158c67d26 100644 --- a/test/registered/quant/test_fp8_blockwise_gemm.py +++ b/test/registered/quant/test_fp8_blockwise_gemm.py @@ -12,7 +12,7 @@ try_cached_model, ) -register_cuda_ci(est_time=630, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=630, stage="stage-c", runner_config="4-gpu-b200") MODEL_PATH = "Qwen/Qwen3-4B-Instruct-2507-FP8" MXFP8_MODEL_PATH = "zianglih/Qwen3-4B-Instruct-2507-MXFP8" diff --git a/test/registered/quant/test_fp8_gemm_sm120.py b/test/registered/quant/test_fp8_gemm_sm120.py index 9ecbb98ecc7f..3817f298d279 100644 --- a/test/registered/quant/test_fp8_gemm_sm120.py +++ b/test/registered/quant/test_fp8_gemm_sm120.py @@ -12,7 +12,7 @@ try_cached_model, ) -register_cuda_ci(est_time=146, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=146, stage="stage-b", runner_config="1-gpu-small") PERTENSOR_MODEL_PATH = "nvidia/Llama-3.1-8B-Instruct-FP8" BLOCKWISE_MODEL_PATH = "Qwen/Qwen3-4B-Instruct-2507-FP8" diff --git a/test/registered/quant/test_fp8_kernel.py b/test/registered/quant/test_fp8_kernel.py index a85841c8c7f6..a1cf1fa8451f 100644 --- a/test/registered/quant/test_fp8_kernel.py +++ b/test/registered/quant/test_fp8_kernel.py @@ -9,7 +9,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=10, stage="stage-b", runner_config="1-gpu-large") from sglang.srt.utils import get_device, is_cuda, is_xpu diff --git a/test/registered/quant/test_fp8_utils.py b/test/registered/quant/test_fp8_utils.py index 563a9123b432..277051c0423b 100644 --- a/test/registered/quant/test_fp8_utils.py +++ b/test/registered/quant/test_fp8_utils.py @@ -10,7 +10,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-large") class TestInverseTransformScaleUe8m0(CustomTestCase): diff --git a/test/registered/quant/test_fp8kv_triton.py b/test/registered/quant/test_fp8kv_triton.py index c46302444be5..93103e143ce1 100644 --- a/test/registered/quant/test_fp8kv_triton.py +++ b/test/registered/quant/test_fp8kv_triton.py @@ -12,7 +12,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=73, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=73, stage="stage-b", runner_config="1-gpu-large") class TestFP8KVCacheTritonBackend(CustomTestCase): diff --git a/test/registered/quant/test_gguf.py b/test/registered/quant/test_gguf.py index 3ec75dad0fdb..50ed3bdaba2b 100644 --- a/test/registered/quant/test_gguf.py +++ b/test/registered/quant/test_gguf.py @@ -6,7 +6,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=76, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=76, stage="stage-b", runner_config="1-gpu-small") class TestGGUF(CustomTestCase): diff --git a/test/registered/quant/test_gptqmodel_dynamic.py b/test/registered/quant/test_gptqmodel_dynamic.py index e0f542aa2130..ca9b123473d6 100644 --- a/test/registered/quant/test_gptqmodel_dynamic.py +++ b/test/registered/quant/test_gptqmodel_dynamic.py @@ -14,7 +14,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=100, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=100, stage="stage-b", runner_config="1-gpu-large") def check_quant_method(model_path: str, use_marlin_kernel: bool): diff --git a/test/registered/quant/test_int8_kernel.py b/test/registered/quant/test_int8_kernel.py index 8b48a913a62f..b3145f8f5d2b 100644 --- a/test/registered/quant/test_int8_kernel.py +++ b/test/registered/quant/test_int8_kernel.py @@ -11,7 +11,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=15, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=15, stage="stage-b", runner_config="1-gpu-small") def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16): diff --git a/test/registered/quant/test_marlin_moe.py b/test/registered/quant/test_marlin_moe.py index d88a3627efa1..f7aaeac0b295 100644 --- a/test/registered/quant/test_marlin_moe.py +++ b/test/registered/quant/test_marlin_moe.py @@ -12,7 +12,7 @@ from sglang.test.test_marlin_utils import awq_marlin_quantize, marlin_quantize from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=108, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=108, stage="stage-b", runner_config="1-gpu-small") set_global_server_args_for_scheduler(object.__new__(ServerArgs)) diff --git a/test/registered/quant/test_modelopt_fp8.py b/test/registered/quant/test_modelopt_fp8.py index 9f1c7bdfc4d8..79e1bd9b2d89 100644 --- a/test/registered/quant/test_modelopt_fp8.py +++ b/test/registered/quant/test_modelopt_fp8.py @@ -12,7 +12,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=62, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=62, stage="stage-b", runner_config="1-gpu-large") class TestModeloptFP8(CustomTestCase): diff --git a/test/registered/quant/test_nvfp4_gemm.py b/test/registered/quant/test_nvfp4_gemm.py index 9054f2fea30e..433ccc6c995f 100644 --- a/test/registered/quant/test_nvfp4_gemm.py +++ b/test/registered/quant/test_nvfp4_gemm.py @@ -12,7 +12,7 @@ try_cached_model, ) -register_cuda_ci(est_time=420, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=420, stage="stage-c", runner_config="4-gpu-b200") MODEL_PATH = "nvidia/Llama-3.1-8B-Instruct-NVFP4" diff --git a/test/registered/quant/test_nvfp4_gemm_sm120.py b/test/registered/quant/test_nvfp4_gemm_sm120.py index 7b1f4390f358..4dc6fa34831c 100644 --- a/test/registered/quant/test_nvfp4_gemm_sm120.py +++ b/test/registered/quant/test_nvfp4_gemm_sm120.py @@ -12,7 +12,7 @@ try_cached_model, ) -register_cuda_ci(est_time=109, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=109, stage="stage-b", runner_config="1-gpu-small") MODEL_PATH = "nvidia/Llama-3.1-8B-Instruct-NVFP4" diff --git a/test/registered/quant/test_triton_scaled_mm.py b/test/registered/quant/test_triton_scaled_mm.py index d56a0f13136a..da353db169be 100644 --- a/test/registered/quant/test_triton_scaled_mm.py +++ b/test/registered/quant/test_triton_scaled_mm.py @@ -9,7 +9,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=11, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=11, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=12, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/quant/test_w8a8_quantization.py b/test/registered/quant/test_w8a8_quantization.py index 7841d098d772..c4ff012154ab 100644 --- a/test/registered/quant/test_w8a8_quantization.py +++ b/test/registered/quant/test_w8a8_quantization.py @@ -14,7 +14,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=232, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=232, stage="stage-b", runner_config="1-gpu-large") class BaseW8A8Test(CustomTestCase): diff --git a/test/registered/radix_cache/test_radix_attention.py b/test/registered/radix_cache/test_radix_attention.py index 871dac26e2a5..b88dccfc4ea7 100644 --- a/test/registered/radix_cache/test_radix_attention.py +++ b/test/registered/radix_cache/test_radix_attention.py @@ -14,7 +14,7 @@ ) # RadixAttention server integration tests -register_cuda_ci(est_time=100, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=100, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=100, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/radix_cache/test_radix_cache_hit.py b/test/registered/radix_cache/test_radix_cache_hit.py index bcd2b2b1caa4..de465f2dc55f 100644 --- a/test/registered/radix_cache/test_radix_cache_hit.py +++ b/test/registered/radix_cache/test_radix_cache_hit.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=55, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=55, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=55, suite="stage-b-test-1-gpu-small-amd") MODEL = DEFAULT_SMALL_MODEL_NAME_FOR_TEST diff --git a/test/registered/radix_cache/test_swa_radix_cache_kl.py b/test/registered/radix_cache/test_swa_radix_cache_kl.py index 02a336384dd3..d2d66fdf54d2 100644 --- a/test/registered/radix_cache/test_swa_radix_cache_kl.py +++ b/test/registered/radix_cache/test_swa_radix_cache_kl.py @@ -6,7 +6,7 @@ MODEL = "openai/gpt-oss-20b" -register_cuda_ci(est_time=151, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=151, stage="stage-b", runner_config="1-gpu-large") class TestSWARadixCacheKL(KLDivergenceMixin, DefaultServerBase): diff --git a/test/registered/radix_cache/test_unified_radix_cache_kl.py b/test/registered/radix_cache/test_unified_radix_cache_kl.py index 1817263f85de..d4b055748d54 100644 --- a/test/registered/radix_cache/test_unified_radix_cache_kl.py +++ b/test/registered/radix_cache/test_unified_radix_cache_kl.py @@ -35,7 +35,7 @@ def _random_suffixes(n, length, seed): SWA_MODEL = "openai/gpt-oss-20b" FULL_MODEL = "Qwen/Qwen3-32B" -register_cuda_ci(est_time=760, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=760, stage="stage-c", runner_config="4-gpu-h100") class UnifiedRadixTreeTestMixin: diff --git a/test/registered/reasoning/test_reasoning.py b/test/registered/reasoning/test_reasoning.py index b271bea520ea..2bd16c387c02 100644 --- a/test/registered/reasoning/test_reasoning.py +++ b/test/registered/reasoning/test_reasoning.py @@ -17,7 +17,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=129, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=129, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=200, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/rl/test_fp32_lm_head.py b/test/registered/rl/test_fp32_lm_head.py index 63b0481741ba..b6aeb4ca9b14 100644 --- a/test/registered/rl/test_fp32_lm_head.py +++ b/test/registered/rl/test_fp32_lm_head.py @@ -15,7 +15,7 @@ from sglang.srt.utils import get_device from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=15, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/rl/test_lora_load_from_tensor.py b/test/registered/rl/test_lora_load_from_tensor.py index 6dd6a89bd490..a0e7f9655aa2 100644 --- a/test/registered/rl/test_lora_load_from_tensor.py +++ b/test/registered/rl/test_lora_load_from_tensor.py @@ -10,7 +10,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=102, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=102, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=90, suite="stage-b-test-1-gpu-small-amd") MODEL_PATH = "Qwen/Qwen3-0.6B" diff --git a/test/registered/rl/test_multi_instance_release_memory_occupation.py b/test/registered/rl/test_multi_instance_release_memory_occupation.py index 5484b55b8914..754e3532d673 100644 --- a/test/registered/rl/test_multi_instance_release_memory_occupation.py +++ b/test/registered/rl/test_multi_instance_release_memory_occupation.py @@ -20,7 +20,7 @@ find_available_port, ) -register_cuda_ci(est_time=57, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=57, stage="stage-c", runner_config="4-gpu-h100") register_amd_ci( est_time=64, suite="stage-c-test-4-gpu-amd", diff --git a/test/registered/rl/test_patch_torch.py b/test/registered/rl/test_patch_torch.py index 06768fc7c3da..6844a856e6ee 100644 --- a/test/registered/rl/test_patch_torch.py +++ b/test/registered/rl/test_patch_torch.py @@ -9,7 +9,7 @@ from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=15, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=15, stage="stage-b", runner_config="2-gpu-large") class TestReleaseMemoryOccupation(unittest.TestCase): diff --git a/test/registered/rl/test_release_memory_occupation.py b/test/registered/rl/test_release_memory_occupation.py index 7a4f8725da1c..4e2f024a879a 100644 --- a/test/registered/rl/test_release_memory_occupation.py +++ b/test/registered/rl/test_release_memory_occupation.py @@ -55,7 +55,8 @@ register_cuda_ci( est_time=200, - suite="stage-c-test-4-gpu-h100", + stage="stage-c", + runner_config="4-gpu-h100", disabled="Temporarily disabled - needs investigation", ) diff --git a/test/registered/rl/test_return_routed_experts.py b/test/registered/rl/test_return_routed_experts.py index 33643207a052..14fd3ce22885 100644 --- a/test/registered/rl/test_return_routed_experts.py +++ b/test/registered/rl/test_return_routed_experts.py @@ -24,7 +24,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=400, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=400, stage="stage-c", runner_config="4-gpu-h100") # FP8 variant of Qwen3-30B-A3B: required because DeepEP normal/LL fast paths in # ep_moe/layer.py only run for {Fp8Config (via deep_gemm), W4AFp8Config, aiter, diff --git a/test/registered/rl/test_update_weights_from_disk.py b/test/registered/rl/test_update_weights_from_disk.py index 076bbe1bb7dc..415aee265e7e 100644 --- a/test/registered/rl/test_update_weights_from_disk.py +++ b/test/registered/rl/test_update_weights_from_disk.py @@ -21,7 +21,9 @@ register_amd_ci( est_time=210, suite="stage-b-test-1-gpu-small-amd", disabled="see #14021" ) -register_cuda_ci(est_time=210, suite="stage-b-test-1-gpu-large", disabled="see #14021") +register_cuda_ci( + est_time=210, stage="stage-b", runner_config="1-gpu-large", disabled="see #14021" +) ############################################################################### diff --git a/test/registered/rl/test_update_weights_from_disk_blackwell.py b/test/registered/rl/test_update_weights_from_disk_blackwell.py index 58b896695cef..ca5af2e2ea84 100644 --- a/test/registered/rl/test_update_weights_from_disk_blackwell.py +++ b/test/registered/rl/test_update_weights_from_disk_blackwell.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=400, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=400, stage="stage-c", runner_config="4-gpu-b200") import unittest diff --git a/test/registered/rl/test_update_weights_from_distributed.py b/test/registered/rl/test_update_weights_from_distributed.py index 321826ccae9c..751348c76623 100644 --- a/test/registered/rl/test_update_weights_from_distributed.py +++ b/test/registered/rl/test_update_weights_from_distributed.py @@ -43,7 +43,7 @@ ) from sglang.utils import terminate_process -register_cuda_ci(est_time=137, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=137, stage="stage-b", runner_config="2-gpu-large") register_amd_ci(est_time=400, suite="stage-b-test-2-gpu-large-amd") mp.set_start_method("spawn", force=True) diff --git a/test/registered/rl/test_update_weights_from_tensor.py b/test/registered/rl/test_update_weights_from_tensor.py index 298030975bfa..c5115d4e8e6e 100644 --- a/test/registered/rl/test_update_weights_from_tensor.py +++ b/test/registered/rl/test_update_weights_from_tensor.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=147, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=147, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=195, suite="stage-b-test-1-gpu-small-amd") import gc diff --git a/test/registered/sampling/test_original_logprobs.py b/test/registered/sampling/test_original_logprobs.py index c5a21f0e379c..cefc40256b9d 100644 --- a/test/registered/sampling/test_original_logprobs.py +++ b/test/registered/sampling/test_original_logprobs.py @@ -25,7 +25,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST -register_cuda_ci(est_time=45, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=45, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=60, suite="stage-b-test-1-gpu-small-amd") # ------------------------- Configurable via env ------------------------- # diff --git a/test/registered/sampling/test_penalty.py b/test/registered/sampling/test_penalty.py index d6b7425a1006..39e42a5529a4 100644 --- a/test/registered/sampling/test_penalty.py +++ b/test/registered/sampling/test_penalty.py @@ -16,7 +16,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=53, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=53, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=82, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/sampling/test_pytorch_sampling_backend.py b/test/registered/sampling/test_pytorch_sampling_backend.py index ad13ebda8644..73ff86d05d16 100644 --- a/test/registered/sampling/test_pytorch_sampling_backend.py +++ b/test/registered/sampling/test_pytorch_sampling_backend.py @@ -15,7 +15,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=80, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=80, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=66, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/scheduler/test_mixed_chunked_prefill.py b/test/registered/scheduler/test_mixed_chunked_prefill.py index cced856d352e..fcfd73ce9616 100644 --- a/test/registered/scheduler/test_mixed_chunked_prefill.py +++ b/test/registered/scheduler/test_mixed_chunked_prefill.py @@ -12,7 +12,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=167, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=167, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=180, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/scheduler/test_prefill_delayer.py b/test/registered/scheduler/test_prefill_delayer.py index 0bd8b79975e9..24ea98015898 100644 --- a/test/registered/scheduler/test_prefill_delayer.py +++ b/test/registered/scheduler/test_prefill_delayer.py @@ -28,7 +28,8 @@ register_cuda_ci( est_time=300, - suite="stage-c-test-8-gpu-h200", + stage="stage-c", + runner_config="8-gpu-h200", disabled="Temporarily disabled", ) diff --git a/test/registered/scheduler/test_priority_scheduling.py b/test/registered/scheduler/test_priority_scheduling.py index 71e9cccb64b1..1d4eba2783fd 100644 --- a/test/registered/scheduler/test_priority_scheduling.py +++ b/test/registered/scheduler/test_priority_scheduling.py @@ -17,7 +17,7 @@ send_concurrent_generate_requests_with_custom_params, ) -register_cuda_ci(est_time=149, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=149, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=195, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/scheduler/test_retract_decode.py b/test/registered/scheduler/test_retract_decode.py index 81858ac6a071..4db2c790b147 100644 --- a/test/registered/scheduler/test_retract_decode.py +++ b/test/registered/scheduler/test_retract_decode.py @@ -17,7 +17,7 @@ ) from sglang.utils import is_in_ci -register_cuda_ci(est_time=353, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=353, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=600, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/scheduler/test_scheduler_control.py b/test/registered/scheduler/test_scheduler_control.py index aa8351ddbebc..0ef3bf66e6ce 100644 --- a/test/registered/scheduler/test_scheduler_control.py +++ b/test/registered/scheduler/test_scheduler_control.py @@ -20,7 +20,7 @@ run_and_check_memory_leak, ) -register_cuda_ci(est_time=367, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=367, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=300, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/sessions/test_session_control.py b/test/registered/sessions/test_session_control.py index ed5855d89b77..1e86974d0dd9 100644 --- a/test/registered/sessions/test_session_control.py +++ b/test/registered/sessions/test_session_control.py @@ -24,7 +24,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=87, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=87, stage="stage-b", runner_config="1-gpu-large") def remove_prefix(text: str, prefix: str) -> str: diff --git a/test/registered/sessions/test_session_latency.py b/test/registered/sessions/test_session_latency.py index d7c6fe327c1b..c87529f13171 100644 --- a/test/registered/sessions/test_session_latency.py +++ b/test/registered/sessions/test_session_latency.py @@ -33,7 +33,8 @@ register_cuda_ci( est_time=122, - suite="stage-b-test-1-gpu-large", + stage="stage-b", + runner_config="1-gpu-large", ) NUM_TURNS = 150 diff --git a/test/registered/sessions/test_streaming_session.py b/test/registered/sessions/test_streaming_session.py index 3c6901c006a9..9d54a4c4b599 100644 --- a/test/registered/sessions/test_streaming_session.py +++ b/test/registered/sessions/test_streaming_session.py @@ -21,7 +21,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=691, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=691, stage="stage-b", runner_config="1-gpu-large") LOGPROB_PROMPTS = [ "The quick brown fox jumps over the lazy dog.", diff --git a/test/registered/sessions/test_streaming_session_swa.py b/test/registered/sessions/test_streaming_session_swa.py index 787224cf535c..89c75e955c14 100644 --- a/test/registered/sessions/test_streaming_session_swa.py +++ b/test/registered/sessions/test_streaming_session_swa.py @@ -24,7 +24,7 @@ TestStreamingSessionAbortLeakRepro, ) -register_cuda_ci(est_time=519, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=519, stage="stage-b", runner_config="1-gpu-large") SWA_MODEL = "openai/gpt-oss-20b" diff --git a/test/registered/spec/dflash/test_dflash.py b/test/registered/spec/dflash/test_dflash.py index b64605e18c17..ef6ad8f84edf 100644 --- a/test/registered/spec/dflash/test_dflash.py +++ b/test/registered/spec/dflash/test_dflash.py @@ -18,7 +18,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=302, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=302, stage="stage-b", runner_config="1-gpu-small") class TestDFlashServerBase(CustomTestCase, MatchedStopMixin, GSM8KMixin): diff --git a/test/registered/spec/eagle/test_adaptive_speculative.py b/test/registered/spec/eagle/test_adaptive_speculative.py index 6863eacb4934..5a1972201177 100644 --- a/test/registered/spec/eagle/test_adaptive_speculative.py +++ b/test/registered/spec/eagle/test_adaptive_speculative.py @@ -18,7 +18,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=76, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=76, stage="stage-b", runner_config="1-gpu-large") HIGH_ACCEPT_PROMPT = ( "Output exactly 128 new lines. " diff --git a/test/registered/spec/eagle/test_deepseek_v3_fp4_mtp_small.py b/test/registered/spec/eagle/test_deepseek_v3_fp4_mtp_small.py index ae6ae80f94c5..e300218d0e40 100644 --- a/test/registered/spec/eagle/test_deepseek_v3_fp4_mtp_small.py +++ b/test/registered/spec/eagle/test_deepseek_v3_fp4_mtp_small.py @@ -16,7 +16,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=420, suite="stage-b-test-4-gpu-b200") +register_cuda_ci(est_time=420, stage="stage-b", runner_config="4-gpu-b200") FULL_DEEPSEEK_V3_FP4_MODEL_PATH = "nvidia/DeepSeek-V3-0324-FP4" SERVER_LAUNCH_TIMEOUT = 1200 diff --git a/test/registered/spec/eagle/test_eagle_constrained_decoding.py b/test/registered/spec/eagle/test_eagle_constrained_decoding.py index 2862d60d3622..4087332c6723 100644 --- a/test/registered/spec/eagle/test_eagle_constrained_decoding.py +++ b/test/registered/spec/eagle/test_eagle_constrained_decoding.py @@ -14,7 +14,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=116, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=116, stage="stage-b", runner_config="1-gpu-large") class TestEagleConstrainedDecoding( diff --git a/test/registered/spec/eagle/test_eagle_dp_attention.py b/test/registered/spec/eagle/test_eagle_dp_attention.py index 33a2b5d91e34..e45457b37f88 100644 --- a/test/registered/spec/eagle/test_eagle_dp_attention.py +++ b/test/registered/spec/eagle/test_eagle_dp_attention.py @@ -21,7 +21,7 @@ ) # EAGLE3 with DP attention (tp=2, dp=2, requires 4 GPUs) -register_cuda_ci(est_time=99, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=99, stage="stage-c", runner_config="4-gpu-h100") register_amd_ci(est_time=200, suite="stage-c-test-4-gpu-amd") diff --git a/test/registered/spec/eagle/test_eagle_infer_a.py b/test/registered/spec/eagle/test_eagle_infer_a.py index 077e4f46bfbd..d845990d5013 100644 --- a/test/registered/spec/eagle/test_eagle_infer_a.py +++ b/test/registered/spec/eagle/test_eagle_infer_a.py @@ -13,7 +13,7 @@ CustomTestCase, ) -register_cuda_ci(est_time=357, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=357, stage="stage-b", runner_config="1-gpu-large") class TestEAGLEEngine(CustomTestCase): diff --git a/test/registered/spec/eagle/test_eagle_infer_b.py b/test/registered/spec/eagle/test_eagle_infer_b.py index 3d4449271e9b..aa6ce73564c6 100644 --- a/test/registered/spec/eagle/test_eagle_infer_b.py +++ b/test/registered/spec/eagle/test_eagle_infer_b.py @@ -22,7 +22,7 @@ from sglang.test.server_fixtures.eagle_fixture import EagleServerBase from sglang.test.test_utils import DEFAULT_TARGET_MODEL_EAGLE, run_logprob_check -register_cuda_ci(est_time=847, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=847, stage="stage-b", runner_config="1-gpu-large") class TestEAGLEServerBasic(EagleServerBase): diff --git a/test/registered/spec/eagle/test_eagle_infer_beta.py b/test/registered/spec/eagle/test_eagle_infer_beta.py index 6fff766f4b1b..75635ee63777 100644 --- a/test/registered/spec/eagle/test_eagle_infer_beta.py +++ b/test/registered/spec/eagle/test_eagle_infer_beta.py @@ -19,7 +19,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=369, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=369, stage="stage-b", runner_config="1-gpu-small") class TestEagle3ServerBase(CustomTestCase, MatchedStopMixin): diff --git a/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py b/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py index 68c86f8c9199..85d22235f9b7 100644 --- a/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py +++ b/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py @@ -17,7 +17,7 @@ ) # EAGLE with DP attention on B200 (tp=2, dp=2, requires 4 B200 GPUs) -register_cuda_ci(est_time=123, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=123, stage="stage-c", runner_config="4-gpu-b200") def test_gsm8k(base_url: str, model: str): diff --git a/test/registered/spec/test_constrained_decoding_spec_reasoning.py b/test/registered/spec/test_constrained_decoding_spec_reasoning.py index 1cd4dc60f9dd..4f1322ea42b6 100644 --- a/test/registered/spec/test_constrained_decoding_spec_reasoning.py +++ b/test/registered/spec/test_constrained_decoding_spec_reasoning.py @@ -14,7 +14,7 @@ ) # Constrained decoding with EAGLE3 speculative reasoning (tp=2) -register_cuda_ci(est_time=137, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=137, stage="stage-b", runner_config="2-gpu-large") class ServerWithGrammar(CustomTestCase): diff --git a/test/registered/spec/test_ngram_speculative_decoding.py b/test/registered/spec/test_ngram_speculative_decoding.py index ad3904b7dc79..b3751fd29d01 100644 --- a/test/registered/spec/test_ngram_speculative_decoding.py +++ b/test/registered/spec/test_ngram_speculative_decoding.py @@ -14,7 +14,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=254, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=254, stage="stage-b", runner_config="1-gpu-large") GSM_DATASET_PATH = None diff --git a/test/registered/spec/test_standalone_speculative_decoding.py b/test/registered/spec/test_standalone_speculative_decoding.py index dba3a1d7afe3..8361ce8a7189 100644 --- a/test/registered/spec/test_standalone_speculative_decoding.py +++ b/test/registered/spec/test_standalone_speculative_decoding.py @@ -18,7 +18,7 @@ ) # Standalone speculative decoding tests (FA3, Triton, FlashInfer backends) -register_cuda_ci(est_time=406, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=406, stage="stage-b", runner_config="1-gpu-large") GSM_DATASET_PATH = None diff --git a/test/registered/spec/utils/test_build_eagle_tree.py b/test/registered/spec/utils/test_build_eagle_tree.py index 2e26a46e80c6..809f2d1d2ce1 100644 --- a/test/registered/spec/utils/test_build_eagle_tree.py +++ b/test/registered/spec/utils/test_build_eagle_tree.py @@ -9,7 +9,7 @@ from sglang.srt.utils import get_device from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=6, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=6, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=3, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/tokenizer/test_multi_tokenizer.py b/test/registered/tokenizer/test_multi_tokenizer.py index 134bcf8452d4..df2bb3aa0de3 100644 --- a/test/registered/tokenizer/test_multi_tokenizer.py +++ b/test/registered/tokenizer/test_multi_tokenizer.py @@ -17,7 +17,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=211, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=211, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=345, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/tokenizer/test_skip_tokenizer_init.py b/test/registered/tokenizer/test_skip_tokenizer_init.py index 89a5ff977391..a4275801ac94 100644 --- a/test/registered/tokenizer/test_skip_tokenizer_init.py +++ b/test/registered/tokenizer/test_skip_tokenizer_init.py @@ -23,7 +23,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=79, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=79, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=117, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/README.md b/test/registered/unit/README.md index 914f28639762..a8e5a817469a 100644 --- a/test/registered/unit/README.md +++ b/test/registered/unit/README.md @@ -15,8 +15,11 @@ Tests can use CPU or GPU — the key criterion is **no server process**. ```python from sglang.test.ci.ci_register import register_cpu_ci register_cpu_ci(est_time=5, suite="stage-a-test-cpu") - # or: register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-small") + # or: register_cuda_ci(est_time=10, stage="stage-b", runner_config="1-gpu-small") ``` + CUDA suites whose names follow `{stage}-test-{runner_config}` use the + `stage=` + `runner_config=` form. Legacy `suite="..."` is kept for + nightly/stress/weekly + AMD/CPU/NPU suites that don't fit that shape. 4. Run locally: ```bash pytest test/registered/unit/ -v # all unit tests diff --git a/test/registered/unit/auto_benchmark/test_dataset_tools.py b/test/registered/unit/auto_benchmark/test_dataset_tools.py index 08a52ce137ba..37e49224bead 100644 --- a/test/registered/unit/auto_benchmark/test_dataset_tools.py +++ b/test/registered/unit/auto_benchmark/test_dataset_tools.py @@ -15,7 +15,7 @@ from sglang.benchmark.datasets.autobench import sample_autobench_requests from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=6, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=6, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=6, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/auto_benchmark/test_run_candidate.py b/test/registered/unit/auto_benchmark/test_run_candidate.py index 99e7a81fa849..427f83220064 100644 --- a/test/registered/unit/auto_benchmark/test_run_candidate.py +++ b/test/registered/unit/auto_benchmark/test_run_candidate.py @@ -14,7 +14,7 @@ from sglang.auto_benchmark_lib import SearchDeadlineExceeded, run_candidate from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=6, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=6, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=6, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/auto_benchmark/test_search_tools.py b/test/registered/unit/auto_benchmark/test_search_tools.py index 3e8c0ae8812e..b142492b4b65 100644 --- a/test/registered/unit/auto_benchmark/test_search_tools.py +++ b/test/registered/unit/auto_benchmark/test_search_tools.py @@ -28,7 +28,7 @@ ) from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=6, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=6, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=6, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/constrained/test_e2e_constrained_reasoning.py b/test/registered/unit/constrained/test_e2e_constrained_reasoning.py index 48e69a4aca91..767491c972a0 100644 --- a/test/registered/unit/constrained/test_e2e_constrained_reasoning.py +++ b/test/registered/unit/constrained/test_e2e_constrained_reasoning.py @@ -22,7 +22,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=120, stage="stage-b", runner_config="1-gpu-small") MODEL = "Qwen/Qwen3-0.6B" BASE_URL = "http://127.0.0.1:39877" diff --git a/test/registered/unit/layers/quantization/test_mxfp4_sm90_cutlass.py b/test/registered/unit/layers/quantization/test_mxfp4_sm90_cutlass.py index 830b55d7b498..85ca0fd27f94 100644 --- a/test/registered/unit/layers/quantization/test_mxfp4_sm90_cutlass.py +++ b/test/registered/unit/layers/quantization/test_mxfp4_sm90_cutlass.py @@ -20,7 +20,7 @@ from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=120, stage="stage-b", runner_config="1-gpu-large") flashinfer_fused_moe = pytest.importorskip("flashinfer.fused_moe") diff --git a/test/registered/unit/layers/test_conv_layer.py b/test/registered/unit/layers/test_conv_layer.py index c892b4fcd057..243a304b5f35 100644 --- a/test/registered/unit/layers/test_conv_layer.py +++ b/test/registered/unit/layers/test_conv_layer.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=7, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=7, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=7, suite="stage-b-test-1-gpu-small-amd") import unittest diff --git a/test/registered/unit/layers/test_mamba_state_scatter_triton.py b/test/registered/unit/layers/test_mamba_state_scatter_triton.py index 9382516418d8..863a2ccc06b5 100644 --- a/test/registered/unit/layers/test_mamba_state_scatter_triton.py +++ b/test/registered/unit/layers/test_mamba_state_scatter_triton.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=7, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=7, stage="stage-b", runner_config="1-gpu-small") import os import unittest diff --git a/test/registered/unit/lora/test_mem_pool_ep_unit.py b/test/registered/unit/lora/test_mem_pool_ep_unit.py index a0a3de607f0c..024770dffd73 100644 --- a/test/registered/unit/lora/test_mem_pool_ep_unit.py +++ b/test/registered/unit/lora/test_mem_pool_ep_unit.py @@ -16,7 +16,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci # CPU-only unit test; no CUDA/distributed dependencies. -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=9, suite="stage-b-test-1-gpu-small-amd") import types diff --git a/test/registered/unit/managers/test_hisparse_unit.py b/test/registered/unit/managers/test_hisparse_unit.py index b0eba08a2788..14ef091ad503 100644 --- a/test/registered/unit/managers/test_hisparse_unit.py +++ b/test/registered/unit/managers/test_hisparse_unit.py @@ -16,7 +16,7 @@ from sglang.srt.utils import is_cuda, is_hip, is_npu, is_xpu from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=10, stage="stage-b", runner_config="1-gpu-small") # --------------------------------------------------------------------------- # Test configuration (small-scale for fast CI runs) diff --git a/test/registered/unit/managers/test_io_struct.py b/test/registered/unit/managers/test_io_struct.py index 47cdf5740bb9..6e2714570e27 100644 --- a/test/registered/unit/managers/test_io_struct.py +++ b/test/registered/unit/managers/test_io_struct.py @@ -9,7 +9,7 @@ CustomTestCase, ) -register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=8, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=8, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/managers/test_prefill_adder.py b/test/registered/unit/managers/test_prefill_adder.py index 22cc1f598a78..14d4eab70061 100644 --- a/test/registered/unit/managers/test_prefill_adder.py +++ b/test/registered/unit/managers/test_prefill_adder.py @@ -12,7 +12,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=2, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/managers/test_priority_scheduling_disaggregation.py b/test/registered/unit/managers/test_priority_scheduling_disaggregation.py index 8c48b616b3b0..7ce3e8de07ed 100644 --- a/test/registered/unit/managers/test_priority_scheduling_disaggregation.py +++ b/test/registered/unit/managers/test_priority_scheduling_disaggregation.py @@ -14,7 +14,7 @@ from sglang.srt.managers.scheduler import Scheduler # noqa: E402 from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=5, suite="stage-a-test-1-gpu-small") +register_cuda_ci(est_time=5, stage="stage-a", runner_config="1-gpu-small") class TestDisaggregationPriorityQueueing(unittest.TestCase): diff --git a/test/registered/unit/managers/test_profile_merger_http_api.py b/test/registered/unit/managers/test_profile_merger_http_api.py index 53e49feb4a0e..ccb12dae7103 100644 --- a/test/registered/unit/managers/test_profile_merger_http_api.py +++ b/test/registered/unit/managers/test_profile_merger_http_api.py @@ -4,7 +4,7 @@ from sglang.srt.managers.io_struct import ProfileReqInput from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=8, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=9, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/mem_cache/test_decode_radix_lock_ref.py b/test/registered/unit/mem_cache/test_decode_radix_lock_ref.py index eba5e34ef57f..f392e9fcd0ef 100644 --- a/test/registered/unit/mem_cache/test_decode_radix_lock_ref.py +++ b/test/registered/unit/mem_cache/test_decode_radix_lock_ref.py @@ -22,7 +22,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=10, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=10, suite="stage-b-test-1-gpu-small-amd") import unittest diff --git a/test/registered/unit/mem_cache/test_mamba_unittest.py b/test/registered/unit/mem_cache/test_mamba_unittest.py index 8ea1db75512b..4cc253e56a62 100755 --- a/test/registered/unit/mem_cache/test_mamba_unittest.py +++ b/test/registered/unit/mem_cache/test_mamba_unittest.py @@ -23,7 +23,7 @@ from sglang.srt.utils import get_device from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=10, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=9, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/mem_cache/test_nsa_pool_host_unit.py b/test/registered/unit/mem_cache/test_nsa_pool_host_unit.py index 6cac74027601..c4fafdb564f0 100644 --- a/test/registered/unit/mem_cache/test_nsa_pool_host_unit.py +++ b/test/registered/unit/mem_cache/test_nsa_pool_host_unit.py @@ -12,7 +12,7 @@ from sglang.srt.utils import is_cuda, is_hip, is_npu, is_xpu from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-small") class TestNSAHiCacheTransfer(unittest.TestCase): diff --git a/test/registered/unit/mem_cache/test_radix_cache_slru_accuracy.py b/test/registered/unit/mem_cache/test_radix_cache_slru_accuracy.py index 95531c98426e..a167b4f02f1b 100644 --- a/test/registered/unit/mem_cache/test_radix_cache_slru_accuracy.py +++ b/test/registered/unit/mem_cache/test_radix_cache_slru_accuracy.py @@ -13,7 +13,7 @@ from sglang.srt.mem_cache.radix_cache import RadixCache, RadixKey from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=8, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=8, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/mem_cache/test_radix_cache_unit.py b/test/registered/unit/mem_cache/test_radix_cache_unit.py index 73ff4fd138f5..9cbb7bc77729 100644 --- a/test/registered/unit/mem_cache/test_radix_cache_unit.py +++ b/test/registered/unit/mem_cache/test_radix_cache_unit.py @@ -21,7 +21,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci # CPU-based unit test, runs quickly on any GPU runner -register_cuda_ci(est_time=15, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=15, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=5, suite="stage-b-test-1-gpu-small-amd") import random diff --git a/test/registered/unit/mem_cache/test_swa_eviction_boundary.py b/test/registered/unit/mem_cache/test_swa_eviction_boundary.py index 2802f8b0a209..3dd0bf2f26a1 100644 --- a/test/registered/unit/mem_cache/test_swa_eviction_boundary.py +++ b/test/registered/unit/mem_cache/test_swa_eviction_boundary.py @@ -26,7 +26,7 @@ from sglang.srt.utils import get_device from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=12, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=12, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=10, suite="stage-b-test-1-gpu-small-amd") # --------------------------------------------------------------------------- diff --git a/test/registered/unit/mem_cache/test_swa_lock_release_lifecycle.py b/test/registered/unit/mem_cache/test_swa_lock_release_lifecycle.py index aa6f826293be..7409fab8280a 100644 --- a/test/registered/unit/mem_cache/test_swa_lock_release_lifecycle.py +++ b/test/registered/unit/mem_cache/test_swa_lock_release_lifecycle.py @@ -30,7 +30,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=12, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=12, stage="stage-b", runner_config="1-gpu-small") def _build_tree( diff --git a/test/registered/unit/mem_cache/test_swa_unittest.py b/test/registered/unit/mem_cache/test_swa_unittest.py index 417b798b66f9..08f9a0c1c1a2 100644 --- a/test/registered/unit/mem_cache/test_swa_unittest.py +++ b/test/registered/unit/mem_cache/test_swa_unittest.py @@ -21,7 +21,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=10, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/mem_cache/test_unified_radix_cache_bench.py b/test/registered/unit/mem_cache/test_unified_radix_cache_bench.py index f775b213a254..b76ecbba4b4c 100644 --- a/test/registered/unit/mem_cache/test_unified_radix_cache_bench.py +++ b/test/registered/unit/mem_cache/test_unified_radix_cache_bench.py @@ -38,7 +38,7 @@ from sglang.srt.utils import get_device from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=25, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=25, stage="stage-b", runner_config="1-gpu-small") # --------------------------------------------------------------------------- # Constants diff --git a/test/registered/unit/mem_cache/test_unified_radix_cache_unittest.py b/test/registered/unit/mem_cache/test_unified_radix_cache_unittest.py index 22c176a1cd45..415ce22e5ba2 100644 --- a/test/registered/unit/mem_cache/test_unified_radix_cache_unittest.py +++ b/test/registered/unit/mem_cache/test_unified_radix_cache_unittest.py @@ -48,7 +48,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=10, stage="stage-b", runner_config="1-gpu-small") @dataclass(frozen=True) diff --git a/test/registered/unit/model_loader/test_modelopt_export.py b/test/registered/unit/model_loader/test_modelopt_export.py index a1eb133c226a..22421c24b342 100644 --- a/test/registered/unit/model_loader/test_modelopt_export.py +++ b/test/registered/unit/model_loader/test_modelopt_export.py @@ -19,7 +19,7 @@ from sglang.srt.model_loader.loader import ModelOptModelLoader from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=11, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=11, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=9, suite="stage-b-test-1-gpu-small-amd") # Note: PYTHONPATH=python should be set when running tests diff --git a/test/registered/unit/model_loader/test_modelopt_loader.py b/test/registered/unit/model_loader/test_modelopt_loader.py index 7f9652c0e5db..9acdfaabb878 100644 --- a/test/registered/unit/model_loader/test_modelopt_loader.py +++ b/test/registered/unit/model_loader/test_modelopt_loader.py @@ -30,7 +30,7 @@ CALIBRATION_NUM_SAMPLES = 512 DEFAULT_DEVICE = "cuda:0" -register_cuda_ci(est_time=11, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=11, stage="stage-b", runner_config="1-gpu-small") class TestModelOptModelLoader(CustomTestCase): diff --git a/test/registered/unit/models/test_llava.py b/test/registered/unit/models/test_llava.py index 91f7f34f4867..7a211193fc93 100644 --- a/test/registered/unit/models/test_llava.py +++ b/test/registered/unit/models/test_llava.py @@ -5,7 +5,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=9, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/utils/test_profile_merger.py b/test/registered/unit/utils/test_profile_merger.py index 29455288d327..a7f5d2cfb357 100644 --- a/test/registered/unit/utils/test_profile_merger.py +++ b/test/registered/unit/utils/test_profile_merger.py @@ -17,7 +17,7 @@ from sglang.srt.utils.profile_merger import ProfileMerger from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=9, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=9, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=8, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/utils/test_weight_checker.py b/test/registered/unit/utils/test_weight_checker.py index 643c513694e8..ab84cfb46050 100644 --- a/test/registered/unit/utils/test_weight_checker.py +++ b/test/registered/unit/utils/test_weight_checker.py @@ -38,7 +38,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=30, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=30, stage="stage-b", runner_config="1-gpu-small") # --------------------------------------------------------------------------- diff --git a/test/registered/utils/test_numa_utils.py b/test/registered/utils/test_numa_utils.py index 27719d30699c..a660933345c0 100644 --- a/test/registered/utils/test_numa_utils.py +++ b/test/registered/utils/test_numa_utils.py @@ -10,8 +10,8 @@ from sglang.test.ci.ci_register import register_cpu_ci, register_cuda_ci register_cpu_ci(est_time=7, suite="stage-a-test-cpu") -register_cuda_ci(est_time=10, suite="stage-c-test-4-gpu-gb200") -register_cuda_ci(est_time=10, suite="stage-c-test-8-gpu-b200") +register_cuda_ci(est_time=10, stage="stage-c", runner_config="4-gpu-gb200") +register_cuda_ci(est_time=10, stage="stage-c", runner_config="8-gpu-b200") class TestIsNumaAvailable(unittest.TestCase): diff --git a/test/registered/vlm/test_evs.py b/test/registered/vlm/test_evs.py index 881089abc8d3..f95839e5075a 100644 --- a/test/registered/vlm/test_evs.py +++ b/test/registered/vlm/test_evs.py @@ -6,7 +6,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import run_doctests -register_cuda_ci(est_time=11, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=11, stage="stage-b", runner_config="1-gpu-small") register_amd_ci(est_time=20, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/vlm/test_vision_chunked_prefill.py b/test/registered/vlm/test_vision_chunked_prefill.py index 76292335319d..9f42d04e1a17 100644 --- a/test/registered/vlm/test_vision_chunked_prefill.py +++ b/test/registered/vlm/test_vision_chunked_prefill.py @@ -1,6 +1,6 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=156, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=156, stage="stage-b", runner_config="1-gpu-large") register_amd_ci(est_time=270, suite="stage-b-test-1-gpu-small-amd") """ Usage: diff --git a/test/registered/vlm/test_vision_openai_server_a.py b/test/registered/vlm/test_vision_openai_server_a.py index d7acd2c167a3..8611be701534 100644 --- a/test/registered/vlm/test_vision_openai_server_a.py +++ b/test/registered/vlm/test_vision_openai_server_a.py @@ -19,7 +19,7 @@ VideoOpenAITestMixin, ) -register_cuda_ci(est_time=780, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=780, stage="stage-b", runner_config="1-gpu-large") class TestLlavaServer(ImageOpenAITestMixin): diff --git a/test/registered/vlm/test_vlm_input_format.py b/test/registered/vlm/test_vlm_input_format.py index 458752ebab85..70c4cc4eb629 100644 --- a/test/registered/vlm/test_vlm_input_format.py +++ b/test/registered/vlm/test_vlm_input_format.py @@ -38,7 +38,7 @@ def forward(self, x): from sglang.srt.utils.common import is_cuda, is_xpu from sglang.srt.utils.hf_transformers_utils import _fix_added_tokens_encoding -register_cuda_ci(est_time=747, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=747, stage="stage-b", runner_config="1-gpu-large") IMAGE_MAN_IRONING_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/man_ironing_on_back_of_suv.png" IMAGE_SGL_LOGO_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/sgl_logo.png" diff --git a/test/registered/vlm/test_vlm_tp4.py b/test/registered/vlm/test_vlm_tp4.py index 2ad16ff80de7..22361383b7dd 100644 --- a/test/registered/vlm/test_vlm_tp4.py +++ b/test/registered/vlm/test_vlm_tp4.py @@ -15,7 +15,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=133, suite="stage-c-test-4-gpu-h100") +register_cuda_ci(est_time=133, stage="stage-c", runner_config="4-gpu-h100") QWEN35_27B_MODEL = "Qwen/Qwen3.5-27B" MMMU_ACCURACY_THRESHOLD = 0.65 diff --git a/test/run_suite.py b/test/run_suite.py index 597fe3c9b704..f552ea13372f 100644 --- a/test/run_suite.py +++ b/test/run_suite.py @@ -151,9 +151,9 @@ def validate_all_suites(all_tests: List[CIRegistry]): if t.backend not in _SUITE_CHECKED_BACKENDS: continue valid = valid_by_backend.get(t.backend, set()) - if t.suite not in valid: + if t.effective_suite not in valid: errors.append( - f" {t.filename}: backend={t.backend.name}, suite='{t.suite}'" + f" {t.filename}: backend={t.backend.name}, suite='{t.effective_suite}'" ) if errors: raise ValueError("Tests registered to invalid suites:\n" + "\n".join(errors)) @@ -165,7 +165,7 @@ def filter_tests( ci_tests = [ t for t in ci_tests - if t.backend == hw and t.suite == suite and t.nightly == nightly + if t.backend == hw and t.effective_suite == suite and t.nightly == nightly ] valid_suites = (