diff --git a/test/registered/attention/test_chunk_gated_delta_rule.py b/test/registered/attention/test_chunk_gated_delta_rule.py index e5d53c32a6b6..5daeb6a1bb0a 100644 --- a/test/registered/attention/test_chunk_gated_delta_rule.py +++ b/test/registered/attention/test_chunk_gated_delta_rule.py @@ -8,7 +8,7 @@ ) from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=60, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-large") @unittest.skipIf(not torch.cuda.is_available(), "Test requires CUDA") diff --git a/test/registered/attention/test_hybrid_attn_backend.py b/test/registered/attention/test_hybrid_attn_backend.py index 22889b5682df..9e811fe107e7 100644 --- a/test/registered/attention/test_hybrid_attn_backend.py +++ b/test/registered/attention/test_hybrid_attn_backend.py @@ -20,7 +20,7 @@ # Hybrid attention backend tests (FA3 prefill + FlashInfer decode, requires SM 90+ / H100) # Multiple test classes: base, MLA, TorchCompile, SpecDecode variants -register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=350, suite="stage-b-test-1-gpu-large") GSM_DATASET_PATH = None diff --git a/test/registered/cp/test_deepseek_v32_cp_single_node.py b/test/registered/cp/test_deepseek_v32_cp_single_node.py index 89a15ce14b9c..55595eff5809 100644 --- a/test/registered/cp/test_deepseek_v32_cp_single_node.py +++ b/test/registered/cp/test_deepseek_v32_cp_single_node.py @@ -14,7 +14,7 @@ write_github_step_summary, ) -register_cuda_ci(est_time=360, suite="stage-c-test-deepep-8-gpu-h200") +register_cuda_ci(est_time=640, suite="stage-c-test-deepep-8-gpu-h200") DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2" diff --git a/test/registered/disaggregation/test_disaggregation_decode_offload.py b/test/registered/disaggregation/test_disaggregation_decode_offload.py index 9742c989686b..3bc2108031a2 100644 --- a/test/registered/disaggregation/test_disaggregation_decode_offload.py +++ b/test/registered/disaggregation/test_disaggregation_decode_offload.py @@ -12,16 +12,18 @@ from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - is_in_ci, popen_launch_pd_server, ) # Registering the test for CUDA CI with appropriate parameters # Increasing estimated time since we run evaluation twice -register_cuda_ci(est_time=600, suite="stage-b-test-2-gpu-large") +register_cuda_ci( + est_time=600, + suite="stage-b-test-2-gpu-large", + disabled="Temporarily disable the flaky test.", +) -@unittest.skipIf(is_in_ci(), "Temporarily disable the flaky test.") class TestDisaggregationDecodeOffload(PDDisaggregationServerBase): """ Test class for verifying KV cache offloading on the decode side in a diff --git a/test/registered/distributed/test_load_weights_from_remote_instance.py b/test/registered/distributed/test_load_weights_from_remote_instance.py index 4402d399d30f..00dc8454d325 100644 --- a/test/registered/distributed/test_load_weights_from_remote_instance.py +++ b/test/registered/distributed/test_load_weights_from_remote_instance.py @@ -38,7 +38,7 @@ mp.set_start_method("spawn", force=True) -register_cuda_ci(est_time=72, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=130, suite="stage-b-test-2-gpu-large") register_amd_ci(est_time=72, suite="stage-b-test-2-gpu-large-amd") diff --git a/test/registered/embedding/test_embedding_models.py b/test/registered/embedding/test_embedding_models.py index 4a9c43a95ca4..3060ae3048f0 100644 --- a/test/registered/embedding/test_embedding_models.py +++ b/test/registered/embedding/test_embedding_models.py @@ -35,7 +35,7 @@ suite="stage-b-test-1-gpu-small-amd", disabled="see https://github.com/sgl-project/sglang/issues/11127", ) -register_cuda_ci(est_time=73, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-small") MODEL_TO_CONFIG = { "Alibaba-NLP/gte-Qwen2-1.5B-instruct": (1, 1e-5), diff --git a/test/registered/ep/test_mooncake_ep_small.py b/test/registered/ep/test_mooncake_ep_small.py index e54381847679..5ef70914c698 100644 --- a/test/registered/ep/test_mooncake_ep_small.py +++ b/test/registered/ep/test_mooncake_ep_small.py @@ -15,7 +15,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=660, suite="stage-c-test-deepep-4-gpu-h100") +register_cuda_ci(est_time=200, suite="stage-c-test-deepep-4-gpu-h100") ib_devices = get_rdma_devices_args() diff --git a/test/registered/eval/test_eval_accuracy_large.py b/test/registered/eval/test_eval_accuracy_large.py index dab7bb6be55f..97b441f41d76 100644 --- a/test/registered/eval/test_eval_accuracy_large.py +++ b/test/registered/eval/test_eval_accuracy_large.py @@ -16,7 +16,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=300, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=580, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=420, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/function_call/test_kimik2_detector.py b/test/registered/function_call/test_kimik2_detector.py index 158223958c59..68b7c98a980b 100644 --- a/test/registered/function_call/test_kimik2_detector.py +++ b/test/registered/function_call/test_kimik2_detector.py @@ -11,7 +11,7 @@ from sglang.srt.parser.reasoning_parser import KimiK2Detector as KimiK2ReasoningDetector from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(1.0, "stage-a-test-cpu") +register_cpu_ci(5, "stage-a-test-cpu") def _make_tool(name, parameters=None): diff --git a/test/registered/kernels/test_nsa_indexer.py b/test/registered/kernels/test_nsa_indexer.py index 789baed00414..77c007dad470 100644 --- a/test/registered/kernels/test_nsa_indexer.py +++ b/test/registered/kernels/test_nsa_indexer.py @@ -24,7 +24,7 @@ from sglang.srt.server_args import ServerArgs, set_global_server_args_for_scheduler from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=2, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=15, suite="stage-b-test-1-gpu-large") # Global configuration for all indexer tests DEFAULT_CONFIG = { diff --git a/test/registered/lora/test_fused_moe_lora_kernel.py b/test/registered/lora/test_fused_moe_lora_kernel.py index 8c0bde3f3e6d..44179a5fa7e2 100644 --- a/test/registered/lora/test_fused_moe_lora_kernel.py +++ b/test/registered/lora/test_fused_moe_lora_kernel.py @@ -15,7 +15,7 @@ # ============================================================================== -register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=25, suite="stage-b-test-1-gpu-large") def round_up(x, base): diff --git a/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py b/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py index fdd3dca263de..7a7b47bbc76f 100644 --- a/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py +++ b/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py @@ -25,7 +25,7 @@ from sglang.test.runners import SRTRunner register_cuda_ci( - est_time=25, + est_time=50, suite="stage-b-test-1-gpu-large", ) diff --git a/test/registered/lora/test_lora_qwen3_30b_a3b_instruct_2507_logprob_diff.py b/test/registered/lora/test_lora_qwen3_30b_a3b_instruct_2507_logprob_diff.py index f660b2d07225..a729407f6986 100644 --- a/test/registered/lora/test_lora_qwen3_30b_a3b_instruct_2507_logprob_diff.py +++ b/test/registered/lora/test_lora_qwen3_30b_a3b_instruct_2507_logprob_diff.py @@ -35,7 +35,7 @@ from sglang.test.test_utils import CustomTestCase register_cuda_ci( - est_time=300, + est_time=160, suite="stage-c-test-4-gpu-b200", ) diff --git a/test/registered/lora/test_lora_qwen3_8b_logprob_diff.py b/test/registered/lora/test_lora_qwen3_8b_logprob_diff.py index 04f60cfdc8fe..b21638ba562d 100644 --- a/test/registered/lora/test_lora_qwen3_8b_logprob_diff.py +++ b/test/registered/lora/test_lora_qwen3_8b_logprob_diff.py @@ -38,7 +38,7 @@ from sglang.test.test_utils import CustomTestCase register_cuda_ci( - est_time=200, + est_time=40, suite="stage-b-test-1-gpu-large", ) diff --git a/test/registered/lora/test_lora_qwen3_vl_30b_a3b_instruct_logprob_diff.py b/test/registered/lora/test_lora_qwen3_vl_30b_a3b_instruct_logprob_diff.py index 67031048fa14..fc82dff71a9c 100644 --- a/test/registered/lora/test_lora_qwen3_vl_30b_a3b_instruct_logprob_diff.py +++ b/test/registered/lora/test_lora_qwen3_vl_30b_a3b_instruct_logprob_diff.py @@ -35,7 +35,7 @@ from sglang.test.test_utils import CustomTestCase register_cuda_ci( - est_time=300, + est_time=160, suite="stage-c-test-4-gpu-b200", ) diff --git a/test/registered/lora/test_lora_tp.py b/test/registered/lora/test_lora_tp.py index 60f7e0ee0e0d..32c4352889da 100644 --- a/test/registered/lora/test_lora_tp.py +++ b/test/registered/lora/test_lora_tp.py @@ -30,7 +30,7 @@ from sglang.test.test_utils import CustomTestCase, is_in_ci register_cuda_ci( - est_time=116, + est_time=190, suite="stage-c-test-8-gpu-h200", ) register_amd_ci( diff --git a/test/registered/metrics/test_metrics.py b/test/registered/metrics/test_metrics.py index 012bad626b24..d1f0f381bee4 100644 --- a/test/registered/metrics/test_metrics.py +++ b/test/registered/metrics/test_metrics.py @@ -20,7 +20,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=32, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=95, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=32, suite="stage-b-test-1-gpu-small-amd") _MODEL_NAME = "Qwen/Qwen3-0.6B" diff --git a/test/registered/models/test_kimi_linear_models.py b/test/registered/models/test_kimi_linear_models.py index f61aad77dac4..a97a3af8c08c 100644 --- a/test/registered/models/test_kimi_linear_models.py +++ b/test/registered/models/test_kimi_linear_models.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=90, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=180, suite="stage-b-test-2-gpu-large") class TestKimiLinear(CustomTestCase): diff --git a/test/registered/models/test_nvidia_nemotron_3_nano.py b/test/registered/models/test_nvidia_nemotron_3_nano.py index 0580979c49f3..1d26abd21c1a 100644 --- a/test/registered/models/test_nvidia_nemotron_3_nano.py +++ b/test/registered/models/test_nvidia_nemotron_3_nano.py @@ -4,7 +4,7 @@ from sglang.test.kits.lm_eval_kit import LMEvalMixin from sglang.test.server_fixtures.default_fixture import DefaultServerBase -register_cuda_ci(est_time=180, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=660, suite="stage-b-test-2-gpu-large") NEMOTRON_3_NANO_THINKING_ARGS = [ "--trust-remote-code", diff --git a/test/registered/models/test_nvidia_nemotron_nano_v2.py b/test/registered/models/test_nvidia_nemotron_nano_v2.py index e5b0150acfb7..b12395a6a3f0 100644 --- a/test/registered/models/test_nvidia_nemotron_nano_v2.py +++ b/test/registered/models/test_nvidia_nemotron_nano_v2.py @@ -5,7 +5,7 @@ from sglang.test.kits.eval_accuracy_kit import GSM8KMixin from sglang.test.server_fixtures.default_fixture import DefaultServerBase -register_cuda_ci(est_time=132, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=240, suite="stage-b-test-2-gpu-large") class TestNvidiaNemotronNanoV2BF16(GSM8KMixin, DefaultServerBase): diff --git a/test/registered/models/test_transformers_models.py b/test/registered/models/test_transformers_models.py index b91fd5831dd7..42416ad5481f 100644 --- a/test/registered/models/test_transformers_models.py +++ b/test/registered/models/test_transformers_models.py @@ -21,7 +21,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=245, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=450, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=320, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/moe/test_cutedsl_moe.py b/test/registered/moe/test_cutedsl_moe.py index 3846334d0f02..76b3b4a8723f 100644 --- a/test/registered/moe/test_cutedsl_moe.py +++ b/test/registered/moe/test_cutedsl_moe.py @@ -12,7 +12,7 @@ from sglang.srt.layers.moe.topk import TopKConfig, select_experts from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=300, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=20, suite="stage-c-test-4-gpu-b200") SKIP_TEST = torch.cuda.get_device_capability() < (10, 0) SKIP_REASON = "Nvfp4 Requires compute capability of 10 or above." diff --git a/test/registered/moe/test_moe_ep.py b/test/registered/moe/test_moe_ep.py index 6db68e94885d..1d27d3c209c3 100644 --- a/test/registered/moe/test_moe_ep.py +++ b/test/registered/moe/test_moe_ep.py @@ -12,7 +12,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=140, suite="stage-b-test-2-gpu-large") +register_cuda_ci(est_time=250, suite="stage-b-test-2-gpu-large") class TestEp(CustomTestCase): diff --git a/test/registered/moe/test_triton_fused_moe.py b/test/registered/moe/test_triton_fused_moe.py index e20ed609fb19..2255d64bfc46 100644 --- a/test/registered/moe/test_triton_fused_moe.py +++ b/test/registered/moe/test_triton_fused_moe.py @@ -12,7 +12,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=89, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=30, suite="stage-b-test-1-gpu-large") class TestFusedMOE(CustomTestCase): diff --git a/test/registered/openai_server/features/test_openai_server_ebnf.py b/test/registered/openai_server/features/test_openai_server_ebnf.py index 05cf9f1021e8..2ec7d2dcf9c9 100644 --- a/test/registered/openai_server/features/test_openai_server_ebnf.py +++ b/test/registered/openai_server/features/test_openai_server_ebnf.py @@ -13,7 +13,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=7, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=55, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=20, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/function_call/test_tool_choice.py b/test/registered/openai_server/function_call/test_tool_choice.py index 9b0fad76dca0..9f2d2dfc6008 100644 --- a/test/registered/openai_server/function_call/test_tool_choice.py +++ b/test/registered/openai_server/function_call/test_tool_choice.py @@ -22,7 +22,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=250, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=258, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/openai_server/validation/test_openai_server_ignore_eos.py b/test/registered/openai_server/validation/test_openai_server_ignore_eos.py index 9014466c089b..2f27699ee14b 100644 --- a/test/registered/openai_server/validation/test_openai_server_ignore_eos.py +++ b/test/registered/openai_server/validation/test_openai_server_ignore_eos.py @@ -11,7 +11,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=6, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=55, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=47, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/quant/test_fp8_kernel.py b/test/registered/quant/test_fp8_kernel.py index 57a6362358f2..ea5c60689105 100644 --- a/test/registered/quant/test_fp8_kernel.py +++ b/test/registered/quant/test_fp8_kernel.py @@ -9,7 +9,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=132, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=15, suite="stage-b-test-1-gpu-large") class TestFP8Base(CustomTestCase): diff --git a/test/registered/quant/test_fp8kv_triton.py b/test/registered/quant/test_fp8kv_triton.py index 218119c988ab..df9573e22891 100644 --- a/test/registered/quant/test_fp8kv_triton.py +++ b/test/registered/quant/test_fp8kv_triton.py @@ -12,7 +12,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=520, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=80, suite="stage-b-test-1-gpu-large") class TestFP8KVCacheTritonBackend(CustomTestCase): diff --git a/test/registered/quant/test_int8_kernel.py b/test/registered/quant/test_int8_kernel.py index c15de1d4a2f8..0a5d3001a826 100644 --- a/test/registered/quant/test_int8_kernel.py +++ b/test/registered/quant/test_int8_kernel.py @@ -11,7 +11,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=16, suite="stage-b-test-1-gpu-small") def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16): diff --git a/test/registered/quant/test_quant_config_parsing.py b/test/registered/quant/test_quant_config_parsing.py index f2aeba7122d4..5b9d12817bd0 100644 --- a/test/registered/quant/test_quant_config_parsing.py +++ b/test/registered/quant/test_quant_config_parsing.py @@ -5,7 +5,7 @@ from sglang.test.ci.ci_register import register_cpu_ci from sglang.test.test_utils import CustomTestCase -register_cpu_ci(est_time=5, suite="stage-a-test-cpu") +register_cpu_ci(est_time=20, suite="stage-a-test-cpu") class TestQuantLogString(CustomTestCase): diff --git a/test/registered/quant/test_quantization.py b/test/registered/quant/test_quantization.py index 4ab48314482b..cdb1f0970ef4 100644 --- a/test/registered/quant/test_quantization.py +++ b/test/registered/quant/test_quantization.py @@ -16,7 +16,7 @@ write_results_to_json, ) -register_cuda_ci(est_time=185, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=370, suite="stage-b-test-1-gpu-large") MODEL_SCORE_THRESHOLDS = { "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4": 0.825, diff --git a/test/registered/quant/test_torchao.py b/test/registered/quant/test_torchao.py index c5f6ad5991bd..a53b929a6dd6 100644 --- a/test/registered/quant/test_torchao.py +++ b/test/registered/quant/test_torchao.py @@ -5,7 +5,7 @@ from sglang import Engine from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=103, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=230, suite="stage-b-test-1-gpu-small-amd") from sglang.lang.chat_template import get_chat_template_by_model_path from sglang.srt.utils import kill_process_tree diff --git a/test/registered/sampling/test_pytorch_sampling_backend.py b/test/registered/sampling/test_pytorch_sampling_backend.py index 501d706787ef..6abea3c3e1ea 100644 --- a/test/registered/sampling/test_pytorch_sampling_backend.py +++ b/test/registered/sampling/test_pytorch_sampling_backend.py @@ -14,7 +14,7 @@ popen_launch_server, ) -register_cuda_ci(est_time=66, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=150, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=66, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/scheduler/test_abort.py b/test/registered/scheduler/test_abort.py index 9643ce68652f..47399bc4e07c 100644 --- a/test/registered/scheduler/test_abort.py +++ b/test/registered/scheduler/test_abort.py @@ -19,7 +19,7 @@ run_and_check_memory_leak, ) -register_cuda_ci(est_time=131, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=350, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=300, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/scheduler/test_chunked_prefill.py b/test/registered/scheduler/test_chunked_prefill.py index 9b72ce21f0ca..bc22976c344c 100644 --- a/test/registered/scheduler/test_chunked_prefill.py +++ b/test/registered/scheduler/test_chunked_prefill.py @@ -7,7 +7,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase, run_mmlu_test, run_mulit_request_test -register_cuda_ci(est_time=312, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=550, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=312, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/scheduler/test_retract_decode.py b/test/registered/scheduler/test_retract_decode.py index 15f333104460..0628c97e425b 100644 --- a/test/registered/scheduler/test_retract_decode.py +++ b/test/registered/scheduler/test_retract_decode.py @@ -17,7 +17,7 @@ ) from sglang.utils import is_in_ci -register_cuda_ci(est_time=311, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=550, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=600, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/spec/eagle/test_eagle3_basic.py b/test/registered/spec/eagle/test_eagle3_basic.py index 442eefcc6f82..4cf48afc1468 100644 --- a/test/registered/spec/eagle/test_eagle3_basic.py +++ b/test/registered/spec/eagle/test_eagle3_basic.py @@ -12,7 +12,7 @@ DEFAULT_TARGET_MODEL_EAGLE3, ) -register_cuda_ci(est_time=50, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=50, suite="stage-b-test-1-gpu-small") _is_hip = is_hip() diff --git a/test/registered/spec/eagle/test_eagle_infer_a.py b/test/registered/spec/eagle/test_eagle_infer_a.py index fc2e6b68a349..cecc2eb531a7 100644 --- a/test/registered/spec/eagle/test_eagle_infer_a.py +++ b/test/registered/spec/eagle/test_eagle_infer_a.py @@ -12,7 +12,7 @@ CustomTestCase, ) -register_cuda_ci(est_time=250, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=450, suite="stage-b-test-1-gpu-large") class TestEAGLEEngine(CustomTestCase): diff --git a/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py b/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py index 50b054cdcb78..d1270fbafd7b 100644 --- a/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py +++ b/test/registered/spec/eagle/test_eagle_infer_beta_dp_attention.py @@ -17,7 +17,7 @@ ) # EAGLE with DP attention on B200 (tp=2, dp=2, requires 4 B200 GPUs) -register_cuda_ci(est_time=300, suite="stage-c-test-4-gpu-b200") +register_cuda_ci(est_time=100, suite="stage-c-test-4-gpu-b200") def test_gsm8k(base_url: str, model: str): diff --git a/test/registered/spec/utils/test_build_eagle_tree.py b/test/registered/spec/utils/test_build_eagle_tree.py index f13ede2bf006..103349fcca0d 100644 --- a/test/registered/spec/utils/test_build_eagle_tree.py +++ b/test/registered/spec/utils/test_build_eagle_tree.py @@ -9,7 +9,7 @@ from sglang.srt.utils import get_device from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci -register_cuda_ci(est_time=3, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=6, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=3, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/spec/utils/test_ngram_corpus.py b/test/registered/spec/utils/test_ngram_corpus.py index 6f2427a40966..b921225ed18e 100644 --- a/test/registered/spec/utils/test_ngram_corpus.py +++ b/test/registered/spec/utils/test_ngram_corpus.py @@ -6,7 +6,7 @@ from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=30, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small") def _make_corpus(match_type="BFS", **kwargs): diff --git a/test/registered/unit/function_call/test_function_call_parser.py b/test/registered/unit/function_call/test_function_call_parser.py index c7b3fb173156..9b3159323125 100644 --- a/test/registered/unit/function_call/test_function_call_parser.py +++ b/test/registered/unit/function_call/test_function_call_parser.py @@ -18,7 +18,7 @@ from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(1.0, "stage-a-test-cpu") +register_cpu_ci(15, "stage-a-test-cpu") class TestPythonicDetector(unittest.TestCase): diff --git a/test/registered/unit/function_call/test_glm47_moe_detector.py b/test/registered/unit/function_call/test_glm47_moe_detector.py index 357514e19e85..e0c1921192b0 100644 --- a/test/registered/unit/function_call/test_glm47_moe_detector.py +++ b/test/registered/unit/function_call/test_glm47_moe_detector.py @@ -10,7 +10,7 @@ ) from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(1.0, "stage-a-test-cpu") +register_cpu_ci(5, "stage-a-test-cpu") class TestGlm47MoeDetector(unittest.TestCase): diff --git a/test/registered/unit/function_call/test_json_schema_constraint.py b/test/registered/unit/function_call/test_json_schema_constraint.py index bc6a9fa13913..65f14953805f 100644 --- a/test/registered/unit/function_call/test_json_schema_constraint.py +++ b/test/registered/unit/function_call/test_json_schema_constraint.py @@ -18,7 +18,7 @@ ) from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(1.0, "stage-a-test-cpu") +register_cpu_ci(5, "stage-a-test-cpu") class TestJsonSchemaConstraint(unittest.TestCase): diff --git a/test/registered/unit/function_call/test_parallel_tool_calls.py b/test/registered/unit/function_call/test_parallel_tool_calls.py index bf1e18a7baa8..2f5af4d9ae00 100644 --- a/test/registered/unit/function_call/test_parallel_tool_calls.py +++ b/test/registered/unit/function_call/test_parallel_tool_calls.py @@ -23,7 +23,7 @@ from sglang.srt.function_call.json_array_parser import JsonArrayParser from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(1.0, "stage-a-test-cpu") +register_cpu_ci(5, "stage-a-test-cpu") class TestParallelToolCalls(unittest.TestCase): diff --git a/test/registered/unit/function_call/test_unknown_tool_name.py b/test/registered/unit/function_call/test_unknown_tool_name.py index e7a8394ff2cb..ce96c698a0cd 100644 --- a/test/registered/unit/function_call/test_unknown_tool_name.py +++ b/test/registered/unit/function_call/test_unknown_tool_name.py @@ -9,7 +9,7 @@ from sglang.srt.function_call.core_types import StreamingParseResult from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(1.0, "stage-a-test-cpu") +register_cpu_ci(5, "stage-a-test-cpu") class DummyDetector(BaseFormatDetector): diff --git a/test/registered/unit/managers/test_prefill_adder.py b/test/registered/unit/managers/test_prefill_adder.py index e153d9d9c635..2a111171263e 100644 --- a/test/registered/unit/managers/test_prefill_adder.py +++ b/test/registered/unit/managers/test_prefill_adder.py @@ -12,7 +12,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=1, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=2, suite="stage-b-test-1-gpu-small-amd") diff --git a/test/registered/unit/mem_cache/test_evict_policy.py b/test/registered/unit/mem_cache/test_evict_policy.py index 0dd2eed5d9b5..c59af743f9e7 100644 --- a/test/registered/unit/mem_cache/test_evict_policy.py +++ b/test/registered/unit/mem_cache/test_evict_policy.py @@ -2,7 +2,7 @@ from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=5, suite="stage-a-test-cpu") +register_cpu_ci(est_time=2, suite="stage-a-test-cpu") import unittest from unittest.mock import MagicMock diff --git a/test/registered/unit/mem_cache/test_nsa_pool_host_unit.py b/test/registered/unit/mem_cache/test_nsa_pool_host_unit.py index f75819557e1f..64ca27493081 100644 --- a/test/registered/unit/mem_cache/test_nsa_pool_host_unit.py +++ b/test/registered/unit/mem_cache/test_nsa_pool_host_unit.py @@ -11,7 +11,7 @@ from sglang.srt.utils import is_cuda, is_hip, is_npu, is_xpu from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=3, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=10, suite="stage-b-test-1-gpu-small") class TestNSAHiCacheTransfer(unittest.TestCase): diff --git a/test/registered/unit/mem_cache/test_radix_cache_unit.py b/test/registered/unit/mem_cache/test_radix_cache_unit.py index 33dc31f41c64..f7b8a43cea6a 100644 --- a/test/registered/unit/mem_cache/test_radix_cache_unit.py +++ b/test/registered/unit/mem_cache/test_radix_cache_unit.py @@ -21,7 +21,7 @@ from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci # CPU-based unit test, runs quickly on any GPU runner -register_cuda_ci(est_time=5, suite="stage-b-test-1-gpu-small") +register_cuda_ci(est_time=20, suite="stage-b-test-1-gpu-small") register_amd_ci(est_time=5, suite="stage-b-test-1-gpu-small-amd") import random diff --git a/test/registered/unit/server_args/test_server_args.py b/test/registered/unit/server_args/test_server_args.py index 1276493d3a95..ff381bd21883 100644 --- a/test/registered/unit/server_args/test_server_args.py +++ b/test/registered/unit/server_args/test_server_args.py @@ -10,7 +10,7 @@ CustomTestCase, ) -register_cpu_ci(est_time=1, suite="stage-a-test-cpu") +register_cpu_ci(est_time=10, suite="stage-a-test-cpu") # Mock get_device() so all tests run on CPU-only CI runners _mock_device = patch("sglang.srt.server_args.get_device", return_value="cuda") diff --git a/test/registered/unit/utils/test_json_response.py b/test/registered/unit/utils/test_json_response.py index f201ae735485..accc71fc5ba7 100644 --- a/test/registered/unit/utils/test_json_response.py +++ b/test/registered/unit/utils/test_json_response.py @@ -10,7 +10,7 @@ ) from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=2, suite="stage-a-test-cpu") +register_cpu_ci(est_time=5, suite="stage-a-test-cpu") class TestJSONResponseUtils(unittest.TestCase): diff --git a/test/registered/utils/test_log_utils.py b/test/registered/utils/test_log_utils.py index 810c12f36b7c..74341c13be60 100644 --- a/test/registered/utils/test_log_utils.py +++ b/test/registered/utils/test_log_utils.py @@ -9,7 +9,7 @@ from sglang.srt.utils.log_utils import create_log_targets, log_json from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=1, suite="stage-a-test-cpu") +register_cpu_ci(est_time=4, suite="stage-a-test-cpu") class TestLogUtils(unittest.TestCase): diff --git a/test/registered/utils/test_network_address.py b/test/registered/utils/test_network_address.py index 967345fae36c..ff05206c28b4 100644 --- a/test/registered/utils/test_network_address.py +++ b/test/registered/utils/test_network_address.py @@ -6,7 +6,7 @@ from sglang.srt.utils.network import NetworkAddress from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=1, suite="stage-a-test-cpu") +register_cpu_ci(est_time=7, suite="stage-a-test-cpu") # Mock get_device() so ServerArgs tests run on CPU-only CI runners _mock_device = patch("sglang.srt.server_args.get_device", return_value="cuda") diff --git a/test/registered/utils/test_socket_utils.py b/test/registered/utils/test_socket_utils.py index 42c12fd88dc4..63a42ac39237 100644 --- a/test/registered/utils/test_socket_utils.py +++ b/test/registered/utils/test_socket_utils.py @@ -15,7 +15,7 @@ from sglang.test.test_utils import CustomTestCase from sglang.utils import normalize_base_url, release_port, reserve_port -register_cpu_ci(est_time=1, suite="stage-a-test-cpu") +register_cpu_ci(est_time=7, suite="stage-a-test-cpu") class TestTryBindSocket(CustomTestCase): diff --git a/test/registered/vlm/test_patch_embed_perf.py b/test/registered/vlm/test_patch_embed_perf.py index ce2f38ca2de6..3ef3a645750d 100644 --- a/test/registered/vlm/test_patch_embed_perf.py +++ b/test/registered/vlm/test_patch_embed_perf.py @@ -8,7 +8,7 @@ from sglang.srt.models.glm4v import Glm4vVisionPatchEmbed from sglang.test.ci.ci_register import register_cuda_ci -register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-large") +register_cuda_ci(est_time=12, suite="stage-b-test-1-gpu-large") PATCH_SIZE = 14 TEMPORAL_PATCH_SIZE = 2