Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion test/registered/attention/test_chunk_gated_delta_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from sglang.test.ci.ci_register import register_cuda_ci

-register_cuda_ci(est_time=60, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-large")


@unittest.skipIf(not torch.cuda.is_available(), "Test requires CUDA")
Expand Down
2 changes: 1 addition & 1 deletion test/registered/attention/test_hybrid_attn_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

# Hybrid attention backend tests (FA3 prefill + FlashInfer decode, requires SM 90+ / H100)
# Multiple test classes: base, MLA, TorchCompile, SpecDecode variants
-register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=350, suite="stage-b-test-1-gpu-large")

GSM_DATASET_PATH = None

Expand Down
2 changes: 1 addition & 1 deletion test/registered/cp/test_deepseek_v32_cp_single_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
write_github_step_summary,
)

-register_cuda_ci(est_time=360, suite="stage-c-test-deepep-8-gpu-h200")
+register_cuda_ci(est_time=640, suite="stage-c-test-deepep-8-gpu-h200")
DEEPSEEK_V32_MODEL_PATH = "deepseek-ai/DeepSeek-V3.2"


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,18 @@
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-is_in_ci,
popen_launch_pd_server,
)

# Registering the test for CUDA CI with appropriate parameters
# Increasing estimated time since we run evaluation twice
-register_cuda_ci(est_time=600, suite="stage-b-test-2-gpu-large")
+register_cuda_ci(
+est_time=600,
+suite="stage-b-test-2-gpu-large",
+disabled="Temporarily disable the flaky test.",
+)


-@unittest.skipIf(is_in_ci(), "Temporarily disable the flaky test.")
class TestDisaggregationDecodeOffload(PDDisaggregationServerBase):
"""
Test class for verifying KV cache offloading on the decode side in a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

mp.set_start_method("spawn", force=True)

-register_cuda_ci(est_time=72, suite="stage-b-test-2-gpu-large")
+register_cuda_ci(est_time=130, suite="stage-b-test-2-gpu-large")
register_amd_ci(est_time=72, suite="stage-b-test-2-gpu-large-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/embedding/test_embedding_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
suite="stage-b-test-1-gpu-small-amd",
disabled="see https://github.com/sgl-project/sglang/issues/11127",
)
-register_cuda_ci(est_time=73, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-small")

MODEL_TO_CONFIG = {
"Alibaba-NLP/gte-Qwen2-1.5B-instruct": (1, 1e-5),
Expand Down
2 changes: 1 addition & 1 deletion test/registered/ep/test_mooncake_ep_small.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=660, suite="stage-c-test-deepep-4-gpu-h100")
+register_cuda_ci(est_time=200, suite="stage-c-test-deepep-4-gpu-h100")

ib_devices = get_rdma_devices_args()

Expand Down
2 changes: 1 addition & 1 deletion test/registered/eval/test_eval_accuracy_large.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=300, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=580, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=420, suite="stage-b-test-1-gpu-small-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/function_call/test_kimik2_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sglang.srt.parser.reasoning_parser import KimiK2Detector as KimiK2ReasoningDetector
from sglang.test.ci.ci_register import register_cpu_ci

-register_cpu_ci(1.0, "stage-a-test-cpu")
+register_cpu_ci(5, "stage-a-test-cpu")


def _make_tool(name, parameters=None):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/kernels/test_nsa_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sglang.srt.server_args import ServerArgs, set_global_server_args_for_scheduler
from sglang.test.test_utils import CustomTestCase

-register_cuda_ci(est_time=2, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=15, suite="stage-b-test-1-gpu-large")

# Global configuration for all indexer tests
DEFAULT_CONFIG = {
Expand Down
2 changes: 1 addition & 1 deletion test/registered/lora/test_fused_moe_lora_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# ==============================================================================

-register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=25, suite="stage-b-test-1-gpu-large")


def round_up(x, base):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from sglang.test.runners import SRTRunner

register_cuda_ci(
-est_time=25,
+est_time=50,
suite="stage-b-test-1-gpu-large",
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from sglang.test.test_utils import CustomTestCase

register_cuda_ci(
-est_time=300,
+est_time=160,
suite="stage-c-test-4-gpu-b200",
)

Expand Down
2 changes: 1 addition & 1 deletion test/registered/lora/test_lora_qwen3_8b_logprob_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from sglang.test.test_utils import CustomTestCase

register_cuda_ci(
-est_time=200,
+est_time=40,
suite="stage-b-test-1-gpu-large",
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from sglang.test.test_utils import CustomTestCase

register_cuda_ci(
-est_time=300,
+est_time=160,
suite="stage-c-test-4-gpu-b200",
)

Expand Down
2 changes: 1 addition & 1 deletion test/registered/lora/test_lora_tp.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from sglang.test.test_utils import CustomTestCase, is_in_ci

register_cuda_ci(
-est_time=116,
+est_time=190,
suite="stage-c-test-8-gpu-h200",
)
register_amd_ci(
Expand Down
2 changes: 1 addition & 1 deletion test/registered/metrics/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=32, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=95, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=32, suite="stage-b-test-1-gpu-small-amd")

_MODEL_NAME = "Qwen/Qwen3-0.6B"
Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_kimi_linear_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=90, suite="stage-b-test-2-gpu-large")
+register_cuda_ci(est_time=180, suite="stage-b-test-2-gpu-large")


class TestKimiLinear(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_nvidia_nemotron_3_nano.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sglang.test.kits.lm_eval_kit import LMEvalMixin
from sglang.test.server_fixtures.default_fixture import DefaultServerBase

-register_cuda_ci(est_time=180, suite="stage-b-test-2-gpu-large")
+register_cuda_ci(est_time=660, suite="stage-b-test-2-gpu-large")

NEMOTRON_3_NANO_THINKING_ARGS = [
"--trust-remote-code",
Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_nvidia_nemotron_nano_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sglang.test.kits.eval_accuracy_kit import GSM8KMixin
from sglang.test.server_fixtures.default_fixture import DefaultServerBase

-register_cuda_ci(est_time=132, suite="stage-b-test-2-gpu-large")
+register_cuda_ci(est_time=240, suite="stage-b-test-2-gpu-large")


class TestNvidiaNemotronNanoV2BF16(GSM8KMixin, DefaultServerBase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/models/test_transformers_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=245, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=450, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=320, suite="stage-b-test-1-gpu-small-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/moe/test_cutedsl_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from sglang.srt.layers.moe.topk import TopKConfig, select_experts
from sglang.test.ci.ci_register import register_cuda_ci

-register_cuda_ci(est_time=300, suite="stage-c-test-4-gpu-b200")
+register_cuda_ci(est_time=20, suite="stage-c-test-4-gpu-b200")

SKIP_TEST = torch.cuda.get_device_capability() < (10, 0)
SKIP_REASON = "Nvfp4 Requires compute capability of 10 or above."
Expand Down
2 changes: 1 addition & 1 deletion test/registered/moe/test_moe_ep.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=140, suite="stage-b-test-2-gpu-large")
+register_cuda_ci(est_time=250, suite="stage-b-test-2-gpu-large")


class TestEp(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/moe/test_triton_fused_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.test_utils import CustomTestCase

-register_cuda_ci(est_time=89, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=30, suite="stage-b-test-1-gpu-large")


class TestFusedMOE(CustomTestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=7, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=55, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=20, suite="stage-b-test-1-gpu-small-amd")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=250, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=258, suite="stage-b-test-1-gpu-small-amd")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=6, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=55, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=47, suite="stage-b-test-1-gpu-small-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/quant/test_fp8_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.test_utils import CustomTestCase

-register_cuda_ci(est_time=132, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=15, suite="stage-b-test-1-gpu-large")


class TestFP8Base(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/quant/test_fp8kv_triton.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=520, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=80, suite="stage-b-test-1-gpu-large")


class TestFP8KVCacheTritonBackend(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/quant/test_int8_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.test_utils import CustomTestCase

-register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=16, suite="stage-b-test-1-gpu-small")


def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/quant/test_quant_config_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sglang.test.ci.ci_register import register_cpu_ci
from sglang.test.test_utils import CustomTestCase

-register_cpu_ci(est_time=5, suite="stage-a-test-cpu")
+register_cpu_ci(est_time=20, suite="stage-a-test-cpu")


class TestQuantLogString(CustomTestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/quant/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
write_results_to_json,
)

-register_cuda_ci(est_time=185, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=370, suite="stage-b-test-1-gpu-large")

MODEL_SCORE_THRESHOLDS = {
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4": 0.825,
Expand Down
2 changes: 1 addition & 1 deletion test/registered/quant/test_torchao.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sglang import Engine
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci

-register_cuda_ci(est_time=103, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=230, suite="stage-b-test-1-gpu-small-amd")
from sglang.lang.chat_template import get_chat_template_by_model_path
from sglang.srt.utils import kill_process_tree
Expand Down
2 changes: 1 addition & 1 deletion test/registered/sampling/test_pytorch_sampling_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
popen_launch_server,
)

-register_cuda_ci(est_time=66, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=150, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=66, suite="stage-b-test-1-gpu-small-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/scheduler/test_abort.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
run_and_check_memory_leak,
)

-register_cuda_ci(est_time=131, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=350, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=300, suite="stage-b-test-1-gpu-small-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/scheduler/test_chunked_prefill.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
from sglang.test.test_utils import CustomTestCase, run_mmlu_test, run_mulit_request_test

-register_cuda_ci(est_time=312, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=550, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=312, suite="stage-b-test-1-gpu-small-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/scheduler/test_retract_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)
from sglang.utils import is_in_ci

-register_cuda_ci(est_time=311, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=550, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=600, suite="stage-b-test-1-gpu-small-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/spec/eagle/test_eagle3_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
DEFAULT_TARGET_MODEL_EAGLE3,
)

-register_cuda_ci(est_time=50, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=200, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=50, suite="stage-b-test-1-gpu-small")

_is_hip = is_hip()
Expand Down
2 changes: 1 addition & 1 deletion test/registered/spec/eagle/test_eagle_infer_a.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
CustomTestCase,
)

-register_cuda_ci(est_time=250, suite="stage-b-test-1-gpu-large")
+register_cuda_ci(est_time=450, suite="stage-b-test-1-gpu-large")


class TestEAGLEEngine(CustomTestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)

# EAGLE with DP attention on B200 (tp=2, dp=2, requires 4 B200 GPUs)
-register_cuda_ci(est_time=300, suite="stage-c-test-4-gpu-b200")
+register_cuda_ci(est_time=100, suite="stage-c-test-4-gpu-b200")


def test_gsm8k(base_url: str, model: str):
Expand Down
2 changes: 1 addition & 1 deletion test/registered/spec/utils/test_build_eagle_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sglang.srt.utils import get_device
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci

-register_cuda_ci(est_time=3, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=6, suite="stage-b-test-1-gpu-small")
register_amd_ci(est_time=3, suite="stage-b-test-1-gpu-small-amd")


Expand Down
2 changes: 1 addition & 1 deletion test/registered/spec/utils/test_ngram_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.test_utils import CustomTestCase

-register_cuda_ci(est_time=30, suite="stage-b-test-1-gpu-small")
+register_cuda_ci(est_time=8, suite="stage-b-test-1-gpu-small")


def _make_corpus(match_type="BFS", **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector
from sglang.test.ci.ci_register import register_cpu_ci

-register_cpu_ci(1.0, "stage-a-test-cpu")
+register_cpu_ci(15, "stage-a-test-cpu")


class TestPythonicDetector(unittest.TestCase):
Expand Down
Loading
Loading