diff --git a/test/registered/models/test_transformers_backend_eval.py b/test/registered/models/test_transformers_backend_eval.py index 665696cff0fe..3b7f17364519 100644 --- a/test/registered/models/test_transformers_backend_eval.py +++ b/test/registered/models/test_transformers_backend_eval.py @@ -7,7 +7,7 @@ from sglang.test.few_shot_gsm8k import run_eval from sglang.test.server_fixtures.default_fixture import DefaultServerBase -register_cuda_ci(est_time=180, suite="stage-b-test-small-1-gpu") +register_cuda_ci(est_time=180, suite="stage-b-test-1-gpu-small") class TestTransformersBackendEval(DefaultServerBase): diff --git a/test/registered/quant/test_fp8_gemm_sm120.py b/test/registered/quant/test_fp8_gemm_sm120.py index f695851de663..9ca9ea7afbd9 100644 --- a/test/registered/quant/test_fp8_gemm_sm120.py +++ b/test/registered/quant/test_fp8_gemm_sm120.py @@ -12,7 +12,7 @@ try_cached_model, ) -register_cuda_ci(est_time=120, suite="stage-b-test-small-1-gpu") +register_cuda_ci(est_time=120, suite="stage-b-test-1-gpu-small") PERTENSOR_MODEL_PATH = "nvidia/Llama-3.1-8B-Instruct-FP8" BLOCKWISE_MODEL_PATH = "Qwen/Qwen3-4B-Instruct-2507-FP8" diff --git a/test/registered/quant/test_nvfp4_gemm_sm120.py b/test/registered/quant/test_nvfp4_gemm_sm120.py index 95f32942e453..dea1e17cc360 100644 --- a/test/registered/quant/test_nvfp4_gemm_sm120.py +++ b/test/registered/quant/test_nvfp4_gemm_sm120.py @@ -12,7 +12,7 @@ try_cached_model, ) -register_cuda_ci(est_time=90, suite="stage-b-test-small-1-gpu") +register_cuda_ci(est_time=90, suite="stage-b-test-1-gpu-small") MODEL_PATH = "nvidia/Llama-3.1-8B-Instruct-NVFP4" diff --git a/test/registered/sampling/test_fused_temperature_softmax.py b/test/registered/sampling/test_fused_temperature_softmax.py new file mode 100644 index 000000000000..9aec8ef9e7c9 --- /dev/null +++ b/test/registered/sampling/test_fused_temperature_softmax.py @@ -0,0 +1,272 @@ +"""Correctness tests for fused_temperature_softmax Triton kernel.""" + +import unittest + +import torch +from flashinfer.sampling import softmax as flashinfer_softmax + +from sglang.srt.layers.fused_sampling import ( + fused_temperature_softmax, + fused_temperature_softmax_inplace, +) +from sglang.srt.utils import get_device +from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.test_utils import CustomTestCase + +register_cuda_ci( + est_time=15, + suite="stage-b-test-1-gpu-small", + disabled="Test cannot pass in CI due to numerical precision issues", +) + + +def reference_temperature_softmax(logits, temperatures): + """Reference implementation: div + softmax (separate kernels).""" + logits = logits.clone() + logits.div_(temperatures) + return torch.softmax(logits, dim=-1).float() + + +class TestFusedTemperatureSoftmax(CustomTestCase): + @classmethod + def setUpClass(cls): + torch.set_default_device(get_device()) + torch.manual_seed(42) + + def _check_close(self, fused, ref, atol=1e-5, rtol=1e-5): + """Assert outputs are close and both are valid probability distributions.""" + self.assertEqual(fused.shape, ref.shape) + # Valid probabilities: non-negative, sum to ~1 + self.assertTrue((fused >= 0).all(), f"Negative probabilities in fused output") + row_sums = fused.sum(dim=-1) + torch.testing.assert_close( + row_sums, + torch.ones_like(row_sums), + atol=1e-4, + rtol=1e-4, + ) + torch.testing.assert_close(fused, ref, atol=atol, rtol=rtol) + + # --- out-of-place kernel --- + + def test_basic(self): + logits = torch.randn(4, 1024, dtype=torch.bfloat16) + temps = torch.tensor([0.7, 1.0, 1.5, 2.0], dtype=torch.float32).view(-1, 1) + ref = reference_temperature_softmax(logits, temps) + fused = fused_temperature_softmax(logits, temps) + self._check_close(fused, ref, atol=1e-4, rtol=1e-3) + + def test_large_vocab(self): + logits = torch.randn(8, 128256, dtype=torch.bfloat16) + temps = torch.full((8, 1), 0.6, dtype=torch.float32) + ref = reference_temperature_softmax(logits, temps) + fused = fused_temperature_softmax(logits, temps) + self._check_close(fused, ref, atol=1e-4, rtol=1e-3) + + def test_batch_sizes(self): + for bs in [1, 2, 16, 64, 128, 512]: + logits = torch.randn(bs, 32000, dtype=torch.bfloat16) + temps = torch.rand(bs, 1, dtype=torch.float32) * 1.5 + 0.1 + ref = reference_temperature_softmax(logits, temps) + fused = fused_temperature_softmax(logits, temps) + self._check_close(fused, ref, atol=1e-4, rtol=1e-3) + + def test_temperature_one(self): + """Temperature=1.0 should be equivalent to plain softmax.""" + logits = torch.randn(16, 32000, dtype=torch.bfloat16) + temps = torch.ones(16, 1, dtype=torch.float32) + ref = torch.softmax(logits.float(), dim=-1) + fused = fused_temperature_softmax(logits, temps) + self._check_close(fused, ref, atol=1e-4, rtol=1e-3) + + def test_very_low_temperature(self): + """Very low temperature should produce near-one-hot distribution.""" + logits = torch.randn(4, 1024, dtype=torch.bfloat16) + temps = torch.full((4, 1), 0.01, dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps) + # Max probability should be very close to 1.0 + max_probs = fused.max(dim=-1).values + self.assertTrue((max_probs > 0.99).all()) + + def test_very_high_temperature(self): + """Very high temperature should produce near-uniform distribution.""" + logits = torch.randn(4, 1024, dtype=torch.bfloat16) + temps = torch.full((4, 1), 100.0, dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps) + uniform = 1.0 / 1024 + self.assertTrue( + (fused - uniform).abs().max() < 0.01, + "High temperature should produce near-uniform distribution", + ) + + def test_fp16_input(self): + logits = torch.randn(8, 32000, dtype=torch.float16) + temps = torch.rand(8, 1, dtype=torch.float32) * 1.5 + 0.1 + ref = reference_temperature_softmax(logits, temps) + fused = fused_temperature_softmax(logits, temps) + self._check_close(fused, ref, atol=1e-3, rtol=1e-2) + + def test_fp32_input(self): + logits = torch.randn(8, 32000, dtype=torch.float32) + temps = torch.rand(8, 1, dtype=torch.float32) + 0.5 + ref = reference_temperature_softmax(logits, temps) + fused = fused_temperature_softmax(logits, temps) + self._check_close(fused, ref, atol=1e-5, rtol=1e-5) + + def test_mixed_temperatures(self): + """Each row has a different temperature.""" + logits = torch.randn(8, 32000, dtype=torch.bfloat16) + temps = torch.tensor( + [0.1, 0.5, 0.7, 1.0, 1.2, 1.5, 2.0, 5.0], dtype=torch.float32 + ).view(-1, 1) + ref = reference_temperature_softmax(logits, temps) + fused = fused_temperature_softmax(logits, temps) + self._check_close(fused, ref, atol=1e-4, rtol=1e-3) + + def test_empty_batch(self): + logits = torch.randn(0, 32000, dtype=torch.bfloat16) + temps = torch.ones(0, 1, dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps) + self.assertEqual(fused.shape, (0, 32000)) + + # --- in-place kernel --- + + def test_inplace_basic(self): + logits = torch.randn(8, 32000, dtype=torch.float32) + temps = torch.rand(8, 1, dtype=torch.float32) * 1.5 + 0.1 + ref = reference_temperature_softmax(logits, temps) + fused_temperature_softmax_inplace(logits, temps) + # In-place writes back to logits in the original dtype + self._check_close(logits.float(), ref, atol=1e-5, rtol=1e-5) + + def test_inplace_bf16(self): + logits = torch.randn(8, 32000, dtype=torch.bfloat16) + temps = torch.rand(8, 1, dtype=torch.float32) + 0.5 + ref = reference_temperature_softmax(logits, temps) + fused_temperature_softmax_inplace(logits, temps) + self._check_close(logits.float(), ref, atol=2e-3, rtol=2e-3) + + def test_inplace_large_vocab(self): + logits = torch.randn(4, 128256, dtype=torch.bfloat16) + temps = torch.full((4, 1), 0.8, dtype=torch.float32) + ref = reference_temperature_softmax(logits, temps) + fused_temperature_softmax_inplace(logits, temps) + self._check_close(logits.float(), ref, atol=2e-3, rtol=2e-3) + + # --- exact known-value correctness --- + + def test_known_uniform_logits(self): + """Identical logits must produce uniform distribution regardless of temperature.""" + logits = torch.zeros(2, 5, dtype=torch.float32) + temps = torch.tensor([0.5, 2.0], dtype=torch.float32).view(-1, 1) + fused = fused_temperature_softmax(logits, temps) + expected = torch.full((2, 5), 0.2, dtype=torch.float32, device=fused.device) + torch.testing.assert_close(fused, expected, atol=1e-6, rtol=1e-6) + + def test_known_softmax_values(self): + """Verify against hand-computed softmax(logits / T).""" + logits = torch.tensor([[1.0, 2.0, 3.0]], dtype=torch.float32) + temps = torch.tensor([[1.0]], dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps) + # softmax([1,2,3]) = exp([1,2,3]) / sum(exp([1,2,3])) + e = torch.exp(logits) + expected = (e / e.sum(dim=-1, keepdim=True)).to(fused.device) + torch.testing.assert_close(fused, expected, atol=1e-6, rtol=1e-6) + + def test_known_softmax_with_temperature(self): + """Verify softmax([1,2,3] / 0.5) against hand computation.""" + logits = torch.tensor([[1.0, 2.0, 3.0]], dtype=torch.float32) + temps = torch.tensor([[0.5]], dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps) + scaled = logits / 0.5 + e = torch.exp(scaled) + expected = (e / e.sum(dim=-1, keepdim=True)).to(fused.device) + torch.testing.assert_close(fused, expected, atol=1e-6, rtol=1e-6) + + # --- argmax preservation --- + + def test_argmax_preserved(self): + """argmax must be invariant to temperature for finite T > 0.""" + logits = torch.randn(64, 32000, dtype=torch.bfloat16) + original_argmax = logits.float().argmax(dim=-1) + for t_val in [0.1, 0.5, 1.0, 2.0, 10.0]: + temps = torch.full((64, 1), t_val, dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps) + fused_argmax = fused.argmax(dim=-1) + self.assertTrue( + (original_argmax == fused_argmax).all(), + f"argmax changed at temperature={t_val}", + ) + + # --- numerical stability --- + + def test_large_logits_no_nan(self): + """Extreme logit magnitudes must not produce NaN or Inf.""" + logits = torch.tensor( + [[1e6, -1e6, 0.0], [1e4, 1e4 + 1, 1e4 - 1]], dtype=torch.float32 + ) + temps = torch.tensor([[1.0], [0.01]], dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps) + self.assertFalse(torch.isnan(fused).any(), "NaN in output") + self.assertFalse(torch.isinf(fused).any(), "Inf in output") + row_sums = fused.sum(dim=-1) + torch.testing.assert_close( + row_sums, + torch.ones_like(row_sums), + atol=1e-4, + rtol=1e-4, + ) + + def test_large_logits_inplace_no_nan(self): + """In-place variant: extreme logits must not produce NaN or Inf.""" + logits = torch.tensor( + [[1e6, -1e6, 0.0], [1e4, 1e4 + 1, 1e4 - 1]], dtype=torch.float32 + ) + temps = torch.tensor([[1.0], [0.01]], dtype=torch.float32) + fused_temperature_softmax_inplace(logits, temps) + self.assertFalse(torch.isnan(logits).any(), "NaN in output") + self.assertFalse(torch.isinf(logits).any(), "Inf in output") + + # --- comparison with flashinfer.sampling.softmax --- + + def test_vs_flashinfer_basic(self): + logits = torch.randn(4, 1024, dtype=torch.bfloat16) + temps = torch.tensor([0.7, 1.0, 1.5, 2.0], dtype=torch.float32).view(-1, 1) + fused = fused_temperature_softmax(logits, temps) + fi = flashinfer_softmax(logits, temperature=temps.view(-1)) + self._check_close(fused, fi, atol=1e-4, rtol=1e-3) + + def test_vs_flashinfer_large_vocab(self): + logits = torch.randn(8, 128256, dtype=torch.bfloat16) + temps = torch.full((8, 1), 0.6, dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps) + fi = flashinfer_softmax(logits, temperature=temps.view(-1)) + self._check_close(fused, fi, atol=1e-4, rtol=1e-3) + + def test_vs_flashinfer_batch_sizes(self): + for bs in [1, 16, 64, 128, 512]: + logits = torch.randn(bs, 32000, dtype=torch.bfloat16) + temps = torch.rand(bs, 1, dtype=torch.float32) * 1.5 + 0.1 + fused = fused_temperature_softmax(logits, temps) + fi = flashinfer_softmax(logits, temperature=temps.view(-1)) + self._check_close(fused, fi, atol=1e-4, rtol=1e-3) + + def test_vs_flashinfer_scalar_temperature(self): + logits = torch.randn(16, 32000, dtype=torch.bfloat16) + temps_2d = torch.full((16, 1), 0.8, dtype=torch.float32) + fused = fused_temperature_softmax(logits, temps_2d) + fi = flashinfer_softmax(logits, temperature=0.8) + self._check_close(fused, fi, atol=1e-4, rtol=1e-3) + + def test_vs_flashinfer_mixed_temperatures(self): + logits = torch.randn(8, 32000, dtype=torch.bfloat16) + temps = torch.tensor( + [0.1, 0.5, 0.7, 1.0, 1.2, 1.5, 2.0, 5.0], dtype=torch.float32 + ).view(-1, 1) + fused = fused_temperature_softmax(logits, temps) + fi = flashinfer_softmax(logits, temperature=temps.view(-1)) + self._check_close(fused, fi, atol=1e-4, rtol=1e-3) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/registered/scheduler/test_abort_with_metrics.py b/test/registered/scheduler/test_abort_with_metrics.py index 5144d03eadc1..3c1a8c0b0ca2 100644 --- a/test/registered/scheduler/test_abort_with_metrics.py +++ b/test/registered/scheduler/test_abort_with_metrics.py @@ -15,10 +15,10 @@ from starlette.requests import Request from sglang.srt.utils.http_middleware_patch import _PureASGIDispatch -from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.ci.ci_register import register_cpu_ci from sglang.test.test_utils import CustomTestCase -register_cuda_ci(est_time=10, suite="stage-a-test-cpu") +register_cpu_ci(est_time=10, suite="stage-a-test-cpu") _HTTP_SCOPE = { "type": "http", diff --git a/test/registered/unit/constrained/test_base_grammar_backend.py b/test/registered/unit/constrained/test_base_grammar_backend.py index 7f2db0ece001..d914c87628f0 100644 --- a/test/registered/unit/constrained/test_base_grammar_backend.py +++ b/test/registered/unit/constrained/test_base_grammar_backend.py @@ -30,7 +30,7 @@ ) from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(2.0, "stage-a-cpu-only") +register_cpu_ci(2.0, "stage-a-test-cpu") class TestGrammarStats(unittest.TestCase): diff --git a/test/registered/unit/constrained/test_grammar_manager.py b/test/registered/unit/constrained/test_grammar_manager.py index 5ac92a5bbc18..649bf49f4723 100644 --- a/test/registered/unit/constrained/test_grammar_manager.py +++ b/test/registered/unit/constrained/test_grammar_manager.py @@ -26,7 +26,7 @@ from sglang.srt.constrained.grammar_manager import GrammarManager from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(2.0, "stage-a-cpu-only") +register_cpu_ci(2.0, "stage-a-test-cpu") def _make_scheduler(grammar_backend_name="none", skip_tokenizer=False): diff --git a/test/registered/unit/constrained/test_reasoner_grammar_backend.py b/test/registered/unit/constrained/test_reasoner_grammar_backend.py index ba02e11e03d8..1f9afdb69ff1 100644 --- a/test/registered/unit/constrained/test_reasoner_grammar_backend.py +++ b/test/registered/unit/constrained/test_reasoner_grammar_backend.py @@ -26,7 +26,7 @@ ) from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(2.0, "stage-a-cpu-only") +register_cpu_ci(2.0, "stage-a-test-cpu") THINK_END_ID = 99 diff --git a/test/registered/unit/constrained/test_utils.py b/test/registered/unit/constrained/test_utils.py index 5279c232cd0f..7384d097149b 100644 --- a/test/registered/unit/constrained/test_utils.py +++ b/test/registered/unit/constrained/test_utils.py @@ -14,7 +14,7 @@ from sglang.srt.constrained.utils import is_legacy_structural_tag from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(1.0, "stage-a-cpu-only") +register_cpu_ci(1.0, "stage-a-test-cpu") class TestIsLegacyStructuralTag(unittest.TestCase): diff --git a/test/registered/unit/observability/test_func_timer.py b/test/registered/unit/observability/test_func_timer.py index cb58c7ed03a6..397fdafb7d01 100644 --- a/test/registered/unit/observability/test_func_timer.py +++ b/test/registered/unit/observability/test_func_timer.py @@ -2,7 +2,7 @@ from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=5, suite="stage-a-cpu-only") +register_cpu_ci(est_time=5, suite="stage-a-test-cpu") import asyncio import unittest diff --git a/test/registered/unit/observability/test_label_transform.py b/test/registered/unit/observability/test_label_transform.py index 31edc9a85d17..2b941006a335 100644 --- a/test/registered/unit/observability/test_label_transform.py +++ b/test/registered/unit/observability/test_label_transform.py @@ -2,7 +2,7 @@ from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=1, suite="stage-a-cpu-only") +register_cpu_ci(est_time=1, suite="stage-a-test-cpu") import unittest diff --git a/test/registered/unit/observability/test_req_time_stats.py b/test/registered/unit/observability/test_req_time_stats.py index d3de58695634..cf77d11d47f3 100644 --- a/test/registered/unit/observability/test_req_time_stats.py +++ b/test/registered/unit/observability/test_req_time_stats.py @@ -107,7 +107,7 @@ def _get_int_env_var(name, default=0): from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=5, suite="stage-a-cpu-only") +register_cpu_ci(est_time=5, suite="stage-a-test-cpu") import unittest from unittest.mock import MagicMock diff --git a/test/registered/unit/observability/test_request_metrics_exporter.py b/test/registered/unit/observability/test_request_metrics_exporter.py index 22a4496fa394..d47f33ab6904 100644 --- a/test/registered/unit/observability/test_request_metrics_exporter.py +++ b/test/registered/unit/observability/test_request_metrics_exporter.py @@ -48,7 +48,7 @@ def __init__(self, **kwargs): from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=5, suite="stage-a-cpu-only") +register_cpu_ci(est_time=5, suite="stage-a-test-cpu") import asyncio import json diff --git a/test/registered/unit/observability/test_startup_func_log_and_timer.py b/test/registered/unit/observability/test_startup_func_log_and_timer.py index a5d46e22f898..47281cf2d5c3 100644 --- a/test/registered/unit/observability/test_startup_func_log_and_timer.py +++ b/test/registered/unit/observability/test_startup_func_log_and_timer.py @@ -2,7 +2,7 @@ from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=5, suite="stage-a-cpu-only") +register_cpu_ci(est_time=5, suite="stage-a-test-cpu") import unittest from unittest.mock import MagicMock, patch diff --git a/test/registered/unit/observability/test_trace.py b/test/registered/unit/observability/test_trace.py index 9e86579b8b6e..441491a44783 100644 --- a/test/registered/unit/observability/test_trace.py +++ b/test/registered/unit/observability/test_trace.py @@ -25,7 +25,7 @@ def _ensure_module(name): from sglang.test.ci.ci_register import register_cpu_ci -register_cpu_ci(est_time=5, suite="stage-a-cpu-only") +register_cpu_ci(est_time=5, suite="stage-a-test-cpu") import threading import unittest diff --git a/test/registered/unit/parser/test_code_completion_parser.py b/test/registered/unit/parser/test_code_completion_parser.py index 50ca71519c5d..1ee5f1f94f59 100644 --- a/test/registered/unit/parser/test_code_completion_parser.py +++ b/test/registered/unit/parser/test_code_completion_parser.py @@ -18,7 +18,7 @@ from sglang.test.ci.ci_register import register_cpu_ci from sglang.test.test_utils import CustomTestCase -register_cpu_ci(est_time=5, suite="stage-a-cpu-only") +register_cpu_ci(est_time=5, suite="stage-a-test-cpu") class TestFimPosition(CustomTestCase): diff --git a/test/registered/unit/parser/test_conversation.py b/test/registered/unit/parser/test_conversation.py index 5c3f4818ec06..b42445068e0d 100644 --- a/test/registered/unit/parser/test_conversation.py +++ b/test/registered/unit/parser/test_conversation.py @@ -32,7 +32,7 @@ from sglang.test.ci.ci_register import register_cpu_ci from sglang.test.test_utils import CustomTestCase -register_cpu_ci(est_time=5, suite="stage-a-cpu-only") +register_cpu_ci(est_time=5, suite="stage-a-test-cpu") class TestConversationGetPrompt(CustomTestCase): diff --git a/test/registered/unit/utils/test_subprocess_watchdog.py b/test/registered/unit/utils/test_subprocess_watchdog.py index 075bec79d26f..2de995616091 100644 --- a/test/registered/unit/utils/test_subprocess_watchdog.py +++ b/test/registered/unit/utils/test_subprocess_watchdog.py @@ -24,7 +24,7 @@ from sglang.test.ci.ci_register import register_cpu_ci from sglang.test.test_utils import CustomTestCase -register_cpu_ci(est_time=10, suite="stage-a-cpu-only") +register_cpu_ci(est_time=10, suite="stage-a-test-cpu") def healthy_worker(): @@ -40,6 +40,10 @@ def slow_crash_worker(delay: float = 0.5): os._exit(42) +def noop_worker(): + pass + + class TestSubprocessWatchdog(CustomTestCase): def setUp(self): self.sigquit_triggered = threading.Event() @@ -93,7 +97,7 @@ def test_crashed_process_triggers_sigquit(self): proc = self._spawn(slow_crash_worker, args=(0.2,)) self._watch(proc) self.assertTrue( - self.sigquit_triggered.wait(timeout=2.0), + self.sigquit_triggered.wait(timeout=5.0), "SIGQUIT was not triggered within timeout", ) @@ -101,7 +105,7 @@ def test_immediate_crash_detection(self): proc = self._spawn(crashing_worker) self._watch(proc, interval=0.05) self.assertTrue( - self.sigquit_triggered.wait(timeout=1.0), + self.sigquit_triggered.wait(timeout=5.0), "Immediate crash was not detected", ) @@ -110,7 +114,7 @@ def test_multiple_processes_one_crashes(self): crashing = self._spawn(slow_crash_worker, args=(0.2,)) self._watch([healthy, crashing], names=["healthy", "crashing"]) self.assertTrue( - self.sigquit_triggered.wait(timeout=2.0), + self.sigquit_triggered.wait(timeout=5.0), "Crash was not detected when one of multiple processes crashed", ) @@ -120,7 +124,7 @@ def test_empty_processes_list(self): self.assertFalse(self.sigquit_triggered.is_set()) def test_normal_exit_no_sigquit(self): - proc = self._spawn(lambda: None) + proc = self._spawn(noop_worker) proc.join(timeout=2) self._watch(proc) time.sleep(0.3) @@ -131,7 +135,6 @@ def test_normal_exit_no_sigquit(self): if __name__ == "__main__": - mp.set_start_method("spawn", force=True) import unittest unittest.main() diff --git a/test/registered/utils/test_numa_utils.py b/test/registered/utils/test_numa_utils.py index 01209292ed56..1e084fef90df 100644 --- a/test/registered/utils/test_numa_utils.py +++ b/test/registered/utils/test_numa_utils.py @@ -9,7 +9,7 @@ ) from sglang.test.ci.ci_register import register_cpu_ci, register_cuda_ci -register_cpu_ci(est_time=1, suite="stage-a-cpu-only") +register_cpu_ci(est_time=1, suite="stage-a-test-cpu") register_cuda_ci(est_time=10, suite="stage-c-test-4-gpu-gb200") register_cuda_ci(est_time=10, suite="stage-c-test-8-gpu-b200") diff --git a/test/run_suite.py b/test/run_suite.py index 1062f7d2fbd8..b4344e9e49a0 100644 --- a/test/run_suite.py +++ b/test/run_suite.py @@ -114,6 +114,47 @@ } +OTHER_SUITES = { + HWBackend.CPU: [ + "default", + ], + HWBackend.CUDA: [ + "stress", + "weekly-8-gpu-h200", + ], +} + + +_SUITE_CHECKED_BACKENDS = {HWBackend.CUDA, HWBackend.CPU} + + +def _valid_suites_by_backend() -> dict: + """Build a mapping from backend to its set of valid suite names.""" + result = {} + for suite_dict in (PER_COMMIT_SUITES, NIGHTLY_SUITES, OTHER_SUITES): + for backend, suites in suite_dict.items(): + if backend not in result: + result[backend] = set() + result[backend].update(suites) + return result + + +def validate_all_suites(all_tests: List[CIRegistry]): + """Fail fast if any test is registered to a suite that doesn't belong to its backend.""" + valid_by_backend = _valid_suites_by_backend() + errors = [] + for t in all_tests: + if t.backend not in _SUITE_CHECKED_BACKENDS: + continue + valid = valid_by_backend.get(t.backend, set()) + if t.suite not in valid: + errors.append( + f" {t.filename}: backend={t.backend.name}, suite='{t.suite}'" + ) + if errors: + raise ValueError("Tests registered to invalid suites:\n" + "\n".join(errors)) + + def filter_tests( ci_tests: List[CIRegistry], hw: HWBackend, suite: str, nightly: bool = False ) -> List[CIRegistry]: @@ -210,6 +251,7 @@ def run_a_suite(args): sanity_check = True all_tests = collect_tests(files, sanity_check=sanity_check) + validate_all_suites(all_tests) ci_tests, skipped_tests = filter_tests(all_tests, hw, suite, nightly) if auto_partition_size: