diff --git a/test/registered/models/test_gpt_oss_models_pcg.py b/test/registered/models/test_gpt_oss_models_pcg.py deleted file mode 100644 index 438f127c2a4f..000000000000 --- a/test/registered/models/test_gpt_oss_models_pcg.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -GPT-OSS piecewise CUDA graph tests. -""" - -import unittest -from types import SimpleNamespace - -from sglang.srt.utils import kill_process_tree -from sglang.test.ci.ci_register import register_cuda_ci -from sglang.test.few_shot_gsm8k import run_eval -from sglang.test.test_utils import ( - DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - DEFAULT_URL_FOR_TEST, - CustomTestCase, - popen_launch_server, -) - -register_cuda_ci( - est_time=400, - suite="stage-b-test-2-gpu-large", -) - -GPT_OSS_MODEL = "openai/gpt-oss-120b" - -ACC_THRESHOLDS = { - GPT_OSS_MODEL: {"gsm8k": 0.81}, -} - - -class TestGptOssPiecewiseCudaGraph(CustomTestCase): - - @classmethod - def setUpClass(cls): - cls.model = GPT_OSS_MODEL - cls.base_url = DEFAULT_URL_FOR_TEST - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--tp", - "2", - "--trust-remote-code", - "--reasoning-parser", - "gpt-oss", - "--enable-piecewise-cuda-graph", - ], - ) - - @classmethod - def tearDownClass(cls): - kill_process_tree(cls.process.pid) - - def test_gsm8k(self): - args = SimpleNamespace( - num_shots=5, - data_path=None, - num_questions=200, - max_new_tokens=512, - parallel=128, - host="http://127.0.0.1", - port=int(self.base_url.split(":")[-1]), - ) - metrics = run_eval(args) - print(f"{metrics=}") - self.assertGreaterEqual( - metrics["accuracy"], ACC_THRESHOLDS[self.model]["gsm8k"] - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/registered/models/test_kimi_linear_models_pcg.py b/test/registered/models/test_kimi_linear_models_pcg.py deleted file mode 100644 index 32314f2b80f7..000000000000 --- a/test/registered/models/test_kimi_linear_models_pcg.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Kimi-Linear piecewise CUDA graph tests. -""" - -import unittest -from types import SimpleNamespace - -from sglang.srt.utils import kill_process_tree -from sglang.test.ci.ci_register import register_cuda_ci -from sglang.test.few_shot_gsm8k import run_eval -from sglang.test.test_utils import ( - DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - DEFAULT_URL_FOR_TEST, - CustomTestCase, - popen_launch_server, -) - -register_cuda_ci( - est_time=100, - suite="stage-b-test-2-gpu-large", -) - -KIMI_LINEAR_MODEL = "moonshotai/Kimi-Linear-48B-A3B-Instruct" - -ACC_THRESHOLDS = { - KIMI_LINEAR_MODEL: {"gsm8k": 0.88}, -} - - -class TestKimiLinearPiecewiseCudaGraph(CustomTestCase): - - @classmethod - def setUpClass(cls): - cls.model = KIMI_LINEAR_MODEL - cls.base_url = DEFAULT_URL_FOR_TEST - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - "--tp", - "2", - "--trust-remote-code", - ], - ) - - @classmethod - def tearDownClass(cls): - kill_process_tree(cls.process.pid) - - def test_gsm8k(self): - args = SimpleNamespace( - num_shots=5, - data_path=None, - num_questions=200, - max_new_tokens=512, - parallel=128, - host="http://127.0.0.1", - port=int(self.base_url.split(":")[-1]), - ) - metrics = run_eval(args) - print(f"{metrics=}") - self.assertGreaterEqual( - metrics["accuracy"], ACC_THRESHOLDS[self.model]["gsm8k"] - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/registered/models/test_qwen3_next_models_pcg.py b/test/registered/models/test_qwen3_next_models_pcg.py deleted file mode 100644 index 593b6d70079e..000000000000 --- a/test/registered/models/test_qwen3_next_models_pcg.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Qwen3 Next piecewise CUDA graph tests. -""" - -import unittest - -from sglang.test.ci.ci_register import register_cuda_ci -from sglang.test.kits.eval_accuracy_kit import GSM8KMixin -from sglang.test.server_fixtures.default_fixture import DefaultServerBase - -register_cuda_ci( - est_time=400, - suite="stage-c-test-4-gpu-h100", -) - -QWEN3_NEXT_MODEL = "Qwen/Qwen3-Next-80B-A3B-Instruct" - - -class TestQwen3NextPiecewiseCudaGraph(GSM8KMixin, DefaultServerBase): - model = QWEN3_NEXT_MODEL - gsm8k_accuracy_thres = 0.93 - other_args = [ - "--tp", - "4", - ] - - -if __name__ == "__main__": - unittest.main()