sgl-project · Kangyan-Zhou · Apr 8, 2026 · Apr 8, 2026
diff --git a/test/registered/8-gpu-models/test_deepseek_v31.py b/test/registered/8-gpu-models/test_deepseek_v31.py
@@ -1,11 +1,14 @@
 import unittest
 
 from sglang.test.accuracy_test_runner import AccuracyTestParams
+from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.performance_test_runner import PerformanceTestParams
 from sglang.test.run_combined_tests import run_combined_tests
 from sglang.test.test_utils import ModelLaunchSettings
 
-# Manual-only: not registered in any CI suite
+# Runs on both H200 and B200 via nightly-8-gpu-common suite
+register_cuda_ci(est_time=5400, suite="nightly-8-gpu-common", nightly=True)
+
 DEEPSEEK_V31_MODEL_PATH = "deepseek-ai/DeepSeek-V3.1"
 
 

diff --git a/test/registered/8-gpu-models/test_glm_46.py b/test/registered/8-gpu-models/test_glm_46.py
@@ -0,0 +1,52 @@
+import unittest
+
+from sglang.test.accuracy_test_runner import AccuracyTestParams
+from sglang.test.ci.ci_register import register_cuda_ci
+from sglang.test.performance_test_runner import PerformanceTestParams
+from sglang.test.run_combined_tests import run_combined_tests
+from sglang.test.test_utils import ModelLaunchSettings
+
+# Runs on both H200 and B200 via nightly-8-gpu-common suite
+register_cuda_ci(est_time=1800, suite="nightly-8-gpu-common", nightly=True)
+
+GLM_4_6_MODEL_PATH = "zai-org/GLM-4.6"
+
+
+class TestGLM46(unittest.TestCase):
+    """Unified test class for GLM-4.6 performance and accuracy.
+
+    Single variant with simple TP=8 configuration.
+    GLM-4.6 is a 357B MoE model.
+    Runs BOTH:
+    - Performance test (using NightlyBenchmarkRunner)
+    - Accuracy test (gsm8k dataset, baseline accuracy 0.80)
+    """
+
+    def test_glm_46(self):
+        """Run the combined performance and accuracy tests for the single TP8 variant."""
+        base_args = [
+            "--tp=8",
+            "--trust-remote-code",
+        ]
+
+        variants = [
+            ModelLaunchSettings(
+                GLM_4_6_MODEL_PATH,
+                tp_size=8,
+                extra_args=base_args,
+                variant="TP8",
+            ),
+        ]
+
+        run_combined_tests(
+            models=variants,
+            test_name="GLM-4.6",
+            accuracy_params=AccuracyTestParams(dataset="gsm8k", baseline_accuracy=0.80),
+            performance_params=PerformanceTestParams(
+                profile_dir="performance_profiles_glm_4_6",
+            ),
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/registered/8-gpu-models/test_glm_46_fp8.py b/test/registered/8-gpu-models/test_glm_46_fp8.py
@@ -1,11 +1,14 @@
 import unittest
 
 from sglang.test.accuracy_test_runner import AccuracyTestParams
+from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.performance_test_runner import PerformanceTestParams
 from sglang.test.run_combined_tests import run_combined_tests
 from sglang.test.test_utils import ModelLaunchSettings
 
-# Manual-only: not registered in any CI suite
+# Runs on both H200 and B200 via nightly-8-gpu-common suite
+register_cuda_ci(est_time=1800, suite="nightly-8-gpu-common", nightly=True)
+
 GLM_4_6_FP8_MODEL_PATH = "zai-org/GLM-4.6-FP8"
 
 

diff --git a/test/registered/8-gpu-models/test_qwen35.py b/test/registered/8-gpu-models/test_qwen35.py
@@ -9,7 +9,7 @@
 # Runs on both H200 and B200 via nightly-8-gpu-common suite
 register_cuda_ci(est_time=1800, suite="nightly-8-gpu-common", nightly=True)
 
-QWEN35_MODEL_PATH = "Qwen/Qwen3.5-397B-A17B-FP8"
+QWEN35_MODEL_PATH = "Qwen/Qwen3.5-397B-A17B"
 
 
 class TestQwen35(unittest.TestCase):
@@ -30,7 +30,6 @@ def test_qwen35(self):
             "--tool-call-parser=qwen3_coder",
             "--mem-fraction-static=0.8",
         ]
-        dp_args = ["--dp=8", "--enable-dp-attention"]
         mtp_args = [
             "--speculative-algorithm=EAGLE",
             "--speculative-num-steps=3",
@@ -49,14 +48,8 @@ def test_qwen35(self):
             ModelLaunchSettings(
                 QWEN35_MODEL_PATH,
                 tp_size=8,
-                extra_args=base_args + dp_args,
-                variant="TP8+DP8",
-            ),
-            ModelLaunchSettings(
-                QWEN35_MODEL_PATH,
-                tp_size=8,
-                extra_args=base_args + dp_args + mtp_args,
-                variant="TP8+DP8+MTP",
+                extra_args=base_args + mtp_args,
+                variant="TP8+MTP",
                 env={"SGLANG_ENABLE_SPEC_V2": "1"},
             ),
         ]

diff --git a/test/registered/8-gpu-models/test_qwen3_235b.py b/test/registered/8-gpu-models/test_qwen3_235b.py
@@ -1,11 +1,14 @@
 import unittest
 
 from sglang.test.accuracy_test_runner import AccuracyTestParams
+from sglang.test.ci.ci_register import register_cuda_ci
 from sglang.test.performance_test_runner import PerformanceTestParams
 from sglang.test.run_combined_tests import run_combined_tests
 from sglang.test.test_utils import ModelLaunchSettings, is_blackwell_system
 
-# Manual-only: not registered in any CI suite
+# Runs on both H200 and B200 via nightly-8-gpu-common suite
+register_cuda_ci(est_time=1800, suite="nightly-8-gpu-common", nightly=True)
+
 QWEN3_235B_FP8_MODEL_PATH = "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8"
 QWEN3_235B_EAGLE3_MODEL_PATH = (
     "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge-Meituan"