Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion test/registered/8-gpu-models/test_deepseek_v31.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import unittest

from sglang.test.accuracy_test_runner import AccuracyTestParams
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.performance_test_runner import PerformanceTestParams
from sglang.test.run_combined_tests import run_combined_tests
from sglang.test.test_utils import ModelLaunchSettings

# Manual-only: not registered in any CI suite
# Runs on both H200 and B200 via nightly-8-gpu-common suite
register_cuda_ci(est_time=5400, suite="nightly-8-gpu-common", nightly=True)

DEEPSEEK_V31_MODEL_PATH = "deepseek-ai/DeepSeek-V3.1"


Expand Down
52 changes: 52 additions & 0 deletions test/registered/8-gpu-models/test_glm_46.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import unittest

from sglang.test.accuracy_test_runner import AccuracyTestParams
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.performance_test_runner import PerformanceTestParams
from sglang.test.run_combined_tests import run_combined_tests
from sglang.test.test_utils import ModelLaunchSettings

# Runs on both H200 and B200 via nightly-8-gpu-common suite
# NOTE(review): est_time is presumably a runtime estimate in seconds
# (1800 = 30 minutes) — confirm against ci_register.
register_cuda_ci(est_time=1800, suite="nightly-8-gpu-common", nightly=True)

# Model identifier handed to ModelLaunchSettings by the test in this file.
GLM_4_6_MODEL_PATH = "zai-org/GLM-4.6"


class TestGLM46(unittest.TestCase):
    """Unified test class for GLM-4.6 performance and accuracy.

    Single variant with simple TP=8 configuration.
    GLM-4.6 is a 357B MoE model.

    The single test hands the variant to ``run_combined_tests`` together
    with BOTH:
    - performance parameters (``profile_dir="performance_profiles_glm_4_6"``)
    - accuracy parameters (``gsm8k`` dataset, baseline accuracy 0.80)
    """

    def test_glm_46(self):
        """Run performance and accuracy for GLM-4.6."""
        # Server launch flags shared by every variant of this model.
        base_args = [
            "--tp=8",
            "--trust-remote-code",
        ]

        # Only one launch configuration is exercised: plain tensor parallel 8.
        variants = [
            ModelLaunchSettings(
                GLM_4_6_MODEL_PATH,
                tp_size=8,
                extra_args=base_args,
                variant="TP8",
            ),
        ]

        run_combined_tests(
            models=variants,
            test_name="GLM-4.6",
            accuracy_params=AccuracyTestParams(
                dataset="gsm8k",
                baseline_accuracy=0.80,
            ),
            performance_params=PerformanceTestParams(
                profile_dir="performance_profiles_glm_4_6",
            ),
        )


if __name__ == "__main__":
    # Allow running this test file directly as a script.
    unittest.main()
5 changes: 4 additions & 1 deletion test/registered/8-gpu-models/test_glm_46_fp8.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import unittest

from sglang.test.accuracy_test_runner import AccuracyTestParams
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.performance_test_runner import PerformanceTestParams
from sglang.test.run_combined_tests import run_combined_tests
from sglang.test.test_utils import ModelLaunchSettings

# Manual-only: not registered in any CI suite
# Runs on both H200 and B200 via nightly-8-gpu-common suite
register_cuda_ci(est_time=1800, suite="nightly-8-gpu-common", nightly=True)

GLM_4_6_FP8_MODEL_PATH = "zai-org/GLM-4.6-FP8"


Expand Down
13 changes: 3 additions & 10 deletions test/registered/8-gpu-models/test_qwen35.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# Runs on both H200 and B200 via nightly-8-gpu-common suite
register_cuda_ci(est_time=1800, suite="nightly-8-gpu-common", nightly=True)

QWEN35_MODEL_PATH = "Qwen/Qwen3.5-397B-A17B-FP8"
QWEN35_MODEL_PATH = "Qwen/Qwen3.5-397B-A17B"


class TestQwen35(unittest.TestCase):
Expand All @@ -30,7 +30,6 @@ def test_qwen35(self):
"--tool-call-parser=qwen3_coder",
"--mem-fraction-static=0.8",
]
dp_args = ["--dp=8", "--enable-dp-attention"]
mtp_args = [
"--speculative-algorithm=EAGLE",
"--speculative-num-steps=3",
Expand All @@ -49,14 +48,8 @@ def test_qwen35(self):
ModelLaunchSettings(
QWEN35_MODEL_PATH,
tp_size=8,
extra_args=base_args + dp_args,
variant="TP8+DP8",
),
ModelLaunchSettings(
QWEN35_MODEL_PATH,
tp_size=8,
extra_args=base_args + dp_args + mtp_args,
variant="TP8+DP8+MTP",
extra_args=base_args + mtp_args,
variant="TP8+MTP",
env={"SGLANG_ENABLE_SPEC_V2": "1"},
),
]
Expand Down
5 changes: 4 additions & 1 deletion test/registered/8-gpu-models/test_qwen3_235b.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import unittest

from sglang.test.accuracy_test_runner import AccuracyTestParams
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.performance_test_runner import PerformanceTestParams
from sglang.test.run_combined_tests import run_combined_tests
from sglang.test.test_utils import ModelLaunchSettings, is_blackwell_system

# Manual-only: not registered in any CI suite
# Runs on both H200 and B200 via nightly-8-gpu-common suite
register_cuda_ci(est_time=1800, suite="nightly-8-gpu-common", nightly=True)

QWEN3_235B_FP8_MODEL_PATH = "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8"
QWEN3_235B_EAGLE3_MODEL_PATH = (
"lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge-Meituan"
Expand Down
Loading