From d0e353014c65f1fbe4cd94885c95fc9d31d5fbaf Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Wed, 23 Apr 2025 16:30:17 -0700 Subject: [PATCH 01/11] Add CI for Deepseek-V3 --- .github/workflows/pr-test.yml | 17 +++++ test/srt/test_full_deepseek_v3.py | 107 ++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 test/srt/test_full_deepseek_v3.py diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 0c71bf05bff..2bbbcbf4c36 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -237,6 +237,23 @@ jobs: cd test/srt python3 test_moe_eval_accuracy_large.py + unit-test-backend-8-gpu: + if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && + github.event.pull_request.draft == false + runs-on: 8-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + bash scripts/ci_install_dependency.sh + - name: Run DeepSeek-V3 test + timeout-minutes: 40 + run: | + cd test/srt + python3 test_full_deepseek_v3.py + finish: if: always() needs: [ diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py new file mode 100644 index 00000000000..d6275cdfbc7 --- /dev/null +++ b/test/srt/test_full_deepseek_v3.py @@ -0,0 +1,107 @@ +import unittest +from types import SimpleNamespace + +import requests +import torch + +from sglang.srt.utils import kill_process_tree +from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + CustomTestCase, + popen_launch_server, +) + + +class TestDeepseekV3(CustomTestCase): + @classmethod + def setUpClass(cls): + cls.model = "deepseek-ai/DeepSeek-V3-0324" + cls.base_url = DEFAULT_URL_FOR_TEST + other_args = ["--trust-remote-code", "--tp", "8"] + cls.process = popen_launch_server( + cls.model, + cls.base_url, + 
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, + ) + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_gsm8k(self): + args = SimpleNamespace( + num_shots=8, + data_path=None, + num_questions=1400, + parallel=1400, + max_new_tokens=512, + host="http://127.0.0.1", + port=int(self.base_url.split(":")[-1]), + ) + metrics = run_eval_few_shot_gsm8k(args) + print(metrics) + + self.assertGreater(metrics["accuracy"], 0.945) + + +class TestDeepseekV3MTP(CustomTestCase): + @classmethod + def setUpClass(cls): + cls.model = "deepseek-ai/DeepSeek-V3-0324" + cls.base_url = DEFAULT_URL_FOR_TEST + other_args = [ + "--tp", + "8", + "--trust-remote-code", + "--speculative-algorithm", + "EAGLE", + "--speculative-draft", + "lmsys/DeepSeek-V3-0324-NextN", + "--speculative-num-steps", + "5", + "--speculative-eagle-topk", + "4", + "--speculative-num-draft-tokens", + "8", + "--mem-fraction-static", + "0.6", + ] + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, + ) + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_gsm8k(self): + requests.get(self.base_url + "/flush_cache") + + args = SimpleNamespace( + num_shots=5, + data_path=None, + num_questions=200, + max_new_tokens=512, + parallel=128, + host="http://127.0.0.1", + port=int(self.base_url.split(":")[-1]), + ) + metrics = run_eval_few_shot_gsm8k(args) + print(metrics) + + self.assertGreater(metrics["accuracy"], 0.94) + + server_info = requests.get(self.base_url + "/get_server_info") + avg_spec_accept_length = server_info.json()["avg_spec_accept_length"] + print(f"{avg_spec_accept_length=}") + self.assertGreater(avg_spec_accept_length, 3.0) + + +if __name__ == "__main__": + unittest.main() From c55b056f5dcadf1a39da2209013171b7b4be10e7 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Thu, 24 Apr 2025 03:01:08 +0000 Subject: [PATCH 02/11] add 
throughput test --- test/srt/test_full_deepseek_v3.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py index d6275cdfbc7..52c70943906 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_full_deepseek_v3.py @@ -10,7 +10,10 @@ DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, CustomTestCase, + is_in_ci, popen_launch_server, + run_bench_one_batch, + write_github_step_summary, ) @@ -47,6 +50,21 @@ def test_gsm8k(self): self.assertGreater(metrics["accuracy"], 0.945) +class TestBenchOneBatch(CustomTestCase): + def test_bs1(self): + output_throughput = run_bench_one_batch( + "deepseek-ai/DeepSeek-V3-0324", + ["--trust-remote-code", "--tp", "8", "--cuda-graph-max-bs", "2"], + ) + + if is_in_ci(): + write_github_step_summary( + f"### test_bs1\n" + f"output_throughput : {output_throughput:.2f} token/s\n" + ) + self.assertGreater(output_throughput, 60) + + class TestDeepseekV3MTP(CustomTestCase): @classmethod def setUpClass(cls): @@ -95,7 +113,8 @@ def test_gsm8k(self): metrics = run_eval_few_shot_gsm8k(args) print(metrics) - self.assertGreater(metrics["accuracy"], 0.94) + # TODO: Remove this comment after the accuracy bug is fixed + # self.assertGreater(metrics["accuracy"], 0.94) server_info = requests.get(self.base_url + "/get_server_info") avg_spec_accept_length = server_info.json()["avg_spec_accept_length"] From 723f4cbfffa16fa9e9aef496037708c5bcc23b6b Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Thu, 24 Apr 2025 06:43:57 +0000 Subject: [PATCH 03/11] print throughput --- test/srt/test_full_deepseek_v3.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py index 52c70943906..507d3dc6584 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_full_deepseek_v3.py @@ -56,13 +56,8 @@ def test_bs1(self): "deepseek-ai/DeepSeek-V3-0324", 
["--trust-remote-code", "--tp", "8", "--cuda-graph-max-bs", "2"], ) - - if is_in_ci(): - write_github_step_summary( - f"### test_bs1\n" - f"output_throughput : {output_throughput:.2f} token/s\n" - ) - self.assertGreater(output_throughput, 60) + print(f"output_throughput : {output_throughput:.2f} token/s") + self.assertGreater(output_throughput, 60) class TestDeepseekV3MTP(CustomTestCase): From f02b75408c0ee1aef2281ef1e82bd0b023f1e2e8 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Thu, 24 Apr 2025 17:19:45 +0000 Subject: [PATCH 04/11] modify thresholds --- test/srt/test_full_deepseek_v3.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py index 507d3dc6584..f8863968646 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_full_deepseek_v3.py @@ -57,7 +57,7 @@ def test_bs1(self): ["--trust-remote-code", "--tp", "8", "--cuda-graph-max-bs", "2"], ) print(f"output_throughput : {output_throughput:.2f} token/s") - self.assertGreater(output_throughput, 60) + self.assertGreater(output_throughput, 65) class TestDeepseekV3MTP(CustomTestCase): @@ -107,14 +107,12 @@ def test_gsm8k(self): ) metrics = run_eval_few_shot_gsm8k(args) print(metrics) - - # TODO: Remove this comment after the accuracy bug is fixed - # self.assertGreater(metrics["accuracy"], 0.94) + self.assertGreater(metrics["accuracy"], 0.945) server_info = requests.get(self.base_url + "/get_server_info") avg_spec_accept_length = server_info.json()["avg_spec_accept_length"] print(f"{avg_spec_accept_length=}") - self.assertGreater(avg_spec_accept_length, 3.0) + self.assertGreater(avg_spec_accept_length, 3.2) if __name__ == "__main__": From a616777fb0555a2482346f51e84d80ce082502ae Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Thu, 24 Apr 2025 18:29:47 +0000 Subject: [PATCH 05/11] Add is_in_ci for throughput test --- test/srt/test_full_deepseek_v3.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 
deletions(-) diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py index f8863968646..41142b13876 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_full_deepseek_v3.py @@ -16,11 +16,13 @@ write_github_step_summary, ) +FULL_DEEPSEEK_V3_MODEL_PATH = "deepseek-ai/DeepSeek-V3-0324" + class TestDeepseekV3(CustomTestCase): @classmethod def setUpClass(cls): - cls.model = "deepseek-ai/DeepSeek-V3-0324" + cls.model = FULL_DEEPSEEK_V3_MODEL_PATH cls.base_url = DEFAULT_URL_FOR_TEST other_args = ["--trust-remote-code", "--tp", "8"] cls.process = popen_launch_server( @@ -53,17 +55,22 @@ def test_gsm8k(self): class TestBenchOneBatch(CustomTestCase): def test_bs1(self): output_throughput = run_bench_one_batch( - "deepseek-ai/DeepSeek-V3-0324", + FULL_DEEPSEEK_V3_MODEL_PATH, ["--trust-remote-code", "--tp", "8", "--cuda-graph-max-bs", "2"], ) print(f"output_throughput : {output_throughput:.2f} token/s") - self.assertGreater(output_throughput, 65) + if is_in_ci(): + write_github_step_summary( + f"### test_bs1\n" + f"output_throughput : {output_throughput:.2f} token/s\n" + ) + self.assertGreater(output_throughput, 70) class TestDeepseekV3MTP(CustomTestCase): @classmethod def setUpClass(cls): - cls.model = "deepseek-ai/DeepSeek-V3-0324" + cls.model = FULL_DEEPSEEK_V3_MODEL_PATH cls.base_url = DEFAULT_URL_FOR_TEST other_args = [ "--tp", From 7ecebdb98b9c62480d8406feaae572b263149644 Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Sat, 26 Apr 2025 04:19:28 +0000 Subject: [PATCH 06/11] relax threshold a little bit --- test/srt/test_full_deepseek_v3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py index 41142b13876..90267049da2 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_full_deepseek_v3.py @@ -49,7 +49,7 @@ def test_gsm8k(self): metrics = run_eval_few_shot_gsm8k(args) print(metrics) - self.assertGreater(metrics["accuracy"], 
0.945) + self.assertGreater(metrics["accuracy"], 0.94) class TestBenchOneBatch(CustomTestCase): @@ -114,7 +114,7 @@ def test_gsm8k(self): ) metrics = run_eval_few_shot_gsm8k(args) print(metrics) - self.assertGreater(metrics["accuracy"], 0.945) + self.assertGreater(metrics["accuracy"], 0.94) server_info = requests.get(self.base_url + "/get_server_info") avg_spec_accept_length = server_info.json()["avg_spec_accept_length"] From 69cbe09aa1baa43d59df30bea9dcc596d10eb4d7 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sat, 26 Apr 2025 18:23:14 -0700 Subject: [PATCH 07/11] Apply suggestions from code review --- .github/workflows/pr-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 2bbbcbf4c36..bebb090bb46 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -248,6 +248,7 @@ jobs: - name: Install dependencies run: | bash scripts/ci_install_dependency.sh + - name: Run DeepSeek-V3 test timeout-minutes: 40 run: | From 2020d42531388b2a2d2e68c6a27c9c49ef7f5453 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sat, 26 Apr 2025 18:24:39 -0700 Subject: [PATCH 08/11] Update test/srt/test_full_deepseek_v3.py --- test/srt/test_full_deepseek_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py index 90267049da2..fca4f36b311 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_full_deepseek_v3.py @@ -47,7 +47,7 @@ def test_gsm8k(self): port=int(self.base_url.split(":")[-1]), ) metrics = run_eval_few_shot_gsm8k(args) - print(metrics) + print(f"{metrics=}") self.assertGreater(metrics["accuracy"], 0.94) From 30f9eeb10f2ce2d4487e65807db7ef5d3b2cdee1 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sat, 26 Apr 2025 18:24:43 -0700 Subject: [PATCH 09/11] Update test/srt/test_full_deepseek_v3.py --- test/srt/test_full_deepseek_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/test/srt/test_full_deepseek_v3.py b/test/srt/test_full_deepseek_v3.py index fca4f36b311..552e0e36b6d 100644 --- a/test/srt/test_full_deepseek_v3.py +++ b/test/srt/test_full_deepseek_v3.py @@ -113,7 +113,7 @@ def test_gsm8k(self): port=int(self.base_url.split(":")[-1]), ) metrics = run_eval_few_shot_gsm8k(args) - print(metrics) + print(f"{metrics=}") self.assertGreater(metrics["accuracy"], 0.94) server_info = requests.get(self.base_url + "/get_server_info") From 7c95a2685f08215e13e0159255bb835d30bb5d8e Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Sun, 27 Apr 2025 05:23:01 +0000 Subject: [PATCH 10/11] move dsv3 test to suite --- .github/workflows/pr-test.yml | 20 +------------------- test/srt/run_suite.py | 1 + 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 7e4be091d9b..68e82e2fdac 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -102,7 +102,7 @@ jobs: bash scripts/ci_install_dependency.sh - name: Run test - timeout-minutes: 30 + timeout-minutes: 40 run: | cd test/srt python3 run_suite.py --suite per-commit-8-gpu @@ -257,24 +257,6 @@ jobs: cd test/srt python3 test_moe_eval_accuracy_large.py - unit-test-backend-8-gpu: - if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && - github.event.pull_request.draft == false - runs-on: 8-gpu-runner - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install dependencies - run: | - bash scripts/ci_install_dependency.sh - - - name: Run DeepSeek-V3 test - timeout-minutes: 40 - run: | - cd test/srt - python3 test_full_deepseek_v3.py - finish: if: always() needs: [ diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 6a2497aaee1..3cb950a6b95 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -92,6 +92,7 @@ class TestFile: ], "per-commit-8-gpu": [ TestFile("test_fa3.py", 30), + 
TestFile("test_full_deepseek_v3.py", 30), ], "nightly": [ TestFile("test_nightly_gsm8k_eval.py"), From 3e87f99623292bfa7d513d478537d917f49d48de Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Sun, 27 Apr 2025 17:44:32 +0000 Subject: [PATCH 11/11] Fix conflict --- test/srt/run_suite.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 3cb950a6b95..7b6db0afc9b 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -91,8 +91,8 @@ class TestFile: TestFile("test_verl_engine.py", 100), ], "per-commit-8-gpu": [ - TestFile("test_fa3.py", 30), - TestFile("test_full_deepseek_v3.py", 30), + TestFile("test_local_attn.py", 250), + TestFile("test_full_deepseek_v3.py", 250), ], "nightly": [ TestFile("test_nightly_gsm8k_eval.py"),