-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Add 8-GPU Test for Deepseek-V3 #5691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 3 commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
d0e3530
Add CI for Deepseek-V3
Fridge003 c55b056
add throughput test
Fridge003 3e6f75c
Merge branch 'main' into test
ispobock 723f4cb
print throughput
Fridge003 3fd926d
Merge branch 'main' into test
Fridge003 f02b754
modify thresholds
Fridge003 a616777
Add is_in_ci for throughput test
Fridge003 e036909
Merge branch 'main' into test
Fridge003 178b073
Merge branch 'main' into test
Fridge003 7ecebdb
relax threshold a little bit
Fridge003 45b0af4
Merge branch 'main' into test
Fridge003 69cbe09
Apply suggestions from code review
merrymercy 2020d42
Update test/srt/test_full_deepseek_v3.py
merrymercy 30f9eeb
Update test/srt/test_full_deepseek_v3.py
merrymercy 331a42f
Merge branch 'main' into test
merrymercy a653777
Merge branch 'main' into test
Fridge003 7c95a26
move dsv3 test to suite
Fridge003 3e87f99
Fix conflict
Fridge003 f4f8e83
Merge branch 'main' into test
Fridge003 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| import unittest | ||
| from types import SimpleNamespace | ||
|
|
||
| import requests | ||
| import torch | ||
|
|
||
| from sglang.srt.utils import kill_process_tree | ||
| from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k | ||
| from sglang.test.test_utils import ( | ||
| DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, | ||
| DEFAULT_URL_FOR_TEST, | ||
| CustomTestCase, | ||
| is_in_ci, | ||
| popen_launch_server, | ||
| run_bench_one_batch, | ||
| write_github_step_summary, | ||
| ) | ||
|
|
||
|
|
||
class TestDeepseekV3(CustomTestCase):
    """GSM8K accuracy check against a DeepSeek-V3 server started with `--tp 8`.

    A single server process is launched for the whole class in
    `setUpClass` and shut down in `tearDownClass`.
    """

    @classmethod
    def setUpClass(cls):
        """Launch the server that every test in this class talks to."""
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.model = "deepseek-ai/DeepSeek-V3-0324"
        launch_flags = ["--trust-remote-code", "--tp", "8"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=launch_flags,
        )

    @classmethod
    def tearDownClass(cls):
        """Shut down the server launched in `setUpClass`."""
        kill_process_tree(cls.process.pid)

    def test_gsm8k(self):
        """Run the 8-shot GSM8K eval and require accuracy above 0.945."""
        # The eval helper takes host and port separately, so the port is
        # taken from the last ":"-separated piece of the base URL.
        server_port = int(self.base_url.split(":")[-1])
        eval_config = SimpleNamespace(
            host="http://127.0.0.1",
            port=server_port,
            data_path=None,
            num_shots=8,
            num_questions=1400,
            parallel=1400,
            max_new_tokens=512,
        )
        results = run_eval_few_shot_gsm8k(eval_config)
        print(results)

        self.assertGreater(results["accuracy"], 0.945)
|
|
||
|
|
||
class TestBenchOneBatch(CustomTestCase):
    """Output-throughput check for DeepSeek-V3 using `run_bench_one_batch`."""

    def test_bs1(self):
        """Benchmark the model and require more than 60 output token/s."""
        bench_flags = ["--trust-remote-code", "--tp", "8", "--cuda-graph-max-bs", "2"]
        throughput = run_bench_one_batch("deepseek-ai/DeepSeek-V3-0324", bench_flags)

        # The measured number is published to the GitHub step summary only
        # when the test runs in CI.
        if is_in_ci():
            summary = "### test_bs1\n" + f"output_throughput : {throughput:.2f} token/s\n"
            write_github_step_summary(summary)
        self.assertGreater(throughput, 60)
|
|
||
|
|
||
class TestDeepseekV3MTP(CustomTestCase):
    """GSM8K run against DeepSeek-V3 served with EAGLE speculative decoding.

    The server is launched once per class with the
    `lmsys/DeepSeek-V3-0324-NextN` draft model. The test then checks the
    `avg_spec_accept_length` value reported by the server.
    """

    @classmethod
    def setUpClass(cls):
        """Launch the speculative-decoding server shared by the class."""
        cls.model = "deepseek-ai/DeepSeek-V3-0324"
        cls.base_url = DEFAULT_URL_FOR_TEST
        other_args = [
            "--tp",
            "8",
            "--trust-remote-code",
            "--speculative-algorithm",
            "EAGLE",
            # Use the full option name. The shortened "--speculative-draft"
            # is only accepted through argparse prefix matching and turns
            # into an "ambiguous option" error as soon as a second
            # "--speculative-draft*" option exists.
            "--speculative-draft-model-path",
            "lmsys/DeepSeek-V3-0324-NextN",
            "--speculative-num-steps",
            "5",
            "--speculative-eagle-topk",
            "4",
            "--speculative-num-draft-tokens",
            "8",
            "--mem-fraction-static",
            "0.6",
        ]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=other_args,
        )

    @classmethod
    def tearDownClass(cls):
        """Shut down the server launched in `setUpClass`."""
        kill_process_tree(cls.process.pid)

    def test_gsm8k(self):
        """Run 5-shot GSM8K, then require an average accept length above 3.0.

        Raises:
            requests.HTTPError: if `/get_server_info` answers with an
                error status.
        """
        # Best-effort cache flush before the eval; the response is
        # intentionally not inspected.
        requests.get(self.base_url + "/flush_cache")

        args = SimpleNamespace(
            num_shots=5,
            data_path=None,
            num_questions=200,
            max_new_tokens=512,
            parallel=128,
            host="http://127.0.0.1",
            port=int(self.base_url.split(":")[-1]),
        )
        metrics = run_eval_few_shot_gsm8k(args)
        print(metrics)

        # TODO: Re-enable this assertion after the accuracy bug is fixed
        # self.assertGreater(metrics["accuracy"], 0.94)

        server_info = requests.get(self.base_url + "/get_server_info")
        # Surface an HTTP failure as itself instead of a confusing JSON or
        # KeyError on the next line.
        server_info.raise_for_status()
        avg_spec_accept_length = server_info.json()["avg_spec_accept_length"]
        print(f"{avg_spec_accept_length=}")
        self.assertGreater(avg_spec_accept_length, 3.0)
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| unittest.main() | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.