-
Notifications
You must be signed in to change notification settings - Fork 5k
[CI] Add Mistral Large 3 Eagle basic PR test #14526
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
0fe1340
[CI] Add Mistral Large 3 Eagle basic PR test
alisonshao f6f9ba4
Add unit-test-backend-8-gpu-b200 to rerun-stage supported stages
alisonshao f926c39
Fix black formatting
alisonshao 32bac74
Fix Eagle test: use base model with Eagle as draft model for speculat…
alisonshao 2dbbc1b
Fix: use speculative-eagle-topk=1 for trtllm_mla backend compatibility
alisonshao 4b00120
Merge branch 'main' into add-eagle-pr-ci-test
alisonshao 296650d
Fix: incorrect import path in mistral_large_3_eagle.py preventing mod…
alisonshao ea07490
Fix isort import order
alisonshao File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,105 @@ | ||
| import os | ||
| import unittest | ||
| from types import SimpleNamespace | ||
|
|
||
| from sglang.srt.utils import kill_process_tree | ||
| from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k | ||
| from sglang.test.send_one import BenchArgs, send_one_prompt | ||
| from sglang.test.test_utils import ( | ||
| DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, | ||
| DEFAULT_URL_FOR_TEST, | ||
| CustomTestCase, | ||
| is_in_ci, | ||
| popen_launch_server, | ||
| write_github_step_summary, | ||
| ) | ||
|
|
||
# Model identifiers used by this test: the target (base) model that is served,
# and the Eagle checkpoint passed as its speculative-decoding draft model.
MISTRAL_LARGE3_MODEL_PATH = "mistralai/Mistral-Large-3-675B-Instruct-2512"
MISTRAL_LARGE3_EAGLE_MODEL_PATH = f"{MISTRAL_LARGE3_MODEL_PATH}-Eagle"
|
|
||
|
|
||
class TestMistralLarge3EagleBasic(CustomTestCase):
    """Basic PR test for Mistral Large 3 with EAGLE speculative decoding.

    A single server is launched for the whole class with tensor parallelism 8,
    the ``trtllm_mla`` attention backend and the Eagle checkpoint as the draft
    model. The tests then check GSM8K accuracy and single-request decode speed
    against that server.
    """

    # Environment variable toggled for the lifetime of this test class.
    _JIT_DEEPGEMM_ENV = "SGLANG_ENABLE_JIT_DEEPGEMM"
    # Value the variable had before setUpClass overwrote it; None means unset.
    _previous_jit_deepgemm = None

    @classmethod
    def _restore_jit_deepgemm_env(cls):
        """Put the JIT DeepGemm variable back to its pre-test state.

        Safe to call more than once: it always converges on the saved value.
        """
        if cls._previous_jit_deepgemm is None:
            os.environ.pop(cls._JIT_DEEPGEMM_ENV, None)
        else:
            os.environ[cls._JIT_DEEPGEMM_ENV] = cls._previous_jit_deepgemm

    @classmethod
    def setUpClass(cls):
        """Disable JIT DeepGemm and launch the server shared by all tests."""
        # Remember any pre-existing value so cleanup restores it instead of
        # unconditionally deleting the variable.
        cls._previous_jit_deepgemm = os.environ.get(cls._JIT_DEEPGEMM_ENV)
        # Set environment variable to disable JIT DeepGemm
        os.environ[cls._JIT_DEEPGEMM_ENV] = "0"

        cls.model = MISTRAL_LARGE3_MODEL_PATH
        cls.base_url = DEFAULT_URL_FOR_TEST
        # Mistral-Large-3 with Eagle speculative decoding
        # Eagle model is used as draft model for speculative decoding
        other_args = [
            "--tp",
            "8",
            "--attention-backend",
            "trtllm_mla",
            "--speculative-algorithm",
            "EAGLE",
            "--speculative-draft-model-path",
            MISTRAL_LARGE3_EAGLE_MODEL_PATH,
            "--speculative-num-steps",
            "3",
            "--speculative-eagle-topk",
            "1",
            "--speculative-num-draft-tokens",
            "4",
            "--kv-cache-dtype",
            "auto",
            "--model-loader-extra-config",
            '{"enable_multithread_load": true}',
            "--chat-template",
            "mistral",
        ]
        try:
            cls.process = popen_launch_server(
                cls.model,
                cls.base_url,
                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH * 5,
                other_args=other_args,
            )
        except Exception:
            # unittest does not run tearDownClass when setUpClass raises, so
            # undo the environment change here before propagating the error.
            cls._restore_jit_deepgemm_env()
            raise

    @classmethod
    def tearDownClass(cls):
        """Stop the server and restore the JIT DeepGemm environment variable."""
        # The attribute is missing if the server never started.
        process = getattr(cls, "process", None)
        try:
            if process is not None:
                kill_process_tree(process.pid)
        finally:
            # Clean up the environment even if killing the process tree fails.
            cls._restore_jit_deepgemm_env()

    # Append an "a" to make this test run first (alphabetically) to warm up
    # the server.
    def test_a_gsm8k(self):
        """Run few-shot GSM8K and require accuracy above 0.90."""
        args = SimpleNamespace(
            num_shots=8,
            data_path=None,
            num_questions=1400,
            parallel=1400,
            max_new_tokens=512,
            host="http://127.0.0.1",
            port=int(self.base_url.split(":")[-1]),
        )
        metrics = run_eval_few_shot_gsm8k(args)
        print(f"{metrics=}")

        if is_in_ci():
            write_github_step_summary(
                f"### test_gsm8k (mistral-large-3-eagle)\n"
                f'{metrics["accuracy"]=:.3f}\n'
            )
        self.assertGreater(metrics["accuracy"], 0.90)

    def test_bs_1_speed(self):
        """Send one prompt and require the reported speed to exceed 50."""
        args = BenchArgs(port=int(self.base_url.split(":")[-1]), max_new_tokens=2048)
        # The first element of the result is not checked by this test.
        _, speed = send_one_prompt(args)

        print(f"{speed=:.2f}")

        if is_in_ci():
            write_github_step_summary(
                f"### test_bs_1_speed (mistral-large-3-eagle)\n"
                f"{speed=:.2f} token/s\n"
            )
        self.assertGreater(speed, 50)
|
|
||
|
|
||
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using a `try...finally` block ensures that the environment variable is cleaned up even if `kill_process_tree` raises an exception. This makes the test cleanup more robust.