# Patch summary:
# - .github/workflows/pr-test.yml: adds the job unit-test-backend-8-gpu-b200. It runs on the
#   8-gpu-b200 runner, installs dependencies with IS_BLACKWELL=1, and runs
#   `run_suite.py --suite per-commit-8-gpu-b200 --timeout-per-file 1800` under a 45-minute
#   step timeout. The job is also appended to the pr-test-finish `needs` list.
# - test/srt/run_suite.py: replaces the empty "per-commit-8-gpu-b200" suite with a single
#   entry, TestFile("test_mistral_large3_basic.py", 275).
# - test/srt/test_mistral_large3_basic.py: new file; only its file header is on this line.
# NOTE(review): line breaks inside the hunks appear to have been collapsed to spaces, so the
# context-line indentation is not recoverable from here - confirm against the original patch.
diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index beaa89acc7db..7f3971645612 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -1146,6 +1146,32 @@ jobs: cd test/srt python3 run_suite.py --suite per-commit-4-gpu-gb200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600 + unit-test-backend-8-gpu-b200: + needs: [check-changes, call-gate, unit-test-backend-2-gpu] + if: | + (inputs.target_stage == 'unit-test-backend-8-gpu-b200') || + ( + always() && + (github.event_name == 'schedule' || (!failure() && !cancelled())) && + ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) + ) + runs-on: 8-gpu-b200 + env: + RUNNER_LABELS: 8-gpu-b200 + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh + + - name: Run test + timeout-minutes: 45 + run: | + cd test/srt + python3 run_suite.py --suite per-commit-8-gpu-b200 --timeout-per-file 1800 + pr-test-finish: needs: [ @@ -1178,6 +1204,7 @@ jobs: unit-test-deepep-8-gpu, unit-test-backend-4-gpu-b200, unit-test-backend-4-gpu-gb200, + unit-test-backend-8-gpu-b200, ] if: always() runs-on: ubuntu-latest diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 5c899a6c24fe..e17c25fcdae6 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -180,7 +180,9 @@ # TODO: Add it back after the bug is fixed # TestFile("test_eagle_infer_beta_dp_attention.py", 200), ], - "per-commit-8-gpu-b200": [], + "per-commit-8-gpu-b200": [ + TestFile("test_mistral_large3_basic.py", 275), + ], "per-commit-4-gpu-gb200": [ TestFile("test_cutedsl_moe.py", 300), TestFile("test_deepseek_v3_cutedsl_4gpu.py", 590), diff --git a/test/srt/test_mistral_large3_basic.py b/test/srt/test_mistral_large3_basic.py new file mode 100644 index 000000000000..3b173f1abd7c --- /dev/null +++ b/test/srt/test_mistral_large3_basic.py 
@@ -0,0 +1,118 @@
+import os
+import unittest
+from types import SimpleNamespace
+
+from sglang.srt.utils import kill_process_tree
+from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
+from sglang.test.send_one import BenchArgs, send_one_prompt
+from sglang.test.test_utils import (
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    CustomTestCase,
+    is_in_ci,
+    popen_launch_server,
+    write_github_step_summary,
+)
+
+MISTRAL_LARGE3_MODEL_PATH = "mistralai/Mistral-Large-3-675B-Instruct-2512"
+# Environment variable forced to "0" while this test class is running.
+JIT_DEEPGEMM_ENV = "SGLANG_ENABLE_JIT_DEEPGEMM"
+
+
+class TestMistralLarge3Basic(CustomTestCase):
+    """Accuracy and single-request speed checks against one shared server.
+
+    The server is launched once per class with ``--tp 8`` and the
+    ``trtllm_mla`` attention backend, then stopped after the last test.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Disable JIT DeepGemm and launch the server shared by all tests."""
+        # Remember the pre-existing value so it can be restored afterwards
+        # instead of being unconditionally deleted.
+        cls._saved_jit_deepgemm = os.environ.get(JIT_DEEPGEMM_ENV)
+        os.environ[JIT_DEEPGEMM_ENV] = "0"
+
+        cls.model = MISTRAL_LARGE3_MODEL_PATH
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        other_args = [
+            "--tp",
+            "8",
+            "--attention-backend",
+            "trtllm_mla",
+            "--model-loader-extra-config",
+            '{"enable_multithread_load": true}',
+            "--chat-template",
+            "mistral",
+        ]
+        try:
+            cls.process = popen_launch_server(
+                cls.model,
+                cls.base_url,
+                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH * 5,
+                other_args=other_args,
+            )
+        except Exception:
+            # Plain unittest skips tearDownClass when setUpClass raises, so
+            # undo the environment change here before propagating the error.
+            cls._restore_jit_deepgemm_env()
+            raise
+
+    @classmethod
+    def tearDownClass(cls):
+        """Stop the server (if one was started) and restore the environment."""
+        try:
+            process = getattr(cls, "process", None)
+            if process is not None:
+                kill_process_tree(process.pid)
+        finally:
+            cls._restore_jit_deepgemm_env()
+
+    @classmethod
+    def _restore_jit_deepgemm_env(cls):
+        """Put the JIT DeepGemm variable back to its pre-test state."""
+        saved = getattr(cls, "_saved_jit_deepgemm", None)
+        if saved is None:
+            os.environ.pop(JIT_DEEPGEMM_ENV, None)
+        else:
+            os.environ[JIT_DEEPGEMM_ENV] = saved
+
+    def test_a_gsm8k(
+        self,
+    ):  # Append an "a" to make this test run first (alphabetically) to warm up the server
+        # Few-shot GSM8K evaluation; the reported accuracy must exceed 0.90.
+        args = SimpleNamespace(
+            num_shots=8,
+            data_path=None,
+            num_questions=1400,
+            parallel=1400,
+            max_new_tokens=512,
+            host="http://127.0.0.1",
+            port=int(self.base_url.split(":")[-1]),
+        )
+        metrics = run_eval_few_shot_gsm8k(args)
+        print(f"{metrics=}")
+
+        if is_in_ci():
+            write_github_step_summary(
+                f"### test_gsm8k (mistral-large-3)\n" f'{metrics["accuracy"]=:.3f}\n'
+            )
+        self.assertGreater(metrics["accuracy"], 0.90)
+
+    def test_bs_1_speed(self):
+        # Single-prompt benchmark; the measured speed must exceed 50.
+        args = BenchArgs(port=int(self.base_url.split(":")[-1]), max_new_tokens=2048)
+        _acc_length, speed = send_one_prompt(args)
+
+        print(f"{speed=:.2f}")
+
+        if is_in_ci():
+            write_github_step_summary(
+                f"### test_bs_1_speed (mistral-large-3)\n" f"{speed=:.2f} token/s\n"
+            )
+        self.assertGreater(speed, 50)
+
+
+if __name__ == "__main__":
+    unittest.main()