From 6d7373eb3ec82094e47a90581b05e97bac78e1e9 Mon Sep 17 00:00:00 2001 From: Brayden Zhong Date: Mon, 8 Dec 2025 20:32:58 -0800 Subject: [PATCH 1/3] more --- test/srt/test_llama31_fp4.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/srt/test_llama31_fp4.py b/test/srt/test_llama31_fp4.py index 1be9671842a1..5e5da986275d 100644 --- a/test/srt/test_llama31_fp4.py +++ b/test/srt/test_llama31_fp4.py @@ -14,7 +14,7 @@ @unittest.skipIf(get_device_sm() < 100, "Test requires CUDA SM 100 or higher") -class TestLlama31FP4B200(unittest.TestCase): +class TestLlama31FP4(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = MODEL_PATH @@ -40,11 +40,11 @@ def tearDownClass(cls): def test_gsm8k(self): parsed_url = urlparse(self.base_url) args = SimpleNamespace( - num_shots=4, + num_shots=5, data_path=None, - num_questions=100, + num_questions=1319, max_new_tokens=512, - parallel=128, + parallel=200, host=f"{parsed_url.scheme}://{parsed_url.hostname}", port=parsed_url.port, ) From 6640cb04dcd7db25f46363a32801d1920685f02c Mon Sep 17 00:00:00 2001 From: Brayden Zhong Date: Mon, 8 Dec 2025 20:37:29 -0800 Subject: [PATCH 2/3] more --- test/srt/test_llama31_fp4.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/srt/test_llama31_fp4.py b/test/srt/test_llama31_fp4.py index 5e5da986275d..b870edb4eb91 100644 --- a/test/srt/test_llama31_fp4.py +++ b/test/srt/test_llama31_fp4.py @@ -21,8 +21,8 @@ def setUpClass(cls): cls.base_url = DEFAULT_URL_FOR_TEST other_args = [ "--trust-remote-code", - "--mem-fraction-static", - "0.8", + "--attention-backend", + "flashinfer", "--quantization", "modelopt_fp4", ] From b4ce5557cb2cf651780806a345b5fddfa0716e49 Mon Sep 17 00:00:00 2001 From: Brayden Zhong Date: Mon, 8 Dec 2025 20:46:55 -0800 Subject: [PATCH 3/3] more --- test/srt/test_llama31_fp4.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/srt/test_llama31_fp4.py b/test/srt/test_llama31_fp4.py index b870edb4eb91..36ae3697114f 100644 --- a/test/srt/test_llama31_fp4.py +++ b/test/srt/test_llama31_fp4.py @@ -21,8 +21,6 @@ def setUpClass(cls): cls.base_url = DEFAULT_URL_FOR_TEST other_args = [ "--trust-remote-code", - "--attention-backend", - "flashinfer", "--quantization", "modelopt_fp4", ] @@ -51,7 +49,7 @@ def test_gsm8k(self): metrics = run_eval_few_shot_gsm8k(args) print(metrics) - self.assertGreater(metrics["accuracy"], 0.61) + self.assertGreater(metrics["accuracy"], 0.54) if __name__ == "__main__":