diff --git a/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py b/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py index 6926f1d89a58..fdd3dca263de 100644 --- a/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py +++ b/test/registered/lora/test_lora_moe_vllm_sgl_logprob_diff.py @@ -332,7 +332,7 @@ def test_sglang_moe_parity_strict(self): ref = REFERENCE_STATS[i] # Epsilon to allow room for different, but correct, implementations - eps = 1e-4 + eps = 2e-4 # Assertions self.assertEqual(v_text, s_text, f"String mismatch on prompt {i}") diff --git a/test/registered/quant/test_deepseek_v32_fp4_4gpu.py b/test/registered/quant/test_deepseek_v32_fp4_4gpu.py index 4bdcd1367092..711da8b1aefa 100644 --- a/test/registered/quant/test_deepseek_v32_fp4_4gpu.py +++ b/test/registered/quant/test_deepseek_v32_fp4_4gpu.py @@ -58,8 +58,8 @@ def test_a_gsm8k( args = SimpleNamespace( num_shots=20, data_path=None, - num_questions=1319, - parallel=1319, + num_questions=500, + parallel=500, max_new_tokens=512, host="http://127.0.0.1", port=int(self.base_url.split(":")[-1]), @@ -72,7 +72,7 @@ def test_a_gsm8k( f"### test_gsm8k (deepseek-v3-fp4)\n" f'{metrics["accuracy"]=:.3f}\n' ) - self.assertGreater(metrics["accuracy"], 0.935) + self.assertGreater(metrics["accuracy"], 0.93) def test_bs_1_speed(self): args = BenchArgs(port=int(self.base_url.split(":")[-1]), max_new_tokens=2048) @@ -125,8 +125,8 @@ def test_a_gsm8k( args = SimpleNamespace( num_shots=20, data_path=None, - num_questions=1319, - parallel=1319, + num_questions=500, + parallel=500, max_new_tokens=512, host="http://127.0.0.1", port=int(self.base_url.split(":")[-1]), @@ -139,7 +139,7 @@ def test_a_gsm8k( f"### test_gsm8k (deepseek-v3-fp4)\n" f'{metrics["accuracy"]=:.3f}\n' ) - self.assertGreater(metrics["accuracy"], 0.935) + self.assertGreater(metrics["accuracy"], 0.93) def test_bs_1_speed(self): args = BenchArgs(port=int(self.base_url.split(":")[-1]), max_new_tokens=2048) diff --git a/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py b/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py index 3a99f6ad9f12..81c6162af342 100644 --- a/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py +++ b/test/registered/quant/test_deepseek_v32_fp4_mtp_4gpu.py @@ -95,7 +95,7 @@ def test_a_gsm8k( f'{metrics["accuracy"]=:.3f}\n' f"{avg_spec_accept_length=:.2f}\n" ) - self.assertGreater(metrics["accuracy"], 0.94) + self.assertGreater(metrics["accuracy"], 0.93) self.assertGreater(avg_spec_accept_length, 2.7) def test_bs_1_speed(self): @@ -185,7 +185,7 @@ def test_a_gsm8k( f'{metrics["accuracy"]=:.3f}\n' f"{avg_spec_accept_length=:.2f}\n" ) - self.assertGreater(metrics["accuracy"], 0.94) + self.assertGreater(metrics["accuracy"], 0.93) self.assertGreater(avg_spec_accept_length, 2.7) def test_bs_1_speed(self):