diff --git a/python/sglang/srt/layers/moe/utils.py b/python/sglang/srt/layers/moe/utils.py index 70466bb20838..cd85fc2f2656 100644 --- a/python/sglang/srt/layers/moe/utils.py +++ b/python/sglang/srt/layers/moe/utils.py @@ -284,7 +284,7 @@ def speculative_moe_a2a_backend_context(): global MOE_A2A_BACKEND original_backend = MOE_A2A_BACKEND try: - MOE_A2A_BACKEND = MoeA2ABackend.NONE + MOE_A2A_BACKEND = get_speculative_moe_a2a_backend() yield finally: MOE_A2A_BACKEND = original_backend diff --git a/test/srt/ep/test_deepep_large.py b/test/srt/ep/test_deepep_large.py index 58b31cae1646..1c4a91e6ac34 100644 --- a/test/srt/ep/test_deepep_large.py +++ b/test/srt/ep/test_deepep_large.py @@ -35,6 +35,8 @@ def setUpClass(cls): "--enable-dp-lm-head", "--moe-a2a-backend", "deepep", + "--moe-runner-backend", + "deep_gemm", "--enable-two-batch-overlap", "--ep-num-redundant-experts", "32", @@ -72,7 +74,6 @@ def test_gsm8k(self): self.assertGreater(metrics["accuracy"], 0.92) -@unittest.skip("Skipping this test until it's fixed.") class TestDeepseekMTP(CustomTestCase): @classmethod def setUpClass(cls): @@ -94,6 +95,8 @@ def setUpClass(cls): "--enable-dp-lm-head", "--moe-a2a-backend", "deepep", + "--moe-runner-backend", + "deep_gemm", "--enable-two-batch-overlap", "--ep-num-redundant-experts", "32",