From bfbf8def19867d7176384f356dc8f470c9d956dc Mon Sep 17 00:00:00 2001
From: Kangyan Zhou
Date: Fri, 21 Nov 2025 16:17:09 -0800
Subject: [PATCH] Fix B200 Nightly tests and move one manual test back to unit test to prevent the same issue

---
 python/sglang/srt/layers/moe/fused_moe_triton/layer.py | 4 +++-
 test/srt/run_suite.py                                  | 1 +
 test/{manual => srt}/test_triton_fused_moe.py          | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)
 rename test/{manual => srt}/test_triton_fused_moe.py (98%)

diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
index 47a9da781b7e..3b3b8e7b716b 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
@@ -1034,7 +1034,9 @@ def forward(self, hidden_states: torch.Tensor, topk_output: TopKOutput):
         final_hidden_states = self.quant_method.apply_with_router_logits(
             layer=self,
             dispatch_output=StandardDispatchOutput(
-                hidden_states=hidden_states, topk_output=topk_output
+                hidden_states=hidden_states,
+                hidden_states_scale=None,
+                topk_output=topk_output,
             ),
         )
 
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index 48510c959eef..863192c00495 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -116,6 +116,7 @@
         TestFile("test_swa_unittest.py", 1),
         TestFile("test_torch_compile.py", 76),
         TestFile("test_torch_compile_moe.py", 210),
+        TestFile("test_triton_fused_moe.py", 80),
         TestFile("test_torch_native_attention_backend.py", 123),
         TestFile("test_torchao.py", 70),
         TestFile("test_triton_attention_kernels.py", 4),
diff --git a/test/manual/test_triton_fused_moe.py b/test/srt/test_triton_fused_moe.py
similarity index 98%
rename from test/manual/test_triton_fused_moe.py
rename to test/srt/test_triton_fused_moe.py
index d989494c95d0..07e774a9b0cc 100644
--- a/test/manual/test_triton_fused_moe.py
+++ b/test/srt/test_triton_fused_moe.py
@@ -115,7 +115,7 @@ def _test_case(self, m, n, k, e, topk, dtype):
 
         quant_info = TritonKernelsQuantInfo(w13_weight=w1_tri, w2_weight=w2_tri)
         dispatch_output = StandardDispatchOutput(
-            hidden_states=a, topk_output=triton_topk_output
+            hidden_states=a, hidden_states_scale=None, topk_output=triton_topk_output
         )
 
         torch_per_expert = self.torch_naive_moe(