diff --git a/python/sglang/srt/layers/moe/ep_moe/kernels.py b/python/sglang/srt/layers/moe/ep_moe/kernels.py index 56c6c7db7c7..e00562f58cc 100644 --- a/python/sglang/srt/layers/moe/ep_moe/kernels.py +++ b/python/sglang/srt/layers/moe/ep_moe/kernels.py @@ -621,7 +621,7 @@ def grouped_gemm_triton_kernel( b_ptr += BLOCK_SIZE_K if use_fp8_w8a8 and not (group_k > 0 and group_n > 0): - scale_a_value = tl.load(scale_a + expert_id) + scale_a_value = tl.load(scale_a + m_range_start + offs_am[:, None]) scale_b_value = tl.load(scale_b + expert_id) accumulator *= scale_a_value * scale_b_value