From f8089976a2535683d430e11d78f7cd80c063ccae Mon Sep 17 00:00:00 2001
From: BBuf <1182563586@qq.com>
Date: Wed, 19 Nov 2025 21:34:40 +0800
Subject: [PATCH] delete useless pad kernel in sgl_moe_align_block_size

---
 .../layers/moe/fused_moe_triton/moe_align_block_size.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py b/python/sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py
index 64d0126d6271..ce1cae66e9e8 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/moe_align_block_size.py
@@ -69,11 +69,6 @@ def moe_align_block_size(
         (num_experts + 2,), dtype=torch.int32, device=topk_ids.device
     )
 
-    # Threshold based on benchmark results
-    fuse_sorted_ids_padding = sorted_ids.shape[0] <= 4096
-    if not fuse_sorted_ids_padding:
-        sorted_ids.fill_(topk_ids.numel())
-
     sgl_moe_align_block_size(
         topk_ids,
         num_experts + 1,
@@ -82,6 +77,6 @@ def moe_align_block_size(
         expert_ids,
         num_tokens_post_pad,
         cumsum_buffer,
-        fuse_sorted_ids_padding,
+        True,
     )
     return sorted_ids, expert_ids, num_tokens_post_pad
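
For context on what the deleted code did: a minimal pure-PyTorch sketch, not the real CUDA kernel. `moe_align_ref` and its harness are illustrative names introduced here, and the sketch assumes the usual `moe_align_block_size` contract, where padded slots of `sorted_ids` hold the sentinel `topk_ids.numel()`. It emulates both paths the old `fuse_sorted_ids_padding` flag selected between and checks that fusing the padding into the kernel (the unconditional `True` above) leaves the buffer in the same state as the deleted host-side `fill_` pre-pass, which is why the threshold branch and the extra kernel launch are dead weight.

```python
import torch


def moe_align_ref(topk_ids, num_experts, block_size, sorted_ids, fuse_padding):
    """Illustrative stand-in for sgl_moe_align_block_size (not the CUDA kernel)."""
    numel = topk_ids.numel()
    flat = topk_ids.flatten().long()
    counts = torch.bincount(flat, minlength=num_experts)
    # Round each expert's token count up to a multiple of block_size.
    padded = (counts + block_size - 1) // block_size * block_size
    if fuse_padding:
        # Fused path: initialize every slot to the sentinel inside the
        # "kernel" itself -- what passing True unconditionally now does.
        sorted_ids.fill_(numel)
    # On the old non-fused path the caller had already pre-filled
    # sorted_ids with the sentinel via the separate fill_ launch that
    # this patch deletes.
    starts = torch.cumsum(padded, 0) - padded      # segment start per expert
    exclusive = torch.cumsum(counts, 0) - counts   # rank base per expert
    order = torch.argsort(flat, stable=True)       # pairs grouped by expert
    experts_sorted = flat[order]
    rank = torch.arange(numel) - exclusive[experts_sorted]
    # Scatter the flattened token indices into block-aligned segments;
    # slots past each expert's real count keep the sentinel value.
    sorted_ids[starts[experts_sorted] + rank] = order.to(sorted_ids.dtype)
    expert_ids = torch.repeat_interleave(
        torch.arange(num_experts, dtype=torch.int32), padded // block_size
    )
    return expert_ids, int(padded.sum())


num_experts, block_size = 8, 4
topk_ids = torch.randint(0, num_experts, (16, 2))
max_len = topk_ids.numel() + num_experts * (block_size - 1)
fused = torch.empty(max_len, dtype=torch.int32)
prefilled = torch.full((max_len,), topk_ids.numel(), dtype=torch.int32)
moe_align_ref(topk_ids, num_experts, block_size, fused, fuse_padding=True)
moe_align_ref(topk_ids, num_experts, block_size, prefilled, fuse_padding=False)
assert torch.equal(fused, prefilled)  # both paths yield identical buffers
```

Since the end state of `sorted_ids` is identical either way, always fusing the padding trades a second kernel launch (plus the benchmark-derived 4096-element threshold that chose between the paths) for work the alignment kernel was already capable of doing itself.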