|
11 | 11 | from ...distributed import allgather, reducescatter |
12 | 12 | from ...expert_statistic import ExpertStatistic |
13 | 13 | from ...model_config import ModelConfig |
14 | | -from ...utils import EventType, Fp4QuantizedTensor, swizzle_sf |
| 14 | +from ...utils import EventType, Fp4QuantizedTensor |
15 | 15 | from .deep_ep_utils import buffer_pool, deep_ep_installed |
16 | 16 | from .interface import MoE |
17 | 17 | from .moe_load_balancer import get_moe_load_balancer |
@@ -552,7 +552,6 @@ def forward_chunk( |
552 | 552 | # Fp4 gemm has extra scaling factor |
553 | 553 | if x_sf is not None: |
554 | 554 | assert not x_is_sf_swizzled, "Fp4QuantizedTensor should not be swizzled before allgather" |
555 | | - x_sf = swizzle_sf(x_sf, x_row, x_col, self.scaling_vector_size) |
556 | 555 |
557 | 556 | if self.layer_load_balancer and not self.layer_load_balancer.is_static_routing( |
558 | 557 | ): |
@@ -657,8 +656,6 @@ def forward_chunk( |
657 | 656 | x = x.reshape(x.shape[0] * x.shape[1], x.shape[2]).view(x_dtype) |
658 | 657 | x_sf = x_sf.reshape(x_sf.shape[0] * x_sf.shape[1], |
659 | 658 | x_sf.shape[2]).view(x_sf_dtype) |
660 | | - x_sf = swizzle_sf(x_sf, x.shape[0], x.shape[1] * 2, |
661 | | - self.scaling_vector_size) |
662 | 659 | token_selected_slots = token_selected_slots.view(x.shape[0], 1) |
663 | 660 | token_final_scales = torch.ones_like( |
664 | 661 | token_selected_slots, dtype=token_final_scales.dtype) |
@@ -967,10 +964,6 @@ def alltoall_postquant_dispatch(self, x: torch.Tensor, x_sf: torch.Tensor, |
967 | 964 | self.alltoall_workspace, |
968 | 965 | self.ep_rank, self.ep_size) |
969 | 966 |
970 | | - if self.has_nvfp4: |
971 | | - x_sf = swizzle_sf(x_sf, x.shape[0], x.shape[1] * 2, |
972 | | - self.scaling_vector_size) |
973 | | - |
974 | 967 | return x, x_sf |
975 | 968 |
976 | 969 | def alltoall_combine(self, final_hidden_states: torch.Tensor, |
|