diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 323e0ee09fc9..374dffde5724 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -1899,11 +1899,11 @@ def forward_impl( ) post_quant_allgather = ( - has_flashinfer_trtllm_fused_moe() - and self.quant_method is not None + self.quant_method is not None and self.dp_size > 1 and self.use_ep and isinstance(self.quant_method, ModelOptNvFp4FusedMoE) + and has_flashinfer_trtllm_fused_moe() ) if post_quant_allgather: hidden_states_to_dispatch, extra_tensors = (