vllm/model_executor/layers/fused_moe/runner/moe_runner.py (6 changes: 0 additions, 6 deletions)
```diff
@@ -550,14 +550,10 @@ def forward(
             hidden_states
         )

-        # Record before `_maybe_pad_hidden_states` pads activations to match
-        # `moe_config.hidden_dim`, e.g. after `align_trtllm_fp4_moe_hidden_dim_for_fi`
-        routed_hidden_dim = hidden_states.shape[-1]
         hidden_states, og_hidden_dim = self._maybe_pad_hidden_states(
             shared_experts_input,
             hidden_states,
         )
```
**Comment on lines 553 to 556 — Contributor (severity: high):**

Reverting the padding detection logic reintroduces a bug in which `fused_output` and `shared_output` have mismatched dimensions when padding is applied (Fixes #35949). Instead of a full revert, these variables should be retained so the routed output can be unpadded before the outputs are combined:

```python
        # Record before `_maybe_pad_hidden_states` pads activations to match
        # `moe_config.hidden_dim`, e.g. after `align_trtllm_fp4_moe_hidden_dim_for_fi`
        routed_hidden_dim = hidden_states.shape[-1]
        hidden_states, og_hidden_dim = self._maybe_pad_hidden_states(
            shared_experts_input,
            hidden_states,
        )
        hidden_dim_was_padded = hidden_states.shape[-1] > routed_hidden_dim
```
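
For illustration, a minimal standalone sketch of the mismatch these variables guard against (plain `torch` with hypothetical sizes, not vLLM code): the routed path gets padded up to `moe_config.hidden_dim` while the shared-expert output keeps the original width, so the two cannot be combined until the routed output is sliced back.

```python
# Standalone sketch of the shape mismatch (hypothetical sizes, not vLLM code).
import torch

routed_hidden_dim = 2560  # activation width before padding (assumed)
padded_hidden_dim = 2880  # stand-in for moe_config.hidden_dim (assumed)

fused_output = torch.randn(4, padded_hidden_dim)   # routed path, padded
shared_output = torch.randn(4, routed_hidden_dim)  # shared path, unpadded

# shared_output + fused_output here would raise a size-mismatch RuntimeError,
# since (4, 2560) and (4, 2880) do not broadcast.
hidden_dim_was_padded = fused_output.shape[-1] > routed_hidden_dim
if hidden_dim_was_padded:
    fused_output = fused_output[..., :routed_hidden_dim]  # unpad routed output

combined = shared_output + fused_output  # both (4, 2560) now
```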

```diff
-        hidden_dim_was_padded = hidden_states.shape[-1] > routed_hidden_dim

         result = self._forward_entry(
             hidden_states,
@@ -577,8 +573,6 @@ def forward(

         # Extract outputs from result
         shared_output, fused_output = _unpack(result)
```

**Comment on `shared_output, fused_output = _unpack(result)` — Contributor (severity: high):**

The RuntimeError and garbled output reported in CI are caused by the non-contiguous tensor produced by slicing `fused_output`. Adding `.contiguous()` after the slice resolves these issues while preserving the fix for the shape mismatch when the shared and routed expert outputs are added.

Suggested change:
```diff
         shared_output, fused_output = _unpack(result)
+        if hidden_dim_was_padded:
+            fused_output = fused_output[..., :routed_hidden_dim].contiguous()
```
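
To see why the slice needs `.contiguous()`, a minimal standalone sketch (plain `torch`, hypothetical sizes, not vLLM code): narrowing the last dimension keeps the original strides, so the result is a non-contiguous view, which kernels that require densely packed inputs reject.

```python
# Standalone sketch: why slicing the last dim yields a non-contiguous tensor.
import torch

padded = torch.randn(4, 2880)  # hypothetical padded activations
view = padded[..., :2560]      # shape (4, 2560), but stride is still (2880, 1)
print(view.is_contiguous())    # False: rows are no longer densely packed
fixed = view.contiguous()      # copies into a densely packed buffer
print(fixed.is_contiguous())   # True
print(fixed.stride())          # (2560, 1)
```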

```diff
-        if hidden_dim_was_padded:
-            fused_output = fused_output[..., :routed_hidden_dim]

         # If combine kernel already reduced fused, reduce shared to match.
         # See note above re: the two all-reduce points.
```