Merged
5 changes: 4 additions & 1 deletion — vllm_gaudi/ops/hpu_fused_moe.py

@@ -160,7 +160,10 @@ def forward_oot(
             permuted_weights=True,
             activation=layer.activation,
         )
-        return output.view(*(output.size(0), *input_shape[1:]))
+        if layer.dp_size > 1:
+            return output.view(*(output.size(0), *input_shape[1:]))
+        else:
Comment on lines +163 to +165
Copilot AI · Jan 21, 2026
The conditional logic based on dp_size > 1 appears to be a workaround for handling different input shapes rather than addressing the root cause. Consider explicitly checking the input tensor dimensionality (len(input_shape)) to make the intent clearer and more maintainable. This would better document why different reshaping strategies are needed and make the code less fragile if dp_size semantics change.

Suggested change
-        if layer.dp_size > 1:
-            return output.view(*(output.size(0), *input_shape[1:]))
-        else:
+        if len(input_shape) == 2:
+            # Handle 2D inputs where the leading dimension may have been
+            # modified (e.g. by data parallel dispatch); keep the trailing
+            # dimension(s) from the original shape and infer the leading one
+            # from the actual output tensor.
+            return output.view(output.size(0), *input_shape[1:])
+        else:
+            # For higher-rank inputs, restore the original shape directly.
+            return output.view(*input_shape)


     def reduce_output(self, states: torch.Tensor) -> torch.Tensor:
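The shape-restoration rule the review suggests can be illustrated without torch. Below is a minimal, hypothetical sketch (the helper name `restored_shape` and the example shapes are invented for illustration, not taken from the PR): for a 2D input, keep the trailing dimensions of the original shape but take the leading dimension from the actual output tensor, since data-parallel dispatch may have changed the number of rows; for higher-rank inputs, restore the original shape verbatim.

```python
def restored_shape(output_shape, input_shape):
    """Hypothetical helper mirroring the suggested reshape logic.

    output_shape: the shape of the tensor actually produced by the MoE op.
    input_shape:  the shape of the tensor the caller originally passed in.
    """
    if len(input_shape) == 2:
        # 2D case: the leading (token) dimension may differ from the
        # original input, so infer it from the output and keep only the
        # trailing dimensions of the original shape.
        return (output_shape[0], *input_shape[1:])
    # Higher-rank case: restore the original shape directly.
    return tuple(input_shape)

# DP dispatch padded the token dimension from 7 to 8 rows:
print(restored_shape((8, 128), (7, 128)))     # → (8, 128)
# 3D input whose layout was untouched by dispatch:
print(restored_shape((6, 128), (2, 3, 128)))  # → (2, 3, 128)
```

Keying the branch on `len(input_shape)` rather than `layer.dp_size` documents the actual invariant (which shapes need which reshape), so the code stays correct even if `dp_size` semantics change.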