From d7d52bce0481250ae65f039fcfdfbc6301d7d0cf Mon Sep 17 00:00:00 2001
From: Artur Fierka
Date: Tue, 20 Jan 2026 13:53:10 +0200
Subject: [PATCH 1/3] Fix Llama4 shape mismatch for 32k+ context window

Signed-off-by: Artur Fierka
---
 vllm_gaudi/ops/hpu_fused_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_gaudi/ops/hpu_fused_moe.py b/vllm_gaudi/ops/hpu_fused_moe.py
index 42cdad1526..099741aec7 100644
--- a/vllm_gaudi/ops/hpu_fused_moe.py
+++ b/vllm_gaudi/ops/hpu_fused_moe.py
@@ -89,7 +89,7 @@ def forward_oot(
             permuted_weights=True,
             activation=layer.activation,
         )
-        return output.view(*(output.size(0), *input_shape[1:]))
+        return output.view(*input_shape)
 
 
 def reduce_output(self, states: torch.Tensor) -> torch.Tensor:

From 2ed6db6a1ba2017ffcd9602a8231d2d8ffd840f7 Mon Sep 17 00:00:00 2001
From: Artur Fierka
Date: Tue, 20 Jan 2026 14:25:53 +0200
Subject: [PATCH 2/3] Add condition for dp_size

Signed-off-by: Artur Fierka
---
 vllm_gaudi/ops/hpu_fused_moe.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm_gaudi/ops/hpu_fused_moe.py b/vllm_gaudi/ops/hpu_fused_moe.py
index 099741aec7..b4fdfd56ef 100644
--- a/vllm_gaudi/ops/hpu_fused_moe.py
+++ b/vllm_gaudi/ops/hpu_fused_moe.py
@@ -89,7 +89,10 @@ def forward_oot(
             permuted_weights=True,
             activation=layer.activation,
         )
-        return output.view(*input_shape)
+        if layer.dp_size > 1:
+            output.view(*(output.size(0), *input_shape[1:]))
+        else:
+            return output.view(*input_shape)
 
 
 def reduce_output(self, states: torch.Tensor) -> torch.Tensor:

From e7c069c79d6ac16d1a7a9415a4df971f8798009d Mon Sep 17 00:00:00 2001
From: Artur Fierka
Date: Tue, 20 Jan 2026 21:59:40 +0200
Subject: [PATCH 3/3] Fix missing return

Signed-off-by: Artur Fierka
---
 vllm_gaudi/ops/hpu_fused_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_gaudi/ops/hpu_fused_moe.py b/vllm_gaudi/ops/hpu_fused_moe.py
index b4fdfd56ef..884b27c97d 100644
--- a/vllm_gaudi/ops/hpu_fused_moe.py
+++ b/vllm_gaudi/ops/hpu_fused_moe.py
@@ -90,7 +90,7 @@ def forward_oot(
             activation=layer.activation,
         )
         if layer.dp_size > 1:
-            output.view(*(output.size(0), *input_shape[1:]))
+            return output.view(*(output.size(0), *input_shape[1:]))
         else:
             return output.view(*input_shape)
 