diff --git a/vllm/model_executor/layers/rotary_embedding/base.py b/vllm/model_executor/layers/rotary_embedding/base.py index 17cd39bb8cd6..2c0f4dd13080 100644 --- a/vllm/model_executor/layers/rotary_embedding/base.py +++ b/vllm/model_executor/layers/rotary_embedding/base.py @@ -168,7 +168,8 @@ def forward_hip( else: # ops.rotary_embedding() is an in-place operation # that updates the query and key tensors. - self.forward_cuda(positions, query, key) + # FIXME: self.forward_cuda is not an in-place operation in eager mode. + return self.forward_cuda(positions, query, key) return query, key def forward_xpu(