From 000e9525d8fe79832b65532655f3a3e7c956fe11 Mon Sep 17 00:00:00 2001
From: yan ma
Date: Thu, 27 Mar 2025 18:17:28 +0800
Subject: [PATCH] minor fix for XPU

Signed-off-by: yan ma
---
 .../getting_started/installation/gpu/xpu.inc.md |  2 ++
 vllm/attention/backends/ipex_attn.py            | 12 ++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md
index c41905f250f8..fbf5421eeec5 100644
--- a/docs/source/getting_started/installation/gpu/xpu.inc.md
+++ b/docs/source/getting_started/installation/gpu/xpu.inc.md
@@ -23,6 +23,8 @@ Currently, there are no pre-built XPU wheels.
 - Second, install Python packages for vLLM XPU backend building:
 
 ```console
+git clone https://github.com/vllm-project/vllm.git
+cd vllm
 pip install --upgrade pip
 pip install -v -r requirements/xpu.txt
 ```
diff --git a/vllm/attention/backends/ipex_attn.py b/vllm/attention/backends/ipex_attn.py
index 99917a92af5f..27959caa651a 100644
--- a/vllm/attention/backends/ipex_attn.py
+++ b/vllm/attention/backends/ipex_attn.py
@@ -220,8 +220,8 @@ def forward(
             value_cache,
             attn_metadata.slot_mapping.flatten(),
             self.kv_cache_dtype,
-            layer._k_scale,
-            layer._v_scale,
+            layer._k_scale_float,
+            layer._v_scale_float,
         )
 
         if attn_metadata.is_prompt:
@@ -306,8 +306,8 @@ def forward(
                     max_seq_len,
                     self.alibi_slopes,
                     self.kv_cache_dtype,
-                    layer._k_scale,
-                    layer._v_scale,
+                    layer._k_scale_float,
+                    layer._v_scale_float,
                 )
             else:
                 # Run PagedAttention V2.
@@ -339,8 +339,8 @@ def forward(
                     max_seq_len,
                     self.alibi_slopes,
                     self.kv_cache_dtype,
-                    layer._k_scale,
-                    layer._v_scale,
+                    layer._k_scale_float,
+                    layer._v_scale_float,
                 )
 
         # Reshape the output tensor.
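
Note (not part of the patch): the docs hunk simply makes the build steps copy-pasteable by cloning the repo before installing the XPU requirements. The `ipex_attn.py` hunks swap the tensor-valued scales (`layer._k_scale`, `layer._v_scale`) for their plain-float counterparts (`layer._k_scale_float`, `layer._v_scale_float`), since the IPEX custom ops take scalar float scale arguments rather than tensors. The sketch below illustrates that distinction; `fake_ipex_paged_attention` and its signature are hypothetical stand-ins for the real `torch.ops` kernel, not vLLM's actual API.

```python
# Minimal sketch of why the patch passes layer._k_scale_float instead of
# layer._k_scale. Custom-op schemas commonly declare scale arguments as plain
# floats and reject tensor inputs; the stub below emulates that check.
import torch


class Layer:
    def __init__(self, k_scale: float = 1.0, v_scale: float = 1.0):
        # Keep both representations: 0-dim tensors for kernels that want
        # device-side scalars, Python floats for kernels that want host scalars.
        self._k_scale = torch.tensor(k_scale, dtype=torch.float32)
        self._v_scale = torch.tensor(v_scale, dtype=torch.float32)
        self._k_scale_float = k_scale
        self._v_scale_float = v_scale


def fake_ipex_paged_attention(k_scale: float, v_scale: float) -> None:
    # Hypothetical stand-in for the IPEX op: enforce float-typed scales.
    for name, value in (("k_scale", k_scale), ("v_scale", v_scale)):
        if not isinstance(value, float):
            raise TypeError(
                f"{name} must be a float, got {type(value).__name__}")


layer = Layer()
fake_ipex_paged_attention(layer._k_scale_float, layer._v_scale_float)  # ok
try:
    fake_ipex_paged_attention(layer._k_scale, layer._v_scale)  # pre-patch call
except TypeError as exc:
    print(f"pre-patch failure mode: {exc}")
```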