vllm-project · youkaichao · Apr 21, 2025 · Mar 27, 2025
diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md
@@ -23,6 +23,8 @@ Currently, there are no pre-built XPU wheels.
 - Second, install Python packages for vLLM XPU backend building:
 
 ```console
+git clone https://github.com/vllm-project/vllm.git
+cd vllm
 pip install --upgrade pip
 pip install -v -r requirements/xpu.txt
 ```

@@ -220,8 +220,8 @@ def forward(
                 value_cache,
                 attn_metadata.slot_mapping.flatten(),
                 self.kv_cache_dtype,
-                layer._k_scale,
-                layer._v_scale,
+                layer._k_scale_float,
+                layer._v_scale_float,
             )
 
         if attn_metadata.is_prompt:
@@ -306,8 +306,8 @@ def forward(
                     max_seq_len,
                     self.alibi_slopes,
                     self.kv_cache_dtype,
-                    layer._k_scale,
-                    layer._v_scale,
+                    layer._k_scale_float,
+                    layer._v_scale_float,
                 )
             else:
                 # Run PagedAttention V2.
@@ -339,8 +339,8 @@ def forward(
                     max_seq_len,
                     self.alibi_slopes,
                     self.kv_cache_dtype,
-                    layer._k_scale,
-                    layer._v_scale,
+                    layer._k_scale_float,
+                    layer._v_scale_float,
                 )
 
             # Reshape the output tensor.