From 0c6414d886aa86ea64b7da5dc61e0fa9215c0b47 Mon Sep 17 00:00:00 2001 From: Qiliang Cui Date: Mon, 8 Dec 2025 17:36:05 +0000 Subject: [PATCH 1/2] Add an argument to TpuPlatform.get_attn_backend_cls to adapt to the interface change. Signed-off-by: Qiliang Cui --- tpu_inference/platforms/tpu_platform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tpu_inference/platforms/tpu_platform.py b/tpu_inference/platforms/tpu_platform.py index 9f2a785264..2c24f142e0 100644 --- a/tpu_inference/platforms/tpu_platform.py +++ b/tpu_inference/platforms/tpu_platform.py @@ -55,7 +55,8 @@ def get_attn_backend_cls(cls, selected_backend: "AttentionBackendEnum", head_size: int, dtype: jnp.dtype, kv_cache_dtype: Optional[str], block_size: int, use_v1: bool, use_mla: bool, has_sink: bool, - use_sparse: bool, attn_type: Any) -> str: + use_sparse: bool, use_mm_prefix: bool, + attn_type: Any) -> str: from vllm.attention.backends.registry import AttentionBackendEnum if selected_backend != AttentionBackendEnum.PALLAS: logger.info("Cannot use %s backend on TPU.", selected_backend) From 3a911aa5a46849eea0413f0518c993600eb644e1 Mon Sep 17 00:00:00 2001 From: Qiliang Cui Date: Mon, 8 Dec 2025 18:47:47 +0000 Subject: [PATCH 2/2] Fix LoRA test failure. Signed-off-by: Qiliang Cui --- tests/lora/test_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index 5f88e162a7..6e7db20bbd 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -18,7 +18,7 @@ ReplicatedLinearWithLoRA, RowParallelLinearWithLoRA) # yapf: enable -from vllm.lora.models import LoRALayerWeights, PackedLoRALayerWeights +from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights from vllm.lora.punica_wrapper import get_punica_wrapper from vllm.model_executor.layers.linear import (ColumnParallelLinear, MergedColumnParallelLinear,