From 0c6414d886aa86ea64b7da5dc61e0fa9215c0b47 Mon Sep 17 00:00:00 2001
From: Qiliang Cui <derrhein@gmail.com>
Date: Mon, 8 Dec 2025 17:36:05 +0000
Subject: [PATCH 1/2] Add use_mm_prefix argument to
 TpuPlatform.get_attn_backend_cls to adapt to the interface change.

Signed-off-by: Qiliang Cui <derrhein@gmail.com>
---
 tpu_inference/platforms/tpu_platform.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tpu_inference/platforms/tpu_platform.py b/tpu_inference/platforms/tpu_platform.py
index 9f2a785264..2c24f142e0 100644
--- a/tpu_inference/platforms/tpu_platform.py
+++ b/tpu_inference/platforms/tpu_platform.py
@@ -55,7 +55,8 @@ def get_attn_backend_cls(cls, selected_backend: "AttentionBackendEnum",
                              head_size: int, dtype: jnp.dtype,
                              kv_cache_dtype: Optional[str], block_size: int,
                              use_v1: bool, use_mla: bool, has_sink: bool,
-                             use_sparse: bool, attn_type: Any) -> str:
+                             use_sparse: bool, use_mm_prefix: bool,
+                             attn_type: Any) -> str:
         from vllm.attention.backends.registry import AttentionBackendEnum
         if selected_backend != AttentionBackendEnum.PALLAS:
             logger.info("Cannot use %s backend on TPU.", selected_backend)

From 3a911aa5a46849eea0413f0518c993600eb644e1 Mon Sep 17 00:00:00 2001
From: Qiliang Cui <derrhein@gmail.com>
Date: Mon, 8 Dec 2025 18:47:47 +0000
Subject: [PATCH 2/2] Fix LoRA test failure by importing from
 vllm.lora.lora_weights.

Signed-off-by: Qiliang Cui <derrhein@gmail.com>
---
 tests/lora/test_layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py
index 5f88e162a7..6e7db20bbd 100644
--- a/tests/lora/test_layers.py
+++ b/tests/lora/test_layers.py
@@ -18,7 +18,7 @@
                               ReplicatedLinearWithLoRA,
                               RowParallelLinearWithLoRA)
 # yapf: enable
-from vllm.lora.models import LoRALayerWeights, PackedLoRALayerWeights
+from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights
 from vllm.lora.punica_wrapper import get_punica_wrapper
 from vllm.model_executor.layers.linear import (ColumnParallelLinear,
                                                MergedColumnParallelLinear,