vllm-project · QiliangCui · Dec 8, 2025 · Dec 8, 2025 · Dec 8, 2025
@@ -18,7 +18,7 @@
                               ReplicatedLinearWithLoRA,
                               RowParallelLinearWithLoRA)
 # yapf: enable
-from vllm.lora.models import LoRALayerWeights, PackedLoRALayerWeights
+from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights
 from vllm.lora.punica_wrapper import get_punica_wrapper
 from vllm.model_executor.layers.linear import (ColumnParallelLinear,
                                                MergedColumnParallelLinear,

@@ -55,7 +55,8 @@ def get_attn_backend_cls(cls, selected_backend: "AttentionBackendEnum",
                              head_size: int, dtype: jnp.dtype,
                              kv_cache_dtype: Optional[str], block_size: int,
                              use_v1: bool, use_mla: bool, has_sink: bool,
-                             use_sparse: bool, attn_type: Any) -> str:
+                             use_sparse: bool, use_mm_prefix: bool,
+                             attn_type: Any) -> str:
         from vllm.attention.backends.registry import AttentionBackendEnum
         if selected_backend != AttentionBackendEnum.PALLAS:
             logger.info("Cannot use %s backend on TPU.", selected_backend)