vllm-project · jeejeelee · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
@@ -161,16 +161,28 @@ def _maybe_init_mm(
             device=self.device,
             lora_config=self.lora_config,
         )
+
         lm_prefix = self.mm_mapping.language_model[0]
         self.punica_wrapper_mapping[lm_prefix] = llm_punica_wrapper
-
         if self.lora_config.enable_tower_connector_lora:
             self.supports_tower_connector_lora = self.supports_mm and hasattr(
                 self.model, "get_num_mm_encoder_tokens"
             )
         if not self.supports_tower_connector_lora:
             return
 
+        if (
+            vllm_config.model_config.multimodal_config
+            and vllm_config.model_config.multimodal_config.language_model_only
+        ):
+            if self.supports_tower_connector_lora:
+                logger.warning(
+                    "Disabling `enable_tower_connector_lora` because the multimodal "
+                    "model is configured to initialize the language model only."
+                )
+                self.supports_tower_connector_lora = False
+            return
+
         logger.warning(
             "LoRA for the tower and connector of multimodal models is "
             "experimental and may contain bugs. Please report any related issues on "

@@ -10,11 +10,10 @@
     tensor_model_parallel_all_gather,
     tensor_model_parallel_all_reduce,
 )
+from vllm.lora.ops.triton_ops.utils import supports_pdl
 from vllm.triton_utils import tl, triton
 from vllm.utils.torch_utils import direct_register_custom_op
 
-from .utils import supports_pdl
-
 
 @triton.jit
 def _get_lora_id(