From f60dbe80c8f1b0a12c4ac52c09a3933a84614e17 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 24 Mar 2026 22:06:22 +0100
Subject: [PATCH 1/2] Better weight tying check for multimodal models

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/config/vllm.py | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index c7449840d525..995da38bc9e9 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -550,26 +550,36 @@ def with_hf_config(
 
         model_config = copy.deepcopy(self.model_config)
 
-        if (
-            model_config.is_multimodal_model
-            and hasattr(model_config.hf_config, "tie_word_embeddings")
-            and not hasattr(hf_config.get_text_config(), "tie_word_embeddings")
+        if model_config.is_multimodal_model and hasattr(
+            model_config.hf_config, "tie_word_embeddings"
         ):
             # In Transformers v5, tie_word_embeddings belongs to the config of the class
             # that can see both layers to be tied. For example:
             #
             # SomeVLModel:
-            #   self.language_model = SomeLanguageModel()
-            #   self.vision_model = SomeVisionModel()
+            #   self.language_model = SomeLanguageModel(SomeVLTextConfig)
+            #   self.vision_model = SomeVisionModel(SomeVLVisionConfig)
             #
             # SomeVLModelForMultimodalLM:
-            #   self.model = SomeVLModel()
+            #   self.model = SomeVLModel(SomeVLConfig)
+            #   self.lm_head = nn.Linear()
+            #
+            # Therefore, tie_word_embeddings is defined in SomeVLConfig and is not
+            # present in SomeVLTextConfig*. In vLLM, the lm_head belongs to the
+            # language_model, so we must ensure that tie_word_embeddings is set in the
+            # language_model's config.
+            #
+            # *For some models, SomeVLTextConfig may also have a tie_word_embeddings
+            # field. This is only the case if SomeVLTextConfig is also used for a text
+            # only version of the same model. For example:
+            #
+            # SomeVLModelForCausalLM:
+            #   self.model = SomeLanguageModel(SomeVLTextConfig)
             #   self.lm_head = nn.Linear()
             #
-            # Therefore, tie_word_embeddings is defined in SomeVLModelForMultimodalLM's
-            # config and is not present in SomeVLModel's config. In vLLM, the lm_head
-            # belongs to the language_model, so we must ensure that tie_word_embeddings
-            # is set in the language_model's config.
+            # Therefore, the presence of tie_word_embeddings in SomeVLTextConfig cannot
+            # be used as a signal for whether tie_word_embeddings should be copied from
+            # hf_config to the language_model config.
             tie_word_embeddings = model_config.hf_config.tie_word_embeddings
             hf_config.get_text_config().tie_word_embeddings = tie_word_embeddings
 

From 0aeb9951b3eafab65a4f8ef7662f31a80288af8d Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Wed, 25 Mar 2026 11:43:42 +0100
Subject: [PATCH 2/2] Only do the `tie_word_embeddings` patch in v5

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/config/vllm.py | 61 ++++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index 995da38bc9e9..88942fc8685e 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -13,10 +13,12 @@
 from datetime import datetime
 from enum import IntEnum
 from functools import lru_cache
+from importlib.metadata import version
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal, TypeVar, get_args
 
 import torch
+from packaging.version import Version
 from pydantic import ConfigDict, Field, model_validator
 
 import vllm.envs as envs
@@ -550,36 +552,37 @@ def with_hf_config(
 
         model_config = copy.deepcopy(self.model_config)
 
-        if model_config.is_multimodal_model and hasattr(
-            model_config.hf_config, "tie_word_embeddings"
+        # In Transformers v5, tie_word_embeddings belongs to the config of the class
+        # that can see both layers to be tied. For example:
+        #
+        # SomeVLModel:
+        #   self.language_model = SomeLanguageModel(SomeVLTextConfig)
+        #   self.vision_model = SomeVisionModel(SomeVLVisionConfig)
+        #
+        # SomeVLModelForMultimodalLM:
+        #   self.model = SomeVLModel(SomeVLConfig)
+        #   self.lm_head = nn.Linear()
+        #
+        # Therefore, tie_word_embeddings is defined in SomeVLConfig and is not present
+        # in SomeVLTextConfig*. In vLLM, the lm_head belongs to the language_model, so
+        # we must ensure that tie_word_embeddings is set in the language_model's config.
+        #
+        # *For some models, SomeVLTextConfig may also have a tie_word_embeddings field.
+        # This is only the case if SomeVLTextConfig is also used for a text-only version
+        # of the same model. For example:
+        #
+        # SomeVLModelForCausalLM:
+        #   self.model = SomeLanguageModel(SomeVLTextConfig)
+        #   self.lm_head = nn.Linear()
+        #
+        # Therefore, the presence of tie_word_embeddings in SomeVLTextConfig cannot
+        # be used as a signal for whether tie_word_embeddings should be copied from
+        # hf_config to the language_model config.
+        if (
+            Version(version("transformers")) >= Version("5.0.0")
+            and model_config.is_multimodal_model
+            and hasattr(model_config.hf_config, "tie_word_embeddings")
         ):
-            # In Transformers v5, tie_word_embeddings belongs to the config of the class
-            # that can see both layers to be tied. For example:
-            #
-            # SomeVLModel:
-            #   self.language_model = SomeLanguageModel(SomeVLTextConfig)
-            #   self.vision_model = SomeVisionModel(SomeVLVisionConfig)
-            #
-            # SomeVLModelForMultimodalLM:
-            #   self.model = SomeVLModel(SomeVLConfig)
-            #   self.lm_head = nn.Linear()
-            #
-            # Therefore, tie_word_embeddings is defined in SomeVLConfig and is not
-            # present in SomeVLTextConfig*. In vLLM, the lm_head belongs to the
-            # language_model, so we must ensure that tie_word_embeddings is set in the
-            # language_model's config.
-            #
-            # *For some models, SomeVLTextConfig may also have a tie_word_embeddings
-            # field. This is only the case if SomeVLTextConfig is also used for a text
-            # only version of the same model. For example:
-            #
-            # SomeVLModelForCausalLM:
-            #   self.model = SomeLanguageModel(SomeVLTextConfig)
-            #   self.lm_head = nn.Linear()
-            #
-            # Therefore, the presence of tie_word_embeddings in SomeVLTextConfig cannot
-            # be used as a signal for whether tie_word_embeddings should be copied from
-            # hf_config to the language_model config.
             tie_word_embeddings = model_config.hf_config.tie_word_embeddings
             hf_config.get_text_config().tie_word_embeddings = tie_word_embeddings