From f60dbe80c8f1b0a12c4ac52c09a3933a84614e17 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:06:22 +0100 Subject: [PATCH 1/2] Better weight tying check for multimodal models Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/config/vllm.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index c7449840d525..995da38bc9e9 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -550,26 +550,36 @@ def with_hf_config( model_config = copy.deepcopy(self.model_config) - if ( - model_config.is_multimodal_model - and hasattr(model_config.hf_config, "tie_word_embeddings") - and not hasattr(hf_config.get_text_config(), "tie_word_embeddings") + if model_config.is_multimodal_model and hasattr( + model_config.hf_config, "tie_word_embeddings" ): # In Transformers v5, tie_word_embeddings belongs to the config of the class # that can see both layers to be tied. For example: # # SomeVLModel: - # self.language_model = SomeLanguageModel() - # self.vision_model = SomeVisionModel() + # self.language_model = SomeLanguageModel(SomeVLTextConfig) + # self.vision_model = SomeVisionModel(SomeVLVisionConfig) # # SomeVLModelForMultimodalLM: - # self.model = SomeVLModel() + # self.model = SomeVLModel(SomeVLConfig) + # self.lm_head = nn.Linear() + # + # Therefore, tie_word_embeddings is defined in SomeVLConfig and is not + # present in SomeVLTextConfig*. In vLLM, the lm_head belongs to the + # language_model, so we must ensure that tie_word_embeddings is set in the + # language_model's config. + # + # *For some models, SomeVLTextConfig may also have a tie_word_embeddings + # field. This is only the case if SomeVLTextConfig is also used for a text + # only version of the same model. For example: + # + # SomeVLModelForCausalLM: + # self.model = SomeLanguageModel(SomeVLTextConfig) # self.lm_head = nn.Linear() # - # Therefore, tie_word_embeddings is defined in SomeVLModelForMultimodalLM's - # config and is not present in SomeVLModel's config. In vLLM, the lm_head - # belongs to the language_model, so we must ensure that tie_word_embeddings - # is set in the language_model's config. + # Therefore, the presence of tie_word_embeddings in SomeVLTextConfig cannot + # be used as a signal for whether tie_word_embeddings should be copied from + # hf_config to the language_model config. tie_word_embeddings = model_config.hf_config.tie_word_embeddings hf_config.get_text_config().tie_word_embeddings = tie_word_embeddings From 0aeb9951b3eafab65a4f8ef7662f31a80288af8d Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 25 Mar 2026 11:43:42 +0100 Subject: [PATCH 2/2] Only do the `tie_word_embeddings` patch in v5 Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/config/vllm.py | 61 ++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 995da38bc9e9..88942fc8685e 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -13,10 +13,12 @@ from datetime import datetime from enum import IntEnum from functools import lru_cache +from importlib.metadata import version from pathlib import Path from typing import TYPE_CHECKING, Any, Literal, TypeVar, get_args import torch +from packaging.version import Version from pydantic import ConfigDict, Field, model_validator import vllm.envs as envs @@ -550,36 +552,37 @@ def with_hf_config( model_config = copy.deepcopy(self.model_config) - if model_config.is_multimodal_model and hasattr( - model_config.hf_config, "tie_word_embeddings" + # In Transformers v5, tie_word_embeddings belongs to the config of the class + # that can see both layers to be tied. For example: + # + # SomeVLModel: + # self.language_model = SomeLanguageModel(SomeVLTextConfig) + # self.vision_model = SomeVisionModel(SomeVLVisionConfig) + # + # SomeVLModelForMultimodalLM: + # self.model = SomeVLModel(SomeVLConfig) + # self.lm_head = nn.Linear() + # + # Therefore, tie_word_embeddings is defined in SomeVLConfig and is not present + # in SomeVLTextConfig*. In vLLM, the lm_head belongs to the language_model, so + # we must ensure that tie_word_embeddings is set in the language_model's config. + # + # *For some models, SomeVLTextConfig may also have a tie_word_embeddings field. + # This is only the case if SomeVLTextConfig is also used for a text only version + # of the same model. For example: + # + # SomeVLModelForCausalLM: + # self.model = SomeLanguageModel(SomeVLTextConfig) + # self.lm_head = nn.Linear() + # + # Therefore, the presence of tie_word_embeddings in SomeVLTextConfig cannot + # be used as a signal for whether tie_word_embeddings should be copied from + # hf_config to the language_model config. + if ( + Version(version("transformers")) >= Version("5.0.0") + and model_config.is_multimodal_model + and hasattr(model_config.hf_config, "tie_word_embeddings") ): - # In Transformers v5, tie_word_embeddings belongs to the config of the class - # that can see both layers to be tied. For example: - # - # SomeVLModel: - # self.language_model = SomeLanguageModel(SomeVLTextConfig) - # self.vision_model = SomeVisionModel(SomeVLVisionConfig) - # - # SomeVLModelForMultimodalLM: - # self.model = SomeVLModel(SomeVLConfig) - # self.lm_head = nn.Linear() - # - # Therefore, tie_word_embeddings is defined in SomeVLConfig and is not - # present in SomeVLTextConfig*. In vLLM, the lm_head belongs to the - # language_model, so we must ensure that tie_word_embeddings is set in the - # language_model's config. - # - # *For some models, SomeVLTextConfig may also have a tie_word_embeddings - # field. This is only the case if SomeVLTextConfig is also used for a text - # only version of the same model. For example: - # - # SomeVLModelForCausalLM: - # self.model = SomeLanguageModel(SomeVLTextConfig) - # self.lm_head = nn.Linear() - # - # Therefore, the presence of tie_word_embeddings in SomeVLTextConfig cannot - # be used as a signal for whether tie_word_embeddings should be copied from - # hf_config to the language_model config. tie_word_embeddings = model_config.hf_config.tie_word_embeddings hf_config.get_text_config().tie_word_embeddings = tie_word_embeddings