From 3e9b05b6e6dea02f22c630580964f64a116f1ac8 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Mar 2026 20:16:05 +0100 Subject: [PATCH 1/2] Fix tie_word_embedding issues with `Qwen2VL` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../colmodernvbert/configuration_colmodernvbert.py | 12 +++++++++--- .../models/colqwen2/configuration_colqwen2.py | 12 +++++++++--- .../models/modernvbert/modeling_modernvbert.py | 2 +- .../models/modernvbert/modular_modernvbert.py | 2 +- .../models/qwen2_5_vl/configuration_qwen2_5_vl.py | 1 - .../models/qwen2_vl/configuration_qwen2_vl.py | 1 - 6 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py b/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py index ad63892e890c..b8b03b2b42de 100755 --- a/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py +++ b/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py @@ -67,14 +67,20 @@ def __post_init__(self, **kwargs): "`vlm_config` is `None`. Initializing `vlm_config` with the `Qwen2VLConfig` with default values." ) elif isinstance(self.vlm_config, dict): + sub_sub_configs = [self.vlm_config["text_config"], self.vlm_config["vision_config"]] + tie_word_embeddings = {s_s_c.pop("tie_word_embeddings") for s_s_c in sub_sub_configs} + tie_word_embeddings.discard(None) + if len(tie_word_embeddings) > 1: + raise ValueError( + "`tie_word_embeddings` was specified in both text and vision configs but with different values." + ) + if tie_word_embeddings: + self.vlm_config["tie_word_embeddings"] = tie_word_embeddings.pop() self.vlm_config = CONFIG_MAPPING[self.vlm_config["model_type"]](**self.vlm_config) if not hasattr(self.vlm_config, "vocab_size"): self.vlm_config.vocab_size = self.vlm_config.get_text_config().vocab_size - # Move `tie_word_embeddings` under `vlm_config` for BC - if self.vlm_config.text_config.tie_word_embeddings and not self.vlm_config.tie_word_embeddings: - self.vlm_config.tie_word_embeddings = self.vlm_config.text_config.tie_word_embeddings super().__post_init__(**kwargs) def get_text_config(self, *args, **kwargs) -> PreTrainedConfig: diff --git a/src/transformers/models/colqwen2/configuration_colqwen2.py b/src/transformers/models/colqwen2/configuration_colqwen2.py index 19a4fc65b48c..d47a63673e13 100644 --- a/src/transformers/models/colqwen2/configuration_colqwen2.py +++ b/src/transformers/models/colqwen2/configuration_colqwen2.py @@ -51,14 +51,20 @@ def __post_init__(self, **kwargs): "`vlm_config` is `None`. Initializing `vlm_config` with the `Qwen2VLConfig` with default values." ) elif isinstance(self.vlm_config, dict): + sub_sub_configs = [self.vlm_config["text_config"], self.vlm_config["vision_config"]] + tie_word_embeddings = {s_s_c.pop("tie_word_embeddings") for s_s_c in sub_sub_configs} + tie_word_embeddings.discard(None) + if len(tie_word_embeddings) > 1: + raise ValueError( + "`tie_word_embeddings` was specified in both text and vision configs but with different values." + ) + if tie_word_embeddings: + self.vlm_config["tie_word_embeddings"] = tie_word_embeddings.pop() self.vlm_config = CONFIG_MAPPING[self.vlm_config["model_type"]](**self.vlm_config) if not hasattr(self.vlm_config, "vocab_size"): self.vlm_config.vocab_size = self.vlm_config.get_text_config().vocab_size - # Move `tie_word_embeddings` under `vlm_config` for BC - if self.vlm_config.text_config.tie_word_embeddings and not self.vlm_config.tie_word_embeddings: - self.vlm_config.tie_word_embeddings = self.vlm_config.text_config.tie_word_embeddings super().__post_init__(**kwargs) def get_text_config(self, *args, **kwargs) -> PreTrainedConfig: diff --git a/src/transformers/models/modernvbert/modeling_modernvbert.py b/src/transformers/models/modernvbert/modeling_modernvbert.py index 7e58830ec1f6..b12a6a57f5f6 100755 --- a/src/transformers/models/modernvbert/modeling_modernvbert.py +++ b/src/transformers/models/modernvbert/modeling_modernvbert.py @@ -406,7 +406,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class ModernVBertForMaskedLM(ModernVBertPreTrainedModel): _tied_weights_keys = {"lm_head.weight": "model.text_model.embeddings.tok_embeddings.weight"} - def __init__(self, config): + def __init__(self, config: ModernVBertConfig): super().__init__(config) self.vocab_size = config.text_config.vocab_size diff --git a/src/transformers/models/modernvbert/modular_modernvbert.py b/src/transformers/models/modernvbert/modular_modernvbert.py index 3369bee145ec..e6eea13e7cfe 100755 --- a/src/transformers/models/modernvbert/modular_modernvbert.py +++ b/src/transformers/models/modernvbert/modular_modernvbert.py @@ -335,7 +335,7 @@ class ModernVBertPredictionHead(ModernBertPredictionHead): class ModernVBertForMaskedLM(ModernVBertPreTrainedModel): _tied_weights_keys = {"lm_head.weight": "model.text_model.embeddings.tok_embeddings.weight"} - def __init__(self, config): + def __init__(self, config: ModernVBertConfig): super().__init__(config) self.vocab_size = config.text_config.vocab_size diff --git a/src/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py index 09429d5b6ddb..911a5543ba48 100644 --- a/src/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py @@ -123,7 +123,6 @@ class Qwen2_5_VLTextConfig(PreTrainedConfig): bos_token_id: int | None = 151643 eos_token_id: int | list[int] | None = 151645 pad_token_id: int | None = None - tie_word_embeddings: bool = False def __post_init__(self, **kwargs): self.sliding_window = self.sliding_window if self.use_sliding_window else None diff --git a/src/transformers/models/qwen2_vl/configuration_qwen2_vl.py b/src/transformers/models/qwen2_vl/configuration_qwen2_vl.py index 1b35a5d08e55..536bca3be654 100644 --- a/src/transformers/models/qwen2_vl/configuration_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/configuration_qwen2_vl.py @@ -100,7 +100,6 @@ class Qwen2VLTextConfig(PreTrainedConfig): bos_token_id: int | None = 151643 eos_token_id: int | list[int] | None = 151645 pad_token_id: int | None = None - tie_word_embeddings: bool = False def __post_init__(self, **kwargs): self.sliding_window = self.sliding_window if self.use_sliding_window else None From d8fa49a1cd3671227ad6c779d31917ddf246dd16 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 24 Mar 2026 21:38:52 +0100 Subject: [PATCH 2/2] remove colqwen hack Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../colmodernvbert/configuration_colmodernvbert.py | 9 --------- .../models/colqwen2/configuration_colqwen2.py | 9 --------- 2 files changed, 18 deletions(-) diff --git a/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py b/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py index b8b03b2b42de..efa57dfc8640 100755 --- a/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py +++ b/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py @@ -67,15 +67,6 @@ def __post_init__(self, **kwargs): "`vlm_config` is `None`. Initializing `vlm_config` with the `Qwen2VLConfig` with default values." ) elif isinstance(self.vlm_config, dict): - sub_sub_configs = [self.vlm_config["text_config"], self.vlm_config["vision_config"]] - tie_word_embeddings = {s_s_c.pop("tie_word_embeddings") for s_s_c in sub_sub_configs} - tie_word_embeddings.discard(None) - if len(tie_word_embeddings) > 1: - raise ValueError( - "`tie_word_embeddings` was specified in both text and vision configs but with different values." - ) - if tie_word_embeddings: - self.vlm_config["tie_word_embeddings"] = tie_word_embeddings.pop() self.vlm_config = CONFIG_MAPPING[self.vlm_config["model_type"]](**self.vlm_config) if not hasattr(self.vlm_config, "vocab_size"): diff --git a/src/transformers/models/colqwen2/configuration_colqwen2.py b/src/transformers/models/colqwen2/configuration_colqwen2.py index d47a63673e13..ac9abcb5cfd9 100644 --- a/src/transformers/models/colqwen2/configuration_colqwen2.py +++ b/src/transformers/models/colqwen2/configuration_colqwen2.py @@ -51,15 +51,6 @@ def __post_init__(self, **kwargs): "`vlm_config` is `None`. Initializing `vlm_config` with the `Qwen2VLConfig` with default values." ) elif isinstance(self.vlm_config, dict): - sub_sub_configs = [self.vlm_config["text_config"], self.vlm_config["vision_config"]] - tie_word_embeddings = {s_s_c.pop("tie_word_embeddings") for s_s_c in sub_sub_configs} - tie_word_embeddings.discard(None) - if len(tie_word_embeddings) > 1: - raise ValueError( - "`tie_word_embeddings` was specified in both text and vision configs but with different values." - ) - if tie_word_embeddings: - self.vlm_config["tie_word_embeddings"] = tie_word_embeddings.pop() self.vlm_config = CONFIG_MAPPING[self.vlm_config["model_type"]](**self.vlm_config) if not hasattr(self.vlm_config, "vocab_size"):