huggingface · hmellor · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026 · ArthurZucker
diff --git a/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py b/src/transformers/models/colmodernvbert/configuration_colmodernvbert.py
@@ -67,14 +67,24 @@ def __post_init__(self, **kwargs):
                 "`vlm_config` is `None`. Initializing `vlm_config` with the `Qwen2VLConfig` with default values."
             )
         elif isinstance(self.vlm_config, dict):
+            # Hoist `tie_word_embeddings` out of the nested sub-config dicts and onto `vlm_config` itself.
+            # Tolerate a missing/non-dict sub-config and a missing key: `None` means "not specified".
+            sub_sub_configs = [self.vlm_config.get(key) for key in ("text_config", "vision_config")]
+            tie_word_embeddings = {
+                s_s_c.pop("tie_word_embeddings", None) for s_s_c in sub_sub_configs if isinstance(s_s_c, dict)
+            }
+            tie_word_embeddings.discard(None)
+            if len(tie_word_embeddings) > 1:
+                raise ValueError(
+                    "`tie_word_embeddings` was specified in both text and vision configs but with different values."
+                )
+            if tie_word_embeddings:
+                self.vlm_config["tie_word_embeddings"] = tie_word_embeddings.pop()
             self.vlm_config = CONFIG_MAPPING[self.vlm_config["model_type"]](**self.vlm_config)
 
         if not hasattr(self.vlm_config, "vocab_size"):
             self.vlm_config.vocab_size = self.vlm_config.get_text_config().vocab_size
 
-        # Move `tie_word_embeddings` under `vlm_config` for BC
-        if self.vlm_config.text_config.tie_word_embeddings and not self.vlm_config.tie_word_embeddings:
-            self.vlm_config.tie_word_embeddings = self.vlm_config.text_config.tie_word_embeddings
         super().__post_init__(**kwargs)
 
     def get_text_config(self, *args, **kwargs) -> PreTrainedConfig:

diff --git a/src/transformers/models/colqwen2/configuration_colqwen2.py b/src/transformers/models/colqwen2/configuration_colqwen2.py
@@ -51,14 +51,24 @@ def __post_init__(self, **kwargs):
                 "`vlm_config` is `None`. Initializing `vlm_config` with the `Qwen2VLConfig` with default values."
             )
         elif isinstance(self.vlm_config, dict):
+            # Hoist `tie_word_embeddings` out of the nested sub-config dicts and onto `vlm_config` itself.
+            # Tolerate a missing/non-dict sub-config and a missing key: `None` means "not specified".
+            sub_sub_configs = [self.vlm_config.get(key) for key in ("text_config", "vision_config")]
+            tie_word_embeddings = {
+                s_s_c.pop("tie_word_embeddings", None) for s_s_c in sub_sub_configs if isinstance(s_s_c, dict)
+            }
+            tie_word_embeddings.discard(None)
+            if len(tie_word_embeddings) > 1:
+                raise ValueError(
+                    "`tie_word_embeddings` was specified in both text and vision configs but with different values."
+                )
+            if tie_word_embeddings:
+                self.vlm_config["tie_word_embeddings"] = tie_word_embeddings.pop()
             self.vlm_config = CONFIG_MAPPING[self.vlm_config["model_type"]](**self.vlm_config)
 
         if not hasattr(self.vlm_config, "vocab_size"):
             self.vlm_config.vocab_size = self.vlm_config.get_text_config().vocab_size
 
-        # Move `tie_word_embeddings` under `vlm_config` for BC
-        if self.vlm_config.text_config.tie_word_embeddings and not self.vlm_config.tie_word_embeddings:
-            self.vlm_config.tie_word_embeddings = self.vlm_config.text_config.tie_word_embeddings
         super().__post_init__(**kwargs)
 
     def get_text_config(self, *args, **kwargs) -> PreTrainedConfig:

diff --git a/src/transformers/models/modernvbert/modeling_modernvbert.py b/src/transformers/models/modernvbert/modeling_modernvbert.py
@@ -406,7 +406,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
 class ModernVBertForMaskedLM(ModernVBertPreTrainedModel):
     _tied_weights_keys = {"lm_head.weight": "model.text_model.embeddings.tok_embeddings.weight"}
 
-    def __init__(self, config):
+    def __init__(self, config: ModernVBertConfig):
         super().__init__(config)
 
         self.vocab_size = config.text_config.vocab_size

diff --git a/src/transformers/models/modernvbert/modular_modernvbert.py b/src/transformers/models/modernvbert/modular_modernvbert.py
@@ -335,7 +335,7 @@ class ModernVBertPredictionHead(ModernBertPredictionHead):
 class ModernVBertForMaskedLM(ModernVBertPreTrainedModel):
     _tied_weights_keys = {"lm_head.weight": "model.text_model.embeddings.tok_embeddings.weight"}
 
-    def __init__(self, config):
+    def __init__(self, config: ModernVBertConfig):
         super().__init__(config)
 
         self.vocab_size = config.text_config.vocab_size

diff --git a/src/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py
@@ -123,7 +123,6 @@ class Qwen2_5_VLTextConfig(PreTrainedConfig):
     bos_token_id: int | None = 151643
     eos_token_id: int | list[int] | None = 151645
     pad_token_id: int | None = None
-    tie_word_embeddings: bool = False
 
     def __post_init__(self, **kwargs):
         self.sliding_window = self.sliding_window if self.use_sliding_window else None

diff --git a/src/transformers/models/qwen2_vl/configuration_qwen2_vl.py b/src/transformers/models/qwen2_vl/configuration_qwen2_vl.py
@@ -100,7 +100,6 @@ class Qwen2VLTextConfig(PreTrainedConfig):
     bos_token_id: int | None = 151643
     eos_token_id: int | list[int] | None = 151645
     pad_token_id: int | None = None
-    tie_word_embeddings: bool = False
 
     def __post_init__(self, **kwargs):
         self.sliding_window = self.sliding_window if self.use_sliding_window else None