vllm-project · DarkLight1337 · Jan 27, 2026 · Jan 27, 2026 · Jan 27, 2026
@@ -30,7 +30,12 @@ def create_dummy_model(repo: str, model_arch: str) -> PreTrainedModel:
     model_cls: PreTrainedModel = getattr(transformers, model_arch)
     config = AutoConfig.from_pretrained(repo)
     with torch.device("meta"):
-        return model_cls._from_config(config)
+        model = model_cls._from_config(config)
+    # TODO(hmellor): Remove this once Transformers has fixed tied weights on meta device
+    # https://github.com/huggingface/transformers/issues/43522
+    if getattr(config.get_text_config(), "tie_word_embeddings", False):
+        model.tie_weights()
+    return model
 
 
 def model_architectures_for_test() -> list[str]:

@@ -249,7 +249,8 @@ def pipeline_parallel(self):
         # Layers before module list
         for name in pp_plan[:module_list_idx]:
             if self.pp_group.is_first_rank or (
-                self.text_config.tie_word_embeddings and self.pp_group.is_last_rank
+                getattr(self.text_config, "tie_word_embeddings", False)
+                and self.pp_group.is_last_rank
             ):
                 continue
             setattr(self.model, name, PPMissingLayer())

@@ -38,7 +38,8 @@ def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""):
 
         # Tell `Base.load_weights` to skip
         # `lm_head` if the model has tied word embeddings
-        if self.text_config.tie_word_embeddings:
+        tie_word_embeddings = getattr(self.text_config, "tie_word_embeddings", False)
+        if tie_word_embeddings:
             self.skip_prefixes.append("lm_head.")
 
         if self.pp_group.is_last_rank:
@@ -48,7 +49,7 @@ def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""):
                 quant_config=self.quant_config,
                 prefix=maybe_prefix(prefix, "lm_head"),
             )
-            if self.text_config.tie_word_embeddings:
+            if tie_word_embeddings:
                 self.lm_head = self.lm_head.tie_weights(
                     self.model.get_input_embeddings()
                 )