@@ -1979,6 +1979,12 @@ def apply(
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
from vllm.model_executor.layers.fused_moe import fused_experts

# Lazy init: moe_quant_config may not yet be set if
# ensure_moe_quant_config_init() hasn't run (e.g. during the first
# compiled forward pass with piecewise backends).
if self.moe_quant_config is None:
self.moe_quant_config = self.get_fused_moe_quant_config(layer)
Comment on lines +1985 to +1986 — Contributor review (severity: high):

The lazy initialization of moe_quant_config here is critical for correctness when the standard initialization sequence is bypassed, such as during the first compiled forward pass. Without it, fused_experts would fall back to an unquantized configuration, producing incorrect results for WNA16-quantized layers.

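The guard pattern the comment describes can be sketched in isolation. This is a hypothetical minimal example, not vLLM's actual class: `QuantMethod` and its config dict are stand-ins, and only the `if self.moe_quant_config is None` check mirrors the diff above.

```python
class QuantMethod:
    """Hypothetical stand-in for a vLLM quantization method class."""

    def __init__(self):
        # Normally set by an explicit init step; a compiled first
        # forward pass may reach apply() before that step runs.
        self.moe_quant_config = None

    def get_fused_moe_quant_config(self, layer):
        # Stand-in for the real per-layer config builder.
        return {"weight_bits": 4, "group_size": getattr(layer, "group_size", 128)}

    def apply(self, layer):
        # Lazy init: build the config on first use if the usual
        # initialization sequence was skipped.
        if self.moe_quant_config is None:
            self.moe_quant_config = self.get_fused_moe_quant_config(layer)
        # The real apply() would pass this config on to the fused
        # kernel; here we just return it for illustration.
        return self.moe_quant_config
```

Because the attribute is cached after the first call, repeated invocations reuse the same config object rather than rebuilding it.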

return fused_experts(
x,
layer.w13_weight_packed,
6 changes: 6 additions & 0 deletions vllm/model_executor/layers/quantization/moe_wna16.py
@@ -376,6 +376,12 @@ def apply(
f"Only SiLU activation is supported, not {layer.activation}."
)

# Lazy init: moe_quant_config may not yet be set if
# ensure_moe_quant_config_init() hasn't run (e.g. during the first
# compiled forward pass with piecewise backends).
if self.moe_quant_config is None:
self.moe_quant_config = self.get_fused_moe_quant_config(layer)
Comment on lines +382 to +383 — Contributor review (severity: high):

Similar to the fix in compressed_tensors_moe.py, this lazy initialization ensures the quantization configuration is available before the first kernel invocation. This matters most for backends that rely on fused_experts receiving a valid quant_config to select the appropriate optimized kernels.


return fused_experts(
x,
layer.w13_qweight,
2 changes: 1 addition & 1 deletion vllm/model_executor/models/qwen3_vl_moe.py
@@ -341,7 +341,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
quant_config=self.quant_config,
prefix=maybe_prefix(prefix, "lm_head"),
)
if self.config.tie_word_embeddings:
if getattr(self.config, "tie_word_embeddings", False):
Contributor review (severity: high):

Using getattr with a default of False is a safer way to access tie_word_embeddings. It prevents AttributeError crashes when loading checkpoints that omit this field from their configuration, which has been observed in some Qwen3-VL MoE variants.

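The defensive access pattern from this change can be shown on its own. A minimal sketch, assuming a HuggingFace-style config object; `SimpleNamespace` and `should_tie_weights` are illustrative stand-ins, not vLLM code.

```python
from types import SimpleNamespace


def should_tie_weights(config) -> bool:
    # getattr with a default avoids AttributeError for checkpoints
    # whose config omits the tie_word_embeddings field entirely.
    return getattr(config, "tie_word_embeddings", False)


# Config that explicitly sets the field, as most checkpoints do.
full_cfg = SimpleNamespace(tie_word_embeddings=True)

# Config missing the field, as in some Qwen3-VL MoE variants;
# direct attribute access here would raise AttributeError.
sparse_cfg = SimpleNamespace()
```

Defaulting to False is the conservative choice: when the checkpoint is silent, the model keeps separate embedding and lm_head weights rather than tying them.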
self.lm_head.weight = self.model.embed_tokens.weight
self.logits_processor = LogitsProcessor(self.config.vocab_size)
self.make_empty_intermediate_tensors = (