diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index 0fecc7bbcc85..8ff98cd8f104 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -1979,6 +1979,12 @@ def apply( ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]: from vllm.model_executor.layers.fused_moe import fused_experts + # Lazy init: moe_quant_config may not yet be set if + # ensure_moe_quant_config_init() hasn't run (e.g. during the first + # compiled forward pass with piecewise backends). + if self.moe_quant_config is None: + self.moe_quant_config = self.get_fused_moe_quant_config(layer) + return fused_experts( x, layer.w13_weight_packed, diff --git a/vllm/model_executor/layers/quantization/moe_wna16.py b/vllm/model_executor/layers/quantization/moe_wna16.py index f5c679840432..3c6bf6171d71 100644 --- a/vllm/model_executor/layers/quantization/moe_wna16.py +++ b/vllm/model_executor/layers/quantization/moe_wna16.py @@ -376,6 +376,12 @@ def apply( f"Only SiLU activation is supported, not {layer.activation}." ) + # Lazy init: moe_quant_config may not yet be set if + # ensure_moe_quant_config_init() hasn't run (e.g. during the first + # compiled forward pass with piecewise backends).
+ if self.moe_quant_config is None: + self.moe_quant_config = self.get_fused_moe_quant_config(layer) + return fused_experts( x, layer.w13_qweight, diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index 80815616bb7d..7abc0e8ba34d 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -341,7 +341,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): quant_config=self.quant_config, prefix=maybe_prefix(prefix, "lm_head"), ) - if self.config.tie_word_embeddings: + if getattr(self.config, "tie_word_embeddings", False): self.lm_head.weight = self.model.embed_tokens.weight self.logits_processor = LogitsProcessor(self.config.vocab_size) self.make_empty_intermediate_tensors = (