diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py
index 1373099fdf5a..010a73074759 100644
--- a/python/sglang/srt/models/qwen3_moe.py
+++ b/python/sglang/srt/models/qwen3_moe.py
@@ -1182,11 +1182,15 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                         logger.warning(f"Parameter {name} not found in params_dict")
 
         if not hasattr(self, "routed_experts_weights_of_layer"):
-            self.routed_experts_weights_of_layer = {
-                layer_id: self.model.layers[layer_id].mlp.get_moe_weights()
-                for layer_id in range(self.start_layer, self.end_layer)
-                if isinstance(self.model.layers[layer_id].mlp, Qwen3MoeSparseMoeBlock)
-            }
+            self.routed_experts_weights_of_layer = LazyValue(
+                lambda: {
+                    layer_id: self.model.layers[layer_id].mlp.get_moe_weights()
+                    for layer_id in range(self.start_layer, self.end_layer)
+                    if isinstance(
+                        self.model.layers[layer_id].mlp, Qwen3MoeSparseMoeBlock
+                    )
+                }
+            )
 
     @classmethod
     def get_model_config_for_expert_location(cls, config):