From f873a80841464dec2994e136f203fdf6c9f66f30 Mon Sep 17 00:00:00 2001 From: narutolhy <582909902@qq.com> Date: Sat, 21 Feb 2026 16:42:00 -0800 Subject: [PATCH 1/2] qwen3 vl moe: skip loading weights for layers outside [start_layer, end_layer) for PP --- python/sglang/srt/models/qwen3_vl_moe.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/sglang/srt/models/qwen3_vl_moe.py b/python/sglang/srt/models/qwen3_vl_moe.py index f98460665d37..c1544e9b34f2 100644 --- a/python/sglang/srt/models/qwen3_vl_moe.py +++ b/python/sglang/srt/models/qwen3_vl_moe.py @@ -26,6 +26,7 @@ from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE from sglang.srt.layers.quantization.base_config import QuantizationConfig +from sglang.srt.layers.utils import get_layer_id from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.qwen3_moe import Qwen3MoeDecoderLayer, Qwen3MoeModel @@ -232,6 +233,16 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = self._cached_params_dict for name, loaded_weight in weights: name = name.replace(r"model.language_model.", r"model.") + layer_id = get_layer_id(name) + if ( + layer_id is not None + and hasattr(self.model, "start_layer") + and ( + layer_id < self.model.start_layer + or layer_id >= self.model.end_layer + ) + ): + continue for param_name, weight_name, shard_id in stacked_params_mapping: if "experts.gate_up_proj" in name or "experts.down_proj" in name: From 27311a40cb1f72953a8780869d7b9f467a8cd317 Mon Sep 17 00:00:00 2001 From: narutolhy <582909902@qq.com> Date: Sat, 21 Feb 2026 19:34:16 -0800 Subject: [PATCH 2/2] qwen3 vl moe: exempt visual weights from the PP layer-range skip --- python/sglang/srt/models/qwen3_vl_moe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/models/qwen3_vl_moe.py b/python/sglang/srt/models/qwen3_vl_moe.py index c1544e9b34f2..88f63dbcad72 100644 
--- a/python/sglang/srt/models/qwen3_vl_moe.py +++ b/python/sglang/srt/models/qwen3_vl_moe.py @@ -235,7 +235,8 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): name = name.replace(r"model.language_model.", r"model.") layer_id = get_layer_id(name) if ( - layer_id is not None + "visual" not in name + and layer_id is not None and hasattr(self.model, "start_layer") and ( layer_id < self.model.start_layer