Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions python/sglang/srt/models/qwen3_vl_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.utils import get_layer_id
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen3_moe import Qwen3MoeDecoderLayer, Qwen3MoeModel
Expand Down Expand Up @@ -232,6 +233,17 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
params_dict = self._cached_params_dict
for name, loaded_weight in weights:
name = name.replace(r"model.language_model.", r"model.")
layer_id = get_layer_id(name)
if (
"visual" not in name
and layer_id is not None
and hasattr(self.model, "start_layer")
and (
layer_id < self.model.start_layer
or layer_id >= self.model.end_layer
)
):
continue

for param_name, weight_name, shard_id in stacked_params_mapping:
if "experts.gate_up_proj" in name or "experts.down_proj" in name:
Expand Down
Loading