diff --git a/python/sglang/srt/models/qwen3_omni_moe.py b/python/sglang/srt/models/qwen3_omni_moe.py index 8663e5ac5a05..ae5b8332d222 100644 --- a/python/sglang/srt/models/qwen3_omni_moe.py +++ b/python/sglang/srt/models/qwen3_omni_moe.py @@ -31,7 +31,6 @@ ) from sglang.srt.configs.qwen3_vl import Qwen3VLMoeConfig from sglang.srt.layers.attention.vision import VisionAttention -from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE from sglang.srt.layers.quantization.base_config import QuantizationConfig @@ -318,7 +317,7 @@ def __init__( super().__init__() self.hidden_size = context_dim * (spatial_merge_size**2) self.use_postshuffle_norm = use_postshuffle_norm - self.ln_q = RMSNorm( + self.ln_q = nn.LayerNorm( self.hidden_size if use_postshuffle_norm else context_dim, eps=1e-6 ) self.mlp = nn.ModuleList(