Merged
Commits
70 commits
1f0e24c
Upgrade transformers from 5.3.0 to 5.4.0
JustinTong0323 Mar 27, 2026
926cbda
Fix rope_parameters validation error for unregistered model types
JustinTong0323 Mar 27, 2026
2106153
Add easydict dependency for DeepSeek-OCR remote model code
JustinTong0323 Mar 27, 2026
a225590
Patch LlamaFlashAttention2 at import time for remote code compat
JustinTong0323 Mar 27, 2026
b80d563
Add addict dependency to pyproject_xpu.toml for DeepSeek-OCR
JustinTong0323 Mar 28, 2026
3714fc0
Patch BaseImageProcessor.__call__ for remote code compat
JustinTong0323 Mar 28, 2026
d4ced8c
Fix lint and TokenizersBackend fallback for transformers 5.4.0
JustinTong0323 Mar 28, 2026
be30493
Fix lint and improve TokenizersBackend fallback
JustinTong0323 Mar 28, 2026
76d74fe
Refactor transformers 5.4.0 compat patches into dedicated module
JustinTong0323 Mar 28, 2026
427a055
Extract helpers: _is_mistral_model, _ensure_sub_configs
JustinTong0323 Mar 28, 2026
9de7636
Split hf_transformers_utils.py into hf_transformers/ subpackage
JustinTong0323 Mar 28, 2026
6cbce29
Fix PR review findings: missing re-exports, version bug, logging
JustinTong0323 Mar 28, 2026
991929e
Update rope_parameters TODO with upstream fix reference (#45049)
JustinTong0323 Mar 28, 2026
58c6b92
Move mistral_utils.py into hf_transformers/ subpackage
JustinTong0323 Mar 28, 2026
3d72d60
Move Mistral helpers into mistral_utils.py for cohesion
JustinTong0323 Mar 28, 2026
88a181a
Remove unnecessary comments from hf_transformers package
JustinTong0323 Mar 28, 2026
d32d4c6
Refactor internals of hf_transformers package files
JustinTong0323 Mar 28, 2026
68fbd19
Improve error handling and fix docstring typo in hf_transformers
JustinTong0323 Mar 28, 2026
e2e97c0
Fix lint: black formatting in mistral_utils.py and tokenizer.py
JustinTong0323 Mar 28, 2026
6fa802f
Fix: don't escalate trust_remote_code in TokenizersBackend retry
JustinTong0323 Mar 28, 2026
617a93d
Simplify TokenizersBackend retry to single use_fast=False attempt
JustinTong0323 Mar 28, 2026
509fdf9
Absorb PR #21586: patch is_base_mistral in CI to avoid HF 429 rate li…
JustinTong0323 Mar 28, 2026
eefe2e3
Merge remote-tracking branch 'upstream/main' into upgrade/transformer…
JustinTong0323 Mar 28, 2026
1741525
Fix PR review findings: error handling, dead code, and code quality
JustinTong0323 Mar 28, 2026
0a39854
Revert TokenizersBackend raise to warning for CI compatibility
JustinTong0323 Mar 28, 2026
53bceac
Fix TokenizersBackend in processor: reload tokenizer via get_tokenizer
JustinTong0323 Mar 31, 2026
7867f01
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Mar 31, 2026
39ec3d3
Fix TokenizersBackend for models with unmapped model_type
JustinTong0323 Apr 1, 2026
d4efd5e
Trigger CI
JustinTong0323 Apr 1, 2026
67f8298
Merge remote-tracking branch 'upstream/main' into upgrade/transformer…
JustinTong0323 Apr 1, 2026
6df79f4
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 1, 2026
7de1261
Fix error handling and code quality in hf_transformers package
JustinTong0323 Apr 1, 2026
b01b4b3
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 1, 2026
f341d7f
Simplify hf_transformers: dedup calls, extract constant, use shared l…
JustinTong0323 Apr 1, 2026
749714d
Move mistral/pixtral helpers to mistral_utils, import directly
JustinTong0323 Apr 1, 2026
42aea7d
Fix isort: remove extra blank line in __init__.py
JustinTong0323 Apr 1, 2026
f3a35a5
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 1, 2026
ad4cffc
Downgrade TokenizersBackend trust_remote_code error to warning
JustinTong0323 Apr 1, 2026
14a17d4
Trigger CI rerun
JustinTong0323 Apr 1, 2026
350a703
Fix TokenizersBackend for models with auto_map custom tokenizers
JustinTong0323 Apr 1, 2026
58dd64b
Fix transformers 5.4 compat: CUDA tensor numpy + nemotron_h pattern
JustinTong0323 Apr 2, 2026
b1e7f07
Simplify: dedup config read, hoist import, narrow except, fix docstring
JustinTong0323 Apr 2, 2026
86b8e6b
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 2, 2026
6411f96
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 6, 2026
cc81f62
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 8, 2026
140ee38
Port Gemma 4 config remapping from upstream to config.py
JustinTong0323 Apr 8, 2026
9f6f59f
Port upstream changes to hf_transformers subpackage
JustinTong0323 Apr 8, 2026
16a34c0
Move patch_mistral_common_tokenizer to mistral_utils.py
JustinTong0323 Apr 8, 2026
6a0c664
Fix error handling and add unit tests for hf_transformers subpackage
JustinTong0323 Apr 8, 2026
d03b8ff
Fix revision kwarg bug, dead code, comment, and add compat patch tests
JustinTong0323 Apr 8, 2026
05114eb
Add test_multi_item_scoring.py to not_in_ci in old test runner
JustinTong0323 Apr 8, 2026
9a7ed0c
Upgrade transformers from 5.4.0 to 5.5.0
JustinTong0323 Apr 9, 2026
7c259d9
Restore ValueError handler for deepseek_v32 in get_config
JustinTong0323 Apr 10, 2026
226cd4d
Bump transformers from 5.5.0 to 5.5.3
JustinTong0323 Apr 10, 2026
647c4d6
Refactor get_tokenizer into focused helpers and move Mistral code to …
JustinTong0323 Apr 10, 2026
0b9017e
Trigger CI rerun after maintenance window
JustinTong0323 Apr 10, 2026
6f59c75
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 10, 2026
5cfdb0a
Trigger fresh NVIDIA CI run
JustinTong0323 Apr 10, 2026
66a21d1
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 11, 2026
84797a2
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 11, 2026
d9aceaa
Remove stale test_multi_item_scoring.py reference from run_suite.py
JustinTong0323 Apr 11, 2026
c191d81
Bump mistral_common>=1.11.0 for transformers 5.5.3 compatibility
JustinTong0323 Apr 12, 2026
22d00c1
Fix Qwen3-VL-MoE garbled output with transformers 5.5.3
JustinTong0323 Apr 12, 2026
7f25c2e
Fix InternVL test crash with transformers 5.5.3
JustinTong0323 Apr 12, 2026
cf28335
Fix MiniCPM-V-4 test with transformers 5.5.3 TokenizersBackend
JustinTong0323 Apr 12, 2026
52013c2
Fix Step-3.5-Flash config validation with transformers 5.5.3
JustinTong0323 Apr 13, 2026
adccb87
Merge branch 'main' into upgrade/transformers-5.4.0
JustinTong0323 Apr 14, 2026
ffb090b
Fix Qwen3_5Moe config classes with transformers 5.5.3
JustinTong0323 Apr 15, 2026
c15583e
Bump transformers 5.5.3 -> 5.5.4
JustinTong0323 Apr 15, 2026
8620596
Trigger CI
JustinTong0323 Apr 15, 2026
5 changes: 3 additions & 2 deletions python/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
     "modelscope",
     "msgspec",
     "ninja",
+    "easydict",  # Required by remote model code (e.g. DeepSeek-OCR) loaded via trust_remote_code; validated by transformers 5.4+ check_imports
     "numpy",
     "nvidia-cutlass-dsl>=4.4.1",
     "nvidia-ml-py",
@@ -70,8 +71,8 @@ dependencies = [
     "av ; sys_platform == 'linux' and (platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'armv7l')",
     "torchvision",
     "tqdm",
-    "mistral_common>=1.9.0",
-    "transformers==5.3.0",
+    "mistral_common>=1.11.0",
+    "transformers==5.5.4",
     "uvicorn",
     "uvloop",
     "watchfiles",
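Why easydict lands in every pyproject variant: transformers 5.4+ runs a check_imports pass over remote modeling code pulled in via trust_remote_code and refuses to execute a file whose top-level imports are not installed, and DeepSeek-OCR's remote code imports easydict. A minimal sketch of that failure mode follows; the helper below is illustrative only, not transformers' actual implementation.

import importlib.util

def check_remote_code_imports(source: str) -> None:
    """Illustrative stand-in for transformers' check_imports: collect the
    top-level module names a remote modeling file imports and fail early
    if any of them are not installed."""
    needed = set()
    for line in source.splitlines():
        stripped = line.strip()
        if stripped.startswith(("import ", "from ")):
            needed.add(stripped.split()[1].split(".")[0])
    missing = sorted(m for m in needed if importlib.util.find_spec(m) is None)
    if missing:
        raise ImportError(
            "This modeling file requires packages that were not found in your "
            f"environment: {', '.join(missing)}."
        )

# DeepSeek-OCR's remote code does `from easydict import EasyDict`; without the
# new "easydict" dependency this check raises before the model ever loads.
check_remote_code_imports("from easydict import EasyDict")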
5 changes: 3 additions & 2 deletions python/pyproject_cpu.toml
@@ -31,6 +31,7 @@ dependencies = [
     "llguidance>=0.7.11,<0.8.0",
     "modelscope",
     "msgspec",
+    "easydict",
     "ninja",
     "numpy",
     "openai-harmony==0.0.4",
@@ -60,8 +61,8 @@ dependencies = [
     "torchaudio==2.9.0",
     "torchvision==0.24.0",
     "tqdm",
-    "mistral_common>=1.9.0",
-    "transformers==5.3.0",
+    "mistral_common>=1.11.0",
+    "transformers==5.5.4",
     "triton==3.5.0",
     "uvicorn",
     "uvloop",
5 changes: 3 additions & 2 deletions python/pyproject_npu.toml
@@ -25,6 +25,7 @@ dependencies = [
     "datasets",
     "einops",
     "fastapi",
+    "easydict",
     "gguf",
     "hf_transfer",
     "huggingface_hub",
@@ -57,8 +58,8 @@ dependencies = [
     "timm==1.0.16",
     "torchao==0.9.0",
     "tqdm",
-    "mistral_common>=1.9.0",
-    "transformers==5.3.0",
+    "mistral_common>=1.11.0",
+    "transformers==5.5.4",
     "uvicorn",
     "uvloop",
     "xgrammar==0.1.32",
5 changes: 3 additions & 2 deletions python/pyproject_other.toml
@@ -25,6 +25,7 @@ runtime_common = [
     "build",
     "compressed-tensors",
     "datasets",
+    "easydict",
     "einops",
     "fastapi",
     "gguf",
@@ -57,8 +58,8 @@ runtime_common = [
     "timm==1.0.16",
     "torchao==0.9.0",
     "tqdm",
-    "mistral_common>=1.9.0",
-    "transformers==5.3.0",
+    "mistral_common>=1.11.0",
+    "transformers==5.5.4",
     "uvicorn",
     "uvloop",
     "xgrammar==0.1.32",
6 changes: 4 additions & 2 deletions python/pyproject_xpu.toml
@@ -27,7 +27,9 @@ dependencies = [
     "blobfile==3.0.0",
     "build",
     "compressed-tensors",
+    "addict",
     "datasets",
+    "easydict",
     "einops",
     "fastapi",
     "gguf",
@@ -60,8 +62,8 @@ dependencies = [
     "timm==1.0.16",
     "torchao==0.9.0+xpu",
     "tqdm",
-    "mistral_common>=1.9.0",
-    "transformers==5.3.0",
+    "mistral_common>=1.11.0",
+    "transformers==5.5.4",
     "uvicorn",
     "uvloop",
     # "xgrammar==0.1.24", xgrammar depends on CUDA PyTorch and Triton only
16 changes: 16 additions & 0 deletions python/sglang/srt/configs/qwen3_5.py
@@ -8,6 +8,9 @@ class Qwen3_5VisionConfig(Qwen3VLVisionConfig):
     model_type = "qwen3_5"
     base_config_key = "vision_config"

+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+

 class Qwen3_5TextConfig(Qwen3NextConfig):
     model_type = "qwen3_5_text"
@@ -109,14 +112,27 @@ def __init__(
 class Qwen3_5MoeVisionConfig(Qwen3_5VisionConfig):
     model_type = "qwen3_5_moe"

+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+

 class Qwen3_5MoeTextConfig(Qwen3_5TextConfig):
     model_type = "qwen3_5_moe_text"

+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+

+# All Moe variant classes need explicit __init__ because the kw_only=True
+# dataclass decorator in transformers v5.5.3+ auto-generates __init__ for
+# subclasses, bypassing parent __init__ methods that set up attributes
+# (e.g. norm_topk_prob, rope_scaling) and convert sub-config dicts to objects.
 class Qwen3_5MoeConfig(Qwen3_5Config):
     model_type = "qwen3_5_moe"
     sub_configs = {
         "vision_config": Qwen3_5MoeVisionConfig,
         "text_config": Qwen3_5MoeTextConfig,
     }
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
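The comment above is the crux of the Qwen3.5 changes: once the parent config classes go through a kw_only dataclass-style decorator (transformers v5.5.3+), a subclass that declares nothing gets a generated __init__ that never runs the hand-written parent __init__. A standard-library sketch of the same trap, and of why a trivial explicit __init__ fixes it (class and attribute names here are made up for illustration):

from dataclasses import dataclass

class ParentConfig:
    def __init__(self, **kwargs):
        # The parent does real setup work, e.g. deriving attributes and
        # converting nested dicts into config objects.
        self.norm_topk_prob = kwargs.pop("norm_topk_prob", True)
        for key, value in kwargs.items():
            setattr(self, key, value)

@dataclass(kw_only=True)  # analogous to the transformers v5.5.3+ decorator
class BrokenChildConfig(ParentConfig):
    model_type: str = "broken_child"

print(hasattr(BrokenChildConfig(), "norm_topk_prob"))  # False: parent __init__ bypassed

@dataclass(kw_only=True)
class FixedChildConfig(ParentConfig):
    model_type: str = "fixed_child"

    def __init__(self, **kwargs):
        # dataclass skips generating __init__ when the class defines one,
        # so the parent's setup runs again -- the same trick as the PR.
        super().__init__(**kwargs)

print(hasattr(FixedChildConfig(), "norm_topk_prob"))  # True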
9 changes: 9 additions & 0 deletions python/sglang/srt/configs/step3p5.py
@@ -94,4 +94,13 @@ def __init__(
         self.moe_layers_enum = moe_layers_enum
         self.layer_types = layer_types
         self.sliding_window = sliding_window
+        # The upstream Step-3.5-Flash config has layer_types with 48 entries
+        # but num_hidden_layers=45. The extra 3 are for MTP/nextn predict
+        # layers (indices 45-47) used by Step3p5DecoderLayer during EAGLE
+        # speculative decoding. Temporarily align num_hidden_layers to pass
+        # the transformers v5.5.3+ validator, then restore the real value.
+        real_num_hidden_layers = self.num_hidden_layers
+        if layer_types is not None and len(layer_types) != self.num_hidden_layers:
+            self.num_hidden_layers = len(layer_types)
         super().__init__(**kwargs)
+        self.num_hidden_layers = real_num_hidden_layers
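For readers unfamiliar with the validator being dodged here: newer transformers base configs cross-check len(layer_types) against num_hidden_layers during __init__, so a config whose layer_types also covers MTP/nextn layers fails validation. The stand-in below only paraphrases that check (the real transformers code differs) to show why the temporary override and restore around super().__init__ works:

class StrictBaseConfig:
    """Hypothetical stand-in for the transformers >=5.5.3 layer_types check."""

    def __init__(self, **kwargs):
        layer_types = getattr(self, "layer_types", None)
        if layer_types is not None and len(layer_types) != self.num_hidden_layers:
            raise ValueError(
                f"layer_types has {len(layer_types)} entries, expected "
                f"{self.num_hidden_layers}"
            )

class Step3p5LikeConfig(StrictBaseConfig):
    def __init__(self, num_hidden_layers=45, layer_types=None, **kwargs):
        self.num_hidden_layers = num_hidden_layers
        # 45 decoder layers + 3 MTP/nextn predict layers, as in Step-3.5-Flash.
        self.layer_types = layer_types or ["full_attention"] * 48
        real_num_hidden_layers = self.num_hidden_layers
        if len(self.layer_types) != self.num_hidden_layers:
            self.num_hidden_layers = len(self.layer_types)  # satisfy the validator
        super().__init__(**kwargs)
        self.num_hidden_layers = real_num_hidden_layers  # restore the real value

cfg = Step3p5LikeConfig()
assert cfg.num_hidden_layers == 45 and len(cfg.layer_types) == 48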
8 changes: 7 additions & 1 deletion python/sglang/srt/models/qwen3_vl.py
@@ -1091,9 +1091,15 @@ def __init__(
         if language_model_cls is Qwen3LLMModel:
             self.config: Qwen3VLConfig = config  # for qwen3-vl
         else:
-            self.config = config.text_config  # for qwen3-omni
+            self.config = config.text_config  # for qwen3-omni / qwen3-vl-moe
         self.config.encoder_only = getattr(config, "encoder_only", False)
         self.config.language_only = getattr(config, "language_only", False)
+        # Propagate tie_word_embeddings from parent config. In transformers
+        # v5.5.3+, Qwen3VLMoeTextConfig sets tie_word_embeddings=True by
+        # default but the actual model checkpoint has a separate lm_head.
+        # The parent Qwen3VLMoeConfig correctly has tie_word_embeddings=False.
+        if hasattr(config, "tie_word_embeddings"):
+            self.config.tie_word_embeddings = config.tie_word_embeddings

         if not hasattr(config, "encoder_only") or not config.encoder_only:
             self.model = language_model_cls(
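The tie_word_embeddings comment describes a config-level mismatch; the toy snippet below (not SGLang's actual loader, just an illustration of the mechanism) shows why the wrong flag can produce garbled output: once the head is tied, a checkpoint's independently trained lm_head weight can no longer coexist with the embedding matrix.

import torch.nn as nn

class TinyLM(nn.Module):
    def __init__(self, vocab_size=16, hidden_size=8, tie_word_embeddings=False):
        super().__init__()
        self.embed_tokens = nn.Embedding(vocab_size, hidden_size)
        self.lm_head = nn.Linear(hidden_size, vocab_size, bias=False)
        if tie_word_embeddings:
            # Tied: the output projection *is* the embedding matrix.
            self.lm_head.weight = self.embed_tokens.weight

# Checkpoint trained with an independent lm_head, as Qwen3-VL-MoE is.
checkpoint = TinyLM(tie_word_embeddings=False).state_dict()

tied = TinyLM(tie_word_embeddings=True)
tied.load_state_dict(checkpoint)
# Both checkpoint keys now write into the same underlying tensor, so whichever
# is copied last wins, and either the embeddings or the output projection ends
# up with the wrong values -- one way a wrongly tied head leads to garbled
# generations until the parent config's tie_word_embeddings=False is propagated.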
67 changes: 67 additions & 0 deletions python/sglang/srt/utils/hf_transformers/__init__.py
@@ -0,0 +1,67 @@
# Copyright 2023-2024 SGLang Team
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hugging Face Transformers utilities.

This package provides HF Transformers helpers, split into submodules
(common, compat, config, tokenizer, processor, mistral_utils).
All public symbols are re-exported here for convenience. The old import
path ``sglang.srt.utils.hf_transformers_utils`` is preserved by a
separate shim module.
"""

from .compat import apply_all as _apply_compat

_apply_compat()

from .common import ( # noqa: E402
    CONTEXT_LENGTH_KEYS,
    AutoConfig,
    attach_additional_stop_token_ids,
    check_gguf_file,
    download_from_hf,
    get_context_length,
    get_generation_config,
    get_hf_text_config,
    get_rope_config,
    get_sparse_attention_config,
    get_tokenizer_from_processor,
)
from .compat import normalize_rope_scaling_compat  # noqa: E402
from .config import get_config  # noqa: E402
from .processor import get_processor  # noqa: E402
from .tokenizer import (  # noqa: E402
    _fix_added_tokens_encoding,
    _fix_v5_add_bos_eos_token,
    get_tokenizer,
)

__all__ = [
    "AutoConfig",
    "CONTEXT_LENGTH_KEYS",
    "_fix_added_tokens_encoding",
    "_fix_v5_add_bos_eos_token",
    "attach_additional_stop_token_ids",
    "check_gguf_file",
    "download_from_hf",
    "get_config",
    "get_context_length",
    "get_generation_config",
    "get_hf_text_config",
    "get_processor",
    "get_rope_config",
    "get_sparse_attention_config",
    "get_tokenizer",
    "get_tokenizer_from_processor",
    "normalize_rope_scaling_compat",
]
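A short usage sketch of the new layout (the call signatures below are assumptions based on how these helpers are typically used in SGLang, not verified against the rest of this diff): everything re-exported here can be imported from the package root, and the pre-split path keeps working through the shim module mentioned in the docstring.

from sglang.srt.utils.hf_transformers import get_config, get_tokenizer

config = get_config("Qwen/Qwen2.5-0.5B-Instruct", trust_remote_code=True)
tokenizer = get_tokenizer("Qwen/Qwen2.5-0.5B-Instruct")

# Old import path, preserved by the separate hf_transformers_utils shim:
from sglang.srt.utils.hf_transformers_utils import get_tokenizer as legacy_get_tokenizer
assert legacy_get_tokenizer is get_tokenizer  # same function object, re-exported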