ai-dynamo · ishandhanani · May 25, 2026 · May 18, 2026 · May 18, 2026 · May 21, 2026
@@ -1298,10 +1298,6 @@ def test_system_message(self, tokenizer):
         )
         assert len(with_system.prompt_token_ids) > len(without_system.prompt_token_ids)
 
-    @pytest.mark.skip(
-        reason="DYN-3049: deepseek_v4 dispatch path requires sglang 0.5.12 support; "
-        "Dynamo is pinned to sglang 0.5.11. Unskip after the 0.5.12 bump lands."
-    )
     def test_deepseek_v4_uses_sglang_encoder_when_chat_template_missing(
         self, monkeypatch
     ):
@@ -1358,7 +1354,7 @@ def encode(self, prompt):
             request,
             tokenizer=NoTemplateTokenizer(),
             tool_call_parser_name=None,
-            reasoning_parser_name="deepseek_v4",
+            reasoning_parser_name="deepseek-v4",
         )
 
         assert result.prompt_token_ids == [1, 2, 3]
@@ -1412,7 +1408,7 @@ def encode(self, prompt):
             request,
             tokenizer=NoTemplateTokenizer(),
             tool_call_parser_name=None,
-            reasoning_parser_name="deepseek_v4",
+            reasoning_parser_name="deepseek-v4",
         )
 
         tools = captured["messages"][0]["tools"]

@@ -128,8 +128,8 @@ python container/compliance/process_results.py \
 |-----------|------|------------|
 | `vllm` | 12.9 | `vllm/vllm-openai:v0.21.0-cu129-ubuntu2404` |
 | `vllm` | 13.0 | `vllm/vllm-openai:v0.21.0-ubuntu2404` |
-| `sglang` | 12.9 | `lmsysorg/sglang:v0.5.11-cu129-runtime` |
-| `sglang` | 13.0 | `lmsysorg/sglang:v0.5.11-cu130-runtime` |
+| `sglang` | 12.9 | `lmsysorg/sglang:v0.5.12.post1-cu129-runtime` |
+| `sglang` | 13.0 | `lmsysorg/sglang:v0.5.12.post1-cu130-runtime` |
 | `trtllm` | 13.1 | `nvcr.io/nvidia/cuda-dl-base:25.12-cuda13.1-runtime-ubuntu24.04` |
 | `dynamo` frontend | — | `nvcr.io/nvidia/base/ubuntu:noble-20250619` |
 

@@ -76,12 +76,12 @@ sglang:
     base_image: nvcr.io/nvidia/cuda-dl-base
     runtime_image: lmsysorg/sglang
     base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
-    runtime_image_tag: v0.5.11-cu129-runtime
+    runtime_image_tag: v0.5.12.post1-cu129-runtime
   cuda13.0:
     base_image: nvcr.io/nvidia/cuda-dl-base
     runtime_image: lmsysorg/sglang
     base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
-    runtime_image_tag: v0.5.11-cu130-runtime
+    runtime_image_tag: v0.5.12.post1-cu130-runtime
   # Builds the NIXL C++ SDK in wheel_builder so the dev stage can link nixl-sys
   # (see templates/dev.Dockerfile). Runtime stays on the upstream lmsysorg/sglang
   # NIXL Python stack — its wheel COPY is narrowed to ai_dynamo*.whl so the SDK

@@ -59,7 +59,7 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
     pip install --break-system-packages --no-deps "accelerate==1.13.0"
 
 # Install distro: openai>=1.x's _base_client imports it unconditionally, and
-# sglang 0.5.11's server_args eagerly imports sglang.srt.entrypoints.openai.protocol
+# sglang 0.5.12's server_args eagerly imports sglang.srt.entrypoints.openai.protocol
 # which pulls in openai.types.responses → triggers openai pkg init → import distro.
 # The upstream lmsysorg/sglang runtime installs openai with --no-deps so distro is
 # missing; without this any dynamo.sglang worker fails to import at startup.

diff --git a/pyproject.toml b/pyproject.toml
@@ -63,7 +63,9 @@ vllm = [
 
 sglang = [
     "uvloop",
-    "sglang[diffusion]==0.5.11",
+    "sglang[diffusion]==0.5.12.post1",
+    # sglang[diffusion] no longer depends on accelerate as of 0.5.12; diffusers still needs it, so list it explicitly.
+    "accelerate>=0.17.0",
     "nixl[cu12]>=1.0.0,<1.1.0",
     "cupy-cuda12x>=13.0.0",
 ]