Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1298,10 +1298,6 @@ def test_system_message(self, tokenizer):
)
assert len(with_system.prompt_token_ids) > len(without_system.prompt_token_ids)

@pytest.mark.skip(
reason="DYN-3049: deepseek_v4 dispatch path requires sglang 0.5.12 support; "
"Dynamo is pinned to sglang 0.5.11. Unskip after the 0.5.12 bump lands."
)
def test_deepseek_v4_uses_sglang_encoder_when_chat_template_missing(
self, monkeypatch
):
Expand Down Expand Up @@ -1358,7 +1354,7 @@ def encode(self, prompt):
request,
tokenizer=NoTemplateTokenizer(),
tool_call_parser_name=None,
reasoning_parser_name="deepseek_v4",
reasoning_parser_name="deepseek-v4",
)

assert result.prompt_token_ids == [1, 2, 3]
Expand Down Expand Up @@ -1412,7 +1408,7 @@ def encode(self, prompt):
request,
tokenizer=NoTemplateTokenizer(),
tool_call_parser_name=None,
reasoning_parser_name="deepseek_v4",
reasoning_parser_name="deepseek-v4",
)

tools = captured["messages"][0]["tools"]
Expand Down
4 changes: 2 additions & 2 deletions container/compliance/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ python container/compliance/process_results.py \
|-----------|------|------------|
| `vllm` | 12.9 | `vllm/vllm-openai:v0.21.0-cu129-ubuntu2404` |
| `vllm` | 13.0 | `vllm/vllm-openai:v0.21.0-ubuntu2404` |
| `sglang` | 12.9 | `lmsysorg/sglang:v0.5.11-cu129-runtime` |
| `sglang` | 13.0 | `lmsysorg/sglang:v0.5.11-cu130-runtime` |
| `sglang` | 12.9 | `lmsysorg/sglang:v0.5.12.post1-cu129-runtime` |
| `sglang` | 13.0 | `lmsysorg/sglang:v0.5.12.post1-cu130-runtime` |
| `trtllm` | 13.1 | `nvcr.io/nvidia/cuda-dl-base:25.12-cuda13.1-runtime-ubuntu24.04` |
| `dynamo` frontend | — | `nvcr.io/nvidia/base/ubuntu:noble-20250619` |

Expand Down
4 changes: 2 additions & 2 deletions container/context.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,12 @@ sglang:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: lmsysorg/sglang
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
runtime_image_tag: v0.5.11-cu129-runtime
runtime_image_tag: v0.5.12.post1-cu129-runtime
cuda13.0:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: lmsysorg/sglang
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag: v0.5.11-cu130-runtime
runtime_image_tag: v0.5.12.post1-cu130-runtime
# Builds the NIXL C++ SDK in wheel_builder so the dev stage can link nixl-sys
# (see templates/dev.Dockerfile). Runtime stays on the upstream lmsysorg/sglang
# NIXL Python stack — its wheel COPY is narrowed to ai_dynamo*.whl so the SDK
Expand Down
2 changes: 1 addition & 1 deletion container/templates/sglang_runtime.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
pip install --break-system-packages --no-deps "accelerate==1.13.0"

# Install distro: openai>=1.x's _base_client imports it unconditionally, and
# sglang 0.5.11's server_args eagerly imports sglang.srt.entrypoints.openai.protocol
# sglang 0.5.12's server_args eagerly imports sglang.srt.entrypoints.openai.protocol
# which pulls in openai.types.responses → triggers openai pkg init → import distro.
# The upstream lmsysorg/sglang runtime installs openai with --no-deps so distro is
# missing; without this any dynamo.sglang worker fails to import at startup.
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ vllm = [

sglang = [
"uvloop",
"sglang[diffusion]==0.5.11",
"sglang[diffusion]==0.5.12.post1",
# sglang[diffusion] dropped accelerate in 0.5.12; diffusers still needs it.
"accelerate>=0.17.0",
"nixl[cu12]>=1.0.0,<1.1.0",
"cupy-cuda12x>=13.0.0",
]
Expand Down
Loading