chore: update dynamo and nixl versions for 0.3.1 (#1517)

nv-anants · web-flow · commit 99e67e607a02 · 2025-06-13T21:13:43.000Z
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -28,7 +28,7 @@ members = [
 resolver = "3"
 
 [workspace.package]
-version = "0.3.0"
+version = "0.3.1"
 edition = "2021"
 description = "Dynamo Inference Framework"
 authors = ["NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"]
@@ -39,9 +39,9 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynamo"]
 
 [workspace.dependencies]
 # Local crates
-dynamo-runtime = { path = "lib/runtime", version = "0.3.0" }
-dynamo-llm = { path = "lib/llm", version = "0.3.0" }
-dynamo-tokens = { path = "lib/tokens", version = "0.3.0" }
+dynamo-runtime = { path = "lib/runtime", version = "0.3.1" }
+dynamo-llm = { path = "lib/llm", version = "0.3.1" }
+dynamo-tokens = { path = "lib/tokens", version = "0.3.1" }
 
 # External dependencies
 anyhow = { version = "1" }
diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm
@@ -178,7 +178,7 @@ RUN uv pip install /workspace/wheels/nixl/*.whl
 ARG VLLM_REF="0.8.4"
 ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
 ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
-ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post2"
+ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post3"
 ARG VLLM_MAX_JOBS=4
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
     --mount=type=cache,target=/root/.cache/uv \
diff --git a/container/build.sh b/container/build.sh
@@ -109,7 +109,7 @@ NONE_BASE_IMAGE_TAG="24.04"
 SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
 
-NIXL_COMMIT=f531404be4866d85ed618b3baf4008c636798d63
+NIXL_COMMIT=16348080f5bdeb9fe6058a23be140cec020ef3f3
 NIXL_REPO=ai-dynamo/nixl.git
 
 NIXL_UCX_EFA_REF=7ec95b95e524a87e81cac92f5ca8523e3966b16b
diff --git a/docs/support_matrix.md b/docs/support_matrix.md
@@ -68,24 +68,24 @@ If you are using a **GPU**, the following GPU models and architectures are suppo
 
 ### Runtime Dependency
 
-| **Python Package** | **Version**   | glibc version        | CUDA Version |
-| :----------------- | :------------ | :------------------- | :----------- |
-| ai-dynamo          | 0.3.0         | >=2.28               |              |
-| ai-dynamo-runtime  | 0.3.0         | >=2.28               |              |
-| ai-dynamo-vllm     | 0.8.4.post2¹  | >=2.28 (recommended) |              |
-| NIXL               | 0.3.0         | >=2.27               | >=11.8       |
+| **Python Package** | **Version**   | glibc version                         | CUDA Version |
+| :----------------- | :------------ | :------------------------------------ | :----------- |
+| ai-dynamo          | 0.3.1         | >=2.28                                |              |
+| ai-dynamo-runtime  | 0.3.1         | >=2.28 (Python 3.12 has known issues) |              |
+| ai-dynamo-vllm     | 0.8.4.post3¹  | >=2.28 (recommended)                  |              |
+| NIXL               | 0.3.1         | >=2.27                                | >=11.8       |
 
 ### Build Dependency
 
 | **Build Dependency** | **Version**                                                                      |
 | :------------------- | :------------------------------------------------------------------------------- |
 | **Base Container**   | [25.03](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda-dl-base/tags) |
-| **ai-dynamo-vllm**   | 0.8.4.post2¹                                                                     |
-| **TensorRT-LLM**     | 0.19.0²                                                                          |
-| **NIXL**             | 0.3.0                                                                            |
+| **ai-dynamo-vllm**   | 0.8.4.post3¹                                                                     |
+| **TensorRT-LLM**     | 0.21.0rc²                                                                        |
+| **NIXL**             | 0.3.1                                                                            |
 
 > [!Important]
-> ¹ ai-dynamo-vllm `v0.8.4.post2` is a customized patch of `v0.8.4` from vLLM.
+> ¹ ai-dynamo-vllm `v0.8.4.post3` is a customized patch of `v0.8.4` from vLLM.
 >
 > ² Specific versions of TensorRT-LLM supported by Dynamo are subject to change.
 
diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock
diff --git a/lib/bindings/python/Cargo.toml b/lib/bindings/python/Cargo.toml
@@ -19,7 +19,7 @@
 
 [package]
 name = "dynamo-py3"
-version = "0.3.0"
+version = "0.3.1"
 edition = "2021"
 authors = ["NVIDIA"]
 license = "Apache-2.0"
diff --git a/lib/runtime/examples/Cargo.lock b/lib/runtime/examples/Cargo.lock
diff --git a/lib/runtime/examples/Cargo.toml b/lib/runtime/examples/Cargo.toml
@@ -21,7 +21,7 @@ members = [
 resolver = "3"
 
 [workspace.package]
-version = "0.3.0"
+version = "0.3.1"
 edition = "2021"
 authors = ["NVIDIA"]
 license = "Apache-2.0"
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,7 +15,7 @@
 
 [project]
 name = "ai-dynamo"
-version = "0.3.0"
+version = "0.3.1"
 description = "Distributed Inference Framework"
 readme = "README.md"
 authors = [
@@ -28,7 +28,7 @@ dependencies = [
     "pytest>=8.3.4",
     "types-psutil==7.0.0.20250218",
     "kubernetes==32.0.1",
-    "ai-dynamo-runtime==0.3.0",
+    "ai-dynamo-runtime==0.3.1",
     "fastapi==0.115.6",
     "distro",
     # filelock: required by planner