ai-dynamo
diff --git a/‎.cargo/config.toml‎
Lines changed: 6 additions & 0 deletions b/‎.cargo/config.toml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎.github/workflows/trigger_ci.yml‎
Lines changed: 3 additions & 1 deletion b/‎.github/workflows/trigger_ci.yml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎Cargo.lock‎
Lines changed: 60 additions & 0 deletions b/‎Cargo.lock‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 0 additions & 12 deletions b/‎Cargo.toml‎
Lines changed: 0 additions & 12 deletions
diff --git a/‎components/backends/trtllm/llama4_plus_eagle.md‎
Lines changed: 0 additions & 1 deletion b/‎components/backends/trtllm/llama4_plus_eagle.md‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎container/Dockerfile.sglang-wideep‎
Lines changed: 17 additions & 11 deletions b/‎container/Dockerfile.sglang-wideep‎
Lines changed: 17 additions & 11 deletions
diff --git a/‎container/Dockerfile.vllm‎
Lines changed: 38 additions & 14 deletions b/‎container/Dockerfile.vllm‎
Lines changed: 38 additions & 14 deletions
diff --git a/‎container/build.sh‎
Lines changed: 2 additions & 0 deletions b/‎container/build.sh‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎deploy/cloud/helm/uninstall.sh‎
Lines changed: 69 additions & 0 deletions b/‎deploy/cloud/helm/uninstall.sh‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎deploy/cloud/operator/Earthfile‎
Lines changed: 1 addition & 1 deletion b/‎deploy/cloud/operator/Earthfile‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+[build]
+# Pass `--cfg tokio_unstable` to every rustc invocation; tokio-console needs this cfg flag to be set
+rustflags = ["--cfg", "tokio_unstable"]
@@ -52,12 +52,14 @@ jobs:
             - 'examples/python_rs/llm/**'
             - 'container/deps/requirements.vllm.txt'
             - 'container/deps/vllm/**'
+            - 'tests/serve/test_vllm.py'
           trtllm:
             - 'container/Dockerfile.tensorrt_llm'
-            - 'examples/tensorrt_llm/**'
+            - 'components/backends/trtllm/**'
             - 'container/build.sh'
             - 'container/build_trtllm_wheel.sh'
             - 'container/deps/**'
+            - 'tests/serve/test_trtllm.py'
           sdk:
             - 'deploy/**'
     - name: Check if Validation Workflow has run
 
@@ -1,17 +1,5 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 
 [workspace]
 members = [
 
@@ -32,7 +32,6 @@ For advanced control over how requests are routed between prefill and decode wor
 ## Notes
 * To run Eagle Speculative Decoding with Llama 4, ensure the container meets the following criteria:
   * Built with a version of TensorRT-LLM based on the 0.21 release [Link](https://github.com/NVIDIA/TensorRT-LLM/tree/release/0.21)
-  * The TensorRT-LLM build includes the changes from this PR [Link](https://github.com/NVIDIA/TensorRT-LLM/pull/5975)
 * If you need to download model weights off huggingface, make sure you run the command `huggingface-cli login` and have access to the necessary gated models.
 
 
 
@@ -71,20 +71,11 @@ RUN rm -rf /opt/hpcx/ucx && \
 
 ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
 
-# Pinning to NIXL 0.2.1 right now
-# There is a fix that was merged into SGLang after 0.4.8.post1
-# TODO: Investigate perf hit of that change before we bump to up to date NIXL
-ARG NIXL_COMMIT="5e4c179ee850d482a83cb2a211e0947e46281060"
-RUN git clone https://github.com/ai-dynamo/nixl.git && cd nixl && git checkout ${NIXL_COMMIT} && pip install --break-system-packages . --config-settings=setup-args="-Ducx_path=/usr/local/ucx"
+ARG NIXL_TAG=0.3.1
+RUN git clone https://github.com/ai-dynamo/nixl.git && cd nixl && git checkout ${NIXL_TAG} && pip install --break-system-packages --no-cache-dir . --config-settings=setup-args="-Ducx_path=/usr/local/ucx"
 
 WORKDIR /sgl-workspace
 
-RUN pip uninstall --break-system-packages -y sglang
-RUN rm -rf sglang
-# Pinning to 0.4.8.post1 for now which solves a TBO issue
-# https://github.com/sgl-project/sglang/issues/7511
-RUN pip install --break-system-packages "sglang==0.4.8.post1"
-
 # Allow forceful shutdown of inflight requests
 ENV SGL_FORCE_SHUTDOWN=1
 
@@ -149,6 +140,21 @@ RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/downlo
     rm /tmp/etcd.tar.gz
 ENV PATH=/usr/local/bin/etcd/:$PATH
 
+ARG CMAKE_VERSION=3.31.8
+# WORKDIR creates the directory when it is missing, so no separate `RUN mkdir` is needed
+WORKDIR /sgl-workspace/cmake_build
+
+# Uninstall the apt-packaged CMake before installing the newer release
+RUN apt-get purge -y cmake
+# Download the pinned CMake release with retries; remove the tarball in the same layer so it is not kept in the image
+RUN wget --tries=3 --waitretry=5 https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz && \
+    tar -xzf cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz && \
+    mv cmake-${CMAKE_VERSION}-linux-$(uname -m) custom_cmake && \
+    rm cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz
+ENV PATH=/sgl-workspace/cmake_build/custom_cmake/bin:$PATH
+# Print the active CMake version to the build log (expected to match CMAKE_VERSION)
+RUN cmake --version
+
 # Install perf_analyzer and genai-perf
 RUN apt-get update -y && \
     apt-get install -y --no-install-recommends \
 
@@ -119,12 +119,13 @@ WORKDIR /workspace
 
 ### NIXL SETUP ###
 # Copy nixl source, and use commit hash as cache hint
+# TEMP: disable gds backend for arm64
 COPY --from=nixl_base /opt/nixl /opt/nixl
 COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
 RUN if [ "$ARCH" = "arm64" ]; then \
         cd /opt/nixl && \
         mkdir build && \
-        meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
+        meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
         cd build/ && \
         ninja && \
         ninja install; \
@@ -163,8 +164,10 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
 # Install NIXL Python module
 # TODO: Move gds_path selection based on arch into NIXL build
+# TEMP: disable gds backend for arm64
 RUN if [ "$ARCH" = "arm64" ]; then \
         cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl \
+        --config-settings=setup-args="-Ddisable_gds_backend=true" \
         --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
     else \
         cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
@@ -177,22 +180,33 @@ RUN uv pip install /workspace/wheels/nixl/*.whl
 # Install vllm - keep this early in Dockerfile to avoid
 # rebuilds from unrelated source code changes
 ARG VLLM_REF="059d4cd"
+# On arm64 vLLM is installed without VLLM_USE_PRECOMPILED, against nightly PyTorch (cu128), and
+# MAX_JOBS is passed to that install. Other architectures keep VLLM_USE_PRECOMPILED=1.
+ARG MAX_JOBS=16
+ENV MAX_JOBS=$MAX_JOBS
 ENV CUDA_HOME=/usr/local/cuda
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
     --mount=type=cache,target=/root/.cache/uv \
-    uv pip install pip cuda-python && \
-    mkdir /opt/vllm && \
-    cd /opt/vllm && \
-    git clone https://github.com/vllm-project/vllm.git && \
-    cd vllm && \
-    git checkout $VLLM_REF && \
-    VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
-    cd tools/ep_kernels && \
-    bash install_python_libraries.sh && \
-    cd ep_kernels_workspace && \
-    git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
-    cd DeepGEMM && \
-    python setup.py install
+    uv pip install pip cuda-python && \
+    mkdir /opt/vllm && \
+    cd /opt/vllm && \
+    git clone https://github.com/vllm-project/vllm.git && \
+    cd vllm && \
+    git checkout $VLLM_REF && \
+    if [ "$ARCH" = "arm64" ]; then \
+        uv pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128 && \
+        python use_existing_torch.py && \
+        uv pip install -r requirements/build.txt && \
+        MAX_JOBS=${MAX_JOBS} uv pip install --no-build-isolation -e . -v; \
+    else \
+        VLLM_USE_PRECOMPILED=1 uv pip install -e .; \
+    fi && \
+    cd tools/ep_kernels && \
+    bash install_python_libraries.sh && \
+    cd ep_kernels_workspace && \
+    git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
+    cd DeepGEMM && \
+    python setup.py install
 
 # Common dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
 
@@ -389,6 +389,8 @@ ARCH="amd64"
 if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
     ARCH="arm64"
     BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
+    # TEMP: Pin to nixl 0.3.1 for arm build, since 0.4.0 fails
+    NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8
 fi
 
 # Update DOCKERFILE if framework is VLLM
 
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+trap 'echo "Error at line $LINENO. Exiting."' ERR
+
+read -r -p "Are you sure you want to delete ALL Dynamo CRDs and their instances? (y/N): " confirm
+if [[ "$confirm" != "y" ]]; then
+  echo "Aborting."
+  exit 1
+fi
+
+# Step 1: Get all CRDs with the prefix ('|| true' keeps a no-match grep from aborting under errexit/pipefail)
+DYNAMO_CRDS="$(kubectl get crds -o name | grep 'nvidia.com' | grep 'dynamo' | cut -d'/' -f2 || true)"
+
+if [ -z "${DYNAMO_CRDS}" ]; then
+  echo "Dynamo CRDs not found"
+  exit 1
+fi
+
+# Step 2: Delete all custom resource instances for each CRD
+for CRD in ${DYNAMO_CRDS}; do
+  SCOPE=$(kubectl get crd "${CRD}" -o jsonpath='{.spec.scope}')
+  if [ "$SCOPE" = "Namespaced" ]; then
+    echo "Deleting all namespaced instances of ${CRD}..."
+    # Delete by resource type in every namespace; '-o name' output carries no namespace to pipe into delete
+    kubectl delete "${CRD}" --all --all-namespaces --wait=false
+  else
+    echo "Skipping cluster-scoped CRD: ${CRD}"
+  fi
+done
+
+
+# Step 3: Wait for the Operator to handle finalizer removal
+echo "Waiting for Dynamo Operator to handle the finalizer removal (30 seconds)..."
+sleep 30
+
+# Step 4: Verify all Custom Resources have been removed
+for CRD in ${DYNAMO_CRDS}; do
+  # Informational only: print whatever instances are still present
+
+  echo "Checking instances of ${CRD}"
+  kubectl get "${CRD}" --all-namespaces -o name
+done
+
+# Step 5: Delete the CRDs themselves
+echo "Deleting CRDs..."
+
+for CRD in ${DYNAMO_CRDS}; do
+  # Remove each CRD definition by name
+
+  echo "Deleting CRD: ${CRD}..."
+  kubectl delete crd "${CRD}"
+done
+
+
@@ -40,7 +40,7 @@ docker:
     ARG DOCKER_SERVER=my-registry
     ARG IMAGE_TAG=latest
     ARG IMAGE_SUFFIX=dynamo-operator
-    FROM nvcr.io/nvidia/distroless/go:v3.1.9-dev
+    FROM nvcr.io/nvidia/distroless/go:v3.1.10
     WORKDIR /
     COPY +build/manager .
     USER 65532:65532