Skip to content

Commit 919ac39

Browse files
committed
merge main
2 parents da05ca9 + f6f392c commit 919ac39

File tree

19 files changed

+324
-131
lines changed

19 files changed

+324
-131
lines changed

.cargo/config.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
[build]
5+
# tokio-console needs this
6+
rustflags = ["--cfg", "tokio_unstable"]

.github/workflows/trigger_ci.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,14 @@ jobs:
5252
- 'examples/python_rs/llm/**'
5353
- 'container/deps/requirements.vllm.txt'
5454
- 'container/deps/vllm/**'
55+
- 'tests/serve/test_vllm.py'
5556
trtllm:
5657
- 'container/Dockerfile.tensorrt_llm'
57-
- 'examples/tensorrt_llm/**'
58+
- 'components/backends/trtllm/**'
5859
- 'container/build.sh'
5960
- 'container/build_trtllm_wheel.sh'
6061
- 'container/deps/**'
62+
- 'tests/serve/test_trtllm.py'
6163
sdk:
6264
- 'deploy/**'
6365
- name: Check if Validation Workflow has run

Cargo.lock

Lines changed: 60 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,5 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
3-
#
4-
# Licensed under the Apache License, Version 2.0 (the "License");
5-
# you may not use this file except in compliance with the License.
6-
# You may obtain a copy of the License at
7-
#
8-
# http://www.apache.org/licenses/LICENSE-2.0
9-
#
10-
# Unless required by applicable law or agreed to in writing, software
11-
# distributed under the License is distributed on an "AS IS" BASIS,
12-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-
# See the License for the specific language governing permissions and
14-
# limitations under the License.
153

164
[workspace]
175
members = [

components/backends/trtllm/llama4_plus_eagle.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ For advanced control over how requests are routed between prefill and decode wor
3232
## Notes
3333
* To run Eagle Speculative Decoding with Llama 4, ensure the container meets the following criteria:
3434
* Built with a version of TensorRT-LLM based on the 0.21 release [Link](https://github.com/NVIDIA/TensorRT-LLM/tree/release/0.21)
35-
* The TensorRT-LLM build includes the changes from this PR [Link](https://github.com/NVIDIA/TensorRT-LLM/pull/5975)
3635
* If you need to download model weights off huggingface, make sure you run the command `huggingface-cli login` and have access to the necessary gated models.
3736

3837

container/Dockerfile.sglang-wideep

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,20 +71,11 @@ RUN rm -rf /opt/hpcx/ucx && \
7171

7272
ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
7373

74-
# Pinning to NIXL 0.2.1 right now
75-
# There is a fix that was merged into SGLang after 0.4.8.post1
76-
# TODO: Investigate perf hit of that change before we bump to up to date NIXL
77-
ARG NIXL_COMMIT="5e4c179ee850d482a83cb2a211e0947e46281060"
78-
RUN git clone https://github.com/ai-dynamo/nixl.git && cd nixl && git checkout ${NIXL_COMMIT} && pip install --break-system-packages . --config-settings=setup-args="-Ducx_path=/usr/local/ucx"
74+
ARG NIXL_TAG=0.3.1
75+
RUN git clone https://github.com/ai-dynamo/nixl.git && cd nixl && git checkout ${NIXL_TAG} && pip install --break-system-packages . --config-settings=setup-args="-Ducx_path=/usr/local/ucx"
7976

8077
WORKDIR /sgl-workspace
8178

82-
RUN pip uninstall --break-system-packages -y sglang
83-
RUN rm -rf sglang
84-
# Pinning to 0.4.8.post1 for now which solves a TBO issue
85-
# https://github.com/sgl-project/sglang/issues/7511
86-
RUN pip install --break-system-packages "sglang==0.4.8.post1"
87-
8879
# Allow forceful shutdown of inflight requests
8980
ENV SGL_FORCE_SHUTDOWN=1
9081

@@ -149,6 +140,21 @@ RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/downlo
149140
rm /tmp/etcd.tar.gz
150141
ENV PATH=/usr/local/bin/etcd/:$PATH
151142

143+
ARG CMAKE_VERSION=3.31.8
144+
RUN mkdir /sgl-workspace/cmake_build
145+
WORKDIR /sgl-workspace/cmake_build
146+
147+
# uninstall CMake
148+
RUN apt-get purge -y cmake
149+
# download newer version of CMake
150+
RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz && \
151+
tar -xvzf cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz && \
152+
mv cmake-${CMAKE_VERSION}-linux-$(uname -m) custom_cmake
153+
ENV PATH=/sgl-workspace/cmake_build/custom_cmake/bin:$PATH
154+
155+
# should be 3.31.8
156+
RUN cmake --version
157+
152158
# Install perf_analyzer and genai-perf
153159
RUN apt-get update -y && \
154160
apt-get install -y --no-install-recommends \

container/Dockerfile.vllm

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,13 @@ WORKDIR /workspace
119119

120120
### NIXL SETUP ###
121121
# Copy nixl source, and use commit hash as cache hint
122+
# TEMP: disable gds backend for arm64
122123
COPY --from=nixl_base /opt/nixl /opt/nixl
123124
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
124125
RUN if [ "$ARCH" = "arm64" ]; then \
125126
cd /opt/nixl && \
126127
mkdir build && \
127-
meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
128+
meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
128129
cd build/ && \
129130
ninja && \
130131
ninja install; \
@@ -163,8 +164,10 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
163164

164165
# Install NIXL Python module
165166
# TODO: Move gds_path selection based on arch into NIXL build
167+
# TEMP: disable gds backend for arm64
166168
RUN if [ "$ARCH" = "arm64" ]; then \
167169
cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl \
170+
--config-settings=setup-args="-Ddisable_gds_backend=true" \
168171
--config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
169172
else \
170173
cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
@@ -177,22 +180,43 @@ RUN uv pip install /workspace/wheels/nixl/*.whl
177180
# Install vllm - keep this early in Dockerfile to avoid
178181
# rebuilds from unrelated source code changes
179182
ARG VLLM_REF="059d4cd"
183+
ARG MAX_JOBS=16
184+
ENV MAX_JOBS=$MAX_JOBS
180185
ENV CUDA_HOME=/usr/local/cuda
181186
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
182187
--mount=type=cache,target=/root/.cache/uv \
183-
uv pip install pip cuda-python && \
184-
mkdir /opt/vllm && \
185-
cd /opt/vllm && \
186-
git clone https://github.com/vllm-project/vllm.git && \
187-
cd vllm && \
188-
git checkout $VLLM_REF && \
189-
VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
190-
cd tools/ep_kernels && \
191-
bash install_python_libraries.sh && \
192-
cd ep_kernels_workspace && \
193-
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
194-
cd DeepGEMM && \
195-
python setup.py install
188+
if [ "$ARCH" = "arm64" ]; then \
189+
uv pip install pip cuda-python && \
190+
mkdir /opt/vllm && \
191+
cd /opt/vllm && \
192+
git clone https://github.com/vllm-project/vllm.git && \
193+
cd vllm && \
194+
git checkout $VLLM_REF && \
195+
uv pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128 && \
196+
python use_existing_torch.py && \
197+
uv pip install -r requirements/build.txt && \
198+
MAX_JOBS=${MAX_JOBS} uv pip install --no-build-isolation -e . -v && \
199+
cd tools/ep_kernels && \
200+
bash install_python_libraries.sh && \
201+
cd ep_kernels_workspace && \
202+
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
203+
cd DeepGEMM && \
204+
python setup.py install; \
205+
else \
206+
uv pip install pip cuda-python && \
207+
mkdir /opt/vllm && \
208+
cd /opt/vllm && \
209+
git clone https://github.com/vllm-project/vllm.git && \
210+
cd vllm && \
211+
git checkout $VLLM_REF && \
212+
VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
213+
cd tools/ep_kernels && \
214+
bash install_python_libraries.sh && \
215+
cd ep_kernels_workspace && \
216+
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
217+
cd DeepGEMM && \
218+
python setup.py install; \
219+
fi
196220

197221
# Common dependencies
198222
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \

container/build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,8 @@ ARCH="amd64"
389389
if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
390390
ARCH="arm64"
391391
BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
392+
# TEMP: Pin to nixl 0.3.1 for arm build, since 0.4.0 fails
393+
NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8
392394
fi
393395

394396
# Update DOCKERFILE if framework is VLLM

deploy/cloud/helm/uninstall.sh

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Interactively removes the Dynamo CRDs from the cluster kubectl currently
# points at:
#   1. find every CRD whose name contains both "nvidia.com" and "dynamo",
#   2. delete all instances of the namespaced ones (without waiting),
#   3. sleep 30 seconds so finalizers can be processed,
#   4. list whatever instances are still present,
#   5. delete the CRDs themselves.
# Exits 1 if the user does not confirm with "y" or if no Dynamo CRD exists.

set -euo pipefail
trap 'echo "Error at line $LINENO. Exiting." >&2' ERR

# -r keeps backslashes in the answer literal.
read -r -p "Are you sure you want to delete ALL Dynamo CRDs and their instances? (y/N): " confirm
if [[ "$confirm" != "y" ]]; then
    echo "Aborting."
    exit 1
fi

# Step 1: Get all CRDs with the prefix
# kubectl runs on its own so that a kubectl failure still aborts the script.
# The filtering is done with awk instead of grep: grep exits 1 when nothing
# matches, which under `set -e -o pipefail` would kill the script before the
# "not found" message below could ever be printed.
ALL_CRDS="$(kubectl get crds -o name)"
DYNAMO_CRDS="$(awk -F/ '/nvidia\.com/ && /dynamo/ { print $2 }' <<<"${ALL_CRDS}")"

if [[ -z "${DYNAMO_CRDS}" ]]; then
    echo "Dynamo CRDs not found" >&2
    exit 1
fi

# Step 2: Delete all custom resource instances for each CRD
# DYNAMO_CRDS is deliberately left unquoted in the loops below: it is a
# newline-separated list of CRD names, which cannot contain whitespace.
for CRD in ${DYNAMO_CRDS}; do
    SCOPE="$(kubectl get crd "${CRD}" -o jsonpath='{.spec.scope}')"

    if [[ "${SCOPE}" == "Namespaced" ]]; then
        echo "Deleting all namespaced instances of ${CRD}..."
        # `kubectl get --all-namespaces -o name` prints "type/name" without the
        # namespace, so feeding that to `kubectl delete` would only look in the
        # current namespace. Let kubectl select the objects itself instead.
        kubectl delete "${CRD}" --all --all-namespaces --wait=false
    else
        echo "Skipping cluster-scoped CRD: ${CRD}"
    fi
done

# Step 3: Wait for the Operator to handle finalizer removal
echo "Waiting for Dynamo Operator to handle the finalizer removal (30 seconds)..."
sleep 30

# Step 4: Verify all Custom Resources have been removed
for CRD in ${DYNAMO_CRDS}; do
    echo "Checking instances of ${CRD}"
    REMAINING="$(kubectl get "${CRD}" --all-namespaces -o name)"
    if [[ -n "${REMAINING}" ]]; then
        printf '%s\n' "${REMAINING}"
        echo "Warning: instances of ${CRD} still exist; deleting the CRD may block until their finalizers are removed." >&2
    fi
done

# Step 5: Delete the CRDs themselves
echo "Deleting CRDs..."

for CRD in ${DYNAMO_CRDS}; do
    echo "Deleting CRD: ${CRD}..."
    kubectl delete crd "${CRD}"
done
68+
69+

deploy/cloud/operator/Earthfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ docker:
4040
ARG DOCKER_SERVER=my-registry
4141
ARG IMAGE_TAG=latest
4242
ARG IMAGE_SUFFIX=dynamo-operator
43-
FROM nvcr.io/nvidia/distroless/go:v3.1.9-dev
43+
FROM nvcr.io/nvidia/distroless/go:v3.1.10
4444
WORKDIR /
4545
COPY +build/manager .
4646
USER 65532:65532

0 commit comments

Comments
 (0)