Skip to content

Commit dc75cf1

Browse files
chore: Move NIXL repo clone to Dockerfiles (#2009)
1 parent f6f392c commit dc75cf1

File tree

4 files changed

+32
-86
lines changed

4 files changed

+32
-86
lines changed

container/Dockerfile.sglang

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,6 @@ ARG ARCH_ALT=x86_64
2929
ARG SGLANG_VERSION="0.4.9.post1"
3030
ARG SGL_KERNEL_VERSION="0.2.4"
3131

32-
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
33-
34-
# Redeclare ARCH and ARCH_ALT so they're available in this stage
35-
ARG ARCH
36-
ARG ARCH_ALT
37-
38-
WORKDIR /opt/nixl
39-
# Add a cache hint that only changes when the nixl commit changes
40-
ARG NIXL_COMMIT
41-
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
42-
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
43-
# Copy the nixl source
44-
COPY --from=nixl . .
45-
4632
##################################
4733
########## Base Image ############
4834
##################################
@@ -52,6 +38,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
5238
# Redeclare ARCH and ARCH_ALT so they're available in this stage
5339
ARG ARCH
5440
ARG ARCH_ALT
41+
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
5542

5643
USER root
5744
ARG PYTHON_VERSION=3.12
@@ -111,9 +98,10 @@ SHELL ["/bin/bash", "-c"]
11198
WORKDIR /workspace
11299

113100
### NIXL SETUP ###
114-
# Copy nixl source, and use commit hash as cache hint
115-
COPY --from=nixl_base /opt/nixl /opt/nixl
116-
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
101+
# Clone nixl source, and checkout the nixl ref
102+
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
103+
cd /opt/nixl && \
104+
git checkout ${NIXL_REF}
117105
RUN if [ "$ARCH" = "arm64" ]; then \
118106
cd /opt/nixl && \
119107
mkdir build && \

container/Dockerfile.tensorrt_llm

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,6 @@ ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
3434
ARG ARCH=amd64
3535
ARG ARCH_ALT=x86_64
3636

37-
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
38-
39-
WORKDIR /opt/nixl
40-
# Add a cache hint that only changes when the nixl commit changes
41-
ARG NIXL_COMMIT
42-
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
43-
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
44-
# Copy the nixl source
45-
COPY --from=nixl . .
46-
4737
##################################
4838
########## Build Image ###########
4939
##################################
@@ -53,6 +43,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build
5343
# Redeclare ARCH and ARCH_ALT so they're available in this build stage
5444
ARG ARCH
5545
ARG ARCH_ALT
46+
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
5647

5748
USER root
5849

@@ -101,9 +92,10 @@ ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
10192
SHELL ["/bin/bash", "-c"]
10293

10394
# NIXL SETUP
104-
# Copy nixl source, and use commit hash as cache hint
105-
COPY --from=nixl_base /opt/nixl /opt/nixl
106-
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
95+
# Clone nixl source, and checkout the nixl ref
96+
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
97+
cd /opt/nixl && \
98+
git checkout ${NIXL_REF}
10799
RUN if [ "$ARCH" = "arm64" ]; then \
108100
cd /opt/nixl && \
109101
mkdir build && \

container/Dockerfile.vllm

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,6 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
2626
ARG ARCH=amd64
2727
ARG ARCH_ALT=x86_64
2828

29-
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
30-
31-
# Redeclare ARCH and ARCH_ALT so they're available in this stage
32-
ARG ARCH
33-
ARG ARCH_ALT
34-
35-
WORKDIR /opt/nixl
36-
# Add a cache hint that only changes when the nixl commit changes
37-
ARG NIXL_COMMIT
38-
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
39-
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
40-
# Copy the nixl source
41-
COPY --from=nixl . .
42-
4329
##################################
4430
########## Base Image ############
4531
##################################
@@ -80,6 +66,7 @@ RUN apt-get update -y && \
8066
libnuma-dev librdmacm-dev ibverbs-providers
8167

8268
ARG NIXL_UCX_REF=v1.19.x
69+
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
8370

8471
WORKDIR /workspace
8572

@@ -118,10 +105,11 @@ SHELL ["/bin/bash", "-c"]
118105
WORKDIR /workspace
119106

120107
### NIXL SETUP ###
121-
# Copy nixl source, and use commit hash as cache hint
108+
# Clone nixl source
122109
# TEMP: disable gds backend for arm64
123-
COPY --from=nixl_base /opt/nixl /opt/nixl
124-
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
110+
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
111+
cd /opt/nixl && \
112+
git checkout ${NIXL_REF}
125113
RUN if [ "$ARCH" = "arm64" ]; then \
126114
cd /opt/nixl && \
127115
mkdir build && \
@@ -199,9 +187,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
199187
cd tools/ep_kernels && \
200188
bash install_python_libraries.sh && \
201189
cd ep_kernels_workspace && \
202-
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
190+
git clone https://github.com/deepseek-ai/DeepGEMM.git && \
203191
cd DeepGEMM && \
204-
python setup.py install; \
192+
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
193+
git submodule sync --recursive && \
194+
git submodule update --init --recursive && \
195+
cat install.sh && \
196+
./install.sh; \
205197
else \
206198
uv pip install pip cuda-python && \
207199
mkdir /opt/vllm && \
@@ -213,9 +205,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
213205
cd tools/ep_kernels && \
214206
bash install_python_libraries.sh && \
215207
cd ep_kernels_workspace && \
216-
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
208+
git clone https://github.com/deepseek-ai/DeepGEMM.git && \
217209
cd DeepGEMM && \
218-
python setup.py install; \
210+
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
211+
git submodule sync --recursive && \
212+
git submodule update --init --recursive && \
213+
cat install.sh && \
214+
./install.sh; \
219215
fi
220216

221217
# Common dependencies

container/build.sh

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,7 @@ NONE_BASE_IMAGE_TAG="24.04"
111111
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
112112
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
113113

114-
NIXL_COMMIT=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
115-
NIXL_REPO=ai-dynamo/nixl.git
116-
114+
NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
117115
NIXL_UCX_EFA_REF=7ec95b95e524a87e81cac92f5ca8523e3966b16b
118116

119117
NO_CACHE=""
@@ -390,7 +388,7 @@ if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
390388
ARCH="arm64"
391389
BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
392390
# TEMP: Pin to nixl 0.3.1 for arm build, since 0.4.0 fails
393-
NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8
391+
NIXL_REF=3503658e71143b56f9d5b1b440d84a94b9c41af8
394392
fi
395393

396394
# Update DOCKERFILE if framework is VLLM
@@ -404,36 +402,8 @@ elif [[ $FRAMEWORK == "SGLANG" ]]; then
404402
DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
405403
fi
406404

407-
NIXL_DIR="/tmp/nixl/nixl_src"
408-
409-
# Clone original NIXL to temp directory
410-
if [ -d "$NIXL_DIR" ]; then
411-
echo "Warning: $NIXL_DIR already exists, skipping clone"
412-
else
413-
if [ -n "${GITHUB_TOKEN}" ]; then
414-
git clone "https://oauth2:${GITHUB_TOKEN}@github.com/${NIXL_REPO}" "$NIXL_DIR"
415-
else
416-
# Try HTTPS first with credential prompting disabled, fall back to SSH if it fails
417-
if ! GIT_TERMINAL_PROMPT=0 git clone https://github.com/${NIXL_REPO} "$NIXL_DIR"; then
418-
echo "HTTPS clone failed, falling back to SSH..."
419-
git clone git@github.com:${NIXL_REPO} "$NIXL_DIR"
420-
fi
421-
fi
422-
fi
423-
424-
pushd "$NIXL_DIR" || exit
425-
if ! git checkout ${NIXL_COMMIT}; then
426-
echo "ERROR: Failed to checkout NIXL commit ${NIXL_COMMIT}. The cached directory may be out of date."
427-
echo "Please delete $NIXL_DIR and re-run the build script."
428-
exit 1
429-
fi
430-
431-
popd
432-
433-
BUILD_CONTEXT_ARG+=" --build-context nixl=$NIXL_DIR"
434-
435-
# Add NIXL_COMMIT as a build argument to enable caching
436-
BUILD_ARGS+=" --build-arg NIXL_COMMIT=${NIXL_COMMIT} "
405+
# Add NIXL_REF as a build argument
406+
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
437407

438408
if [[ $TARGET == "local-dev" ]]; then
439409
BUILD_ARGS+=" --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) "
@@ -519,7 +489,7 @@ if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
519489
echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
520490
if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}" "${ARCH}_${TRTLLM_COMMIT}"; then
521491
echo "WARN: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}, attempting to build from source"
522-
if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_COMMIT}; then
492+
if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_REF}; then
523493
error "ERROR: Failed to build TensorRT-LLM wheel"
524494
fi
525495
fi

0 commit comments

Comments
 (0)