Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 17 additions & 19 deletions container/Dockerfile.tensorrt_llm
Original file line number Diff line number Diff line change
Expand Up @@ -373,12 +373,25 @@ CMD []

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace

ARG ARCH_ALT

WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
ENV OPAL_PREFIX=/opt/hpcx/ompi

# Install apt dependencies
# openssh-client, openssh-server are needed for OpenMPI
Expand Down Expand Up @@ -473,21 +486,6 @@ COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
COPY --from=build /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/

# Setup environment variables
ARG ARCH_ALT
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins

ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/opt/hpcx/ompi/lib:\
$LD_LIBRARY_PATH
ENV PATH=/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
ENV OPAL_PREFIX=/opt/hpcx/ompi

# Install TensorRT-LLM (same as in build stage)
ARG HAS_TRTLLM_CONTEXT=0
Expand All @@ -496,8 +494,8 @@ ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"

# Copy Dynamo wheels into wheelhouse
# Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel
COPY --from=dev /workspace/wheels/nixl/*.whl wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
COPY --from=dev /workspace/wheels/nixl/*.whl /workspace/wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl /workspace/wheelhouse/
COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics

# NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
Expand All @@ -508,7 +506,7 @@ RUN uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_P
if [ "$ARCH" = "amd64" ]; then \
pip install "triton==3.3.1"; \
fi; \
uv pip install ai-dynamo nixl --find-links wheelhouse
uv pip install ai-dynamo nixl --find-links /workspace/wheelhouse

# Setup TRTLLM environment variables, same as in dev image
ENV TRTLLM_USE_UCX_KVCACHE=1
Expand Down
Loading