Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
3c8ac96
Add and use install.sh
v-shobhit Aug 29, 2025
4b5b5fa
Clear the pip constraints
v-shobhit Aug 30, 2025
ead95a6
fail on error
v-shobhit Aug 30, 2025
846ca6f
Make install.sh executable
v-shobhit Sep 1, 2025
1a2b1bc
update enroot/Makefile
v-shobhit Sep 2, 2025
be25635
Address comment
v-shobhit Sep 2, 2025
719d46a
fix end of files
v-shobhit Sep 2, 2025
d444aa4
allow unset variables
v-shobhit Sep 2, 2025
bc55715
Add run_sqsh target
v-shobhit Sep 2, 2025
3877e05
Document enroot flow
v-shobhit Sep 2, 2025
38409bd
fix typo
v-shobhit Sep 2, 2025
4f4bb4d
fix docker build layering
v-shobhit Sep 2, 2025
85bca7c
address comments
v-shobhit Sep 2, 2025
f65f93b
remove temp workaround
v-shobhit Sep 2, 2025
91fc4e1
add vars
v-shobhit Sep 2, 2025
8b0171f
clean up srun command
v-shobhit Sep 2, 2025
981d489
clean up
v-shobhit Sep 2, 2025
762a66b
fix variable export
v-shobhit Sep 2, 2025
82bddf6
Merge branch 'main' into dev/shobhitv/shell-build-ctx
nvzhihanj Sep 3, 2025
ec3cf54
address comment
v-shobhit Sep 3, 2025
67fcaba
attempt fix CI
v-shobhit Sep 3, 2025
134d831
Merge branch 'main' into dev/shobhitv/shell-build-ctx
v-shobhit Sep 6, 2025
349e17d
Merge branch 'main' into dev/shobhitv/shell-build-ctx
nvzhihanj Sep 8, 2025
3ee39e5
Update current_image_tags.properties
v-shobhit Sep 9, 2025
80b542e
Merge branch 'main' into dev/shobhitv/shell-build-ctx
v-shobhit Sep 9, 2025
faf0a66
Update current_image_tags.properties
v-shobhit Sep 9, 2025
6ba3720
address review
v-shobhit Sep 9, 2025
0148f97
review - keep pytorch comment
v-shobhit Sep 9, 2025
a387c11
Update install.sh
v-shobhit Sep 9, 2025
39d02b3
Update install.sh
v-shobhit Sep 9, 2025
42f44b0
Update Dockerfile.multi
v-shobhit Sep 9, 2025
b27a7b8
TensorRT-LLM -> TensorRT LLM
v-shobhit Sep 9, 2025
6c66a1c
Update Dockerfile.multi
v-shobhit Sep 9, 2025
a80a5c4
Update Dockerfile.multi
v-shobhit Sep 9, 2025
6318484
remove hyphen
v-shobhit Sep 10, 2025
39b1df3
add missing 'rm'
v-shobhit Sep 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,6 @@ compile_commands.json
.dir-locals.el
.devcontainer/devcontainer.env.user
.devcontainer/docker-compose.override.yml

# Enroot sqsh files
enroot/tensorrt_llm.devel.sqsh
85 changes: 34 additions & 51 deletions docker/Dockerfile.multi
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ ARG TRITON_BASE_TAG=25.06-py3
ARG DEVEL_IMAGE=devel

FROM ${BASE_IMAGE}:${BASE_TAG} AS base
COPY docker/common/install.sh install.sh
COPY docker/common/install_base.sh install_base.sh
COPY docker/common/install_cmake.sh install_cmake.sh
COPY docker/common/install_ccache.sh install_ccache.sh
COPY docker/common/install_cuda_toolkit.sh install_cuda_toolkit.sh
COPY docker/common/install_tensorrt.sh install_tensorrt.sh
COPY docker/common/install_polygraphy.sh install_polygraphy.sh
COPY docker/common/install_mpi4py.sh install_mpi4py.sh
COPY docker/common/install_pytorch.sh install_pytorch.sh

# Add NVIDIA EULA and AI Terms labels
LABEL com.nvidia.eula="https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/"
Expand All @@ -19,66 +28,40 @@ ARG GITHUB_MIRROR=""
RUN echo "Using GitHub mirror: $GITHUB_MIRROR"
SHELL ["/bin/bash", "-c"]

# Clean up the pip constraint file from the base NGC PyTorch image.
RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true

FROM base AS devel

ARG PYTHON_VERSION="3.12.3"
RUN echo "Using Python version: $PYTHON_VERSION"
COPY docker/common/install_base.sh install_base.sh
RUN GITHUB_MIRROR=$GITHUB_MIRROR bash ./install_base.sh $PYTHON_VERSION && rm install_base.sh

COPY docker/common/install_cmake.sh install_cmake.sh
RUN GITHUB_MIRROR=$GITHUB_MIRROR bash ./install_cmake.sh && rm install_cmake.sh

COPY docker/common/install_ccache.sh install_ccache.sh
RUN GITHUB_MIRROR=$GITHUB_MIRROR bash ./install_ccache.sh && rm install_ccache.sh

# Only take effect when the base image is Rocky Linux 8 with old CUDA version.
COPY docker/common/install_cuda_toolkit.sh install_cuda_toolkit.sh
RUN bash ./install_cuda_toolkit.sh && rm install_cuda_toolkit.sh

# Download & install latest TRT release
ARG TRT_VER
ARG CUDA_VER
ARG CUDNN_VER
ARG NCCL_VER
ARG CUBLAS_VER
COPY docker/common/install_tensorrt.sh install_tensorrt.sh
RUN bash ./install_tensorrt.sh \
--TRT_VER=${TRT_VER} \
--CUDA_VER=${CUDA_VER} \
--CUDNN_VER=${CUDNN_VER} \
--NCCL_VER=${NCCL_VER} \
--CUBLAS_VER=${CUBLAS_VER} && \
rm install_tensorrt.sh

# Install latest Polygraphy
COPY docker/common/install_polygraphy.sh install_polygraphy.sh
RUN bash ./install_polygraphy.sh && rm install_polygraphy.sh

# Install mpi4py
COPY docker/common/install_mpi4py.sh install_mpi4py.sh
RUN GITHUB_MIRROR=$GITHUB_MIRROR bash ./install_mpi4py.sh && rm install_mpi4py.sh

# Install PyTorch
ARG TORCH_INSTALL_TYPE="skip"
COPY docker/common/install_pytorch.sh install_pytorch.sh
RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
#
# NB: PyTorch requires this to be < 1.0
ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"

# Install OpenCV with FFMPEG support
RUN pip3 uninstall -y opencv && \
rm -rf /usr/local/lib/python3*/dist-packages/cv2/ && \
pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir

# WARs against security issues inherited from pytorch:25.06
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
RUN pip3 install --upgrade --no-cache-dir \
"protobuf>=4.25.8"
# Install the development dependencies, one component per RUN instruction.
# install.sh maps each flag to the matching install_*.sh helper that was
# copied into the base stage; the opencv and protobuf steps are performed by
# install.sh itself through pip3.
RUN bash ./install.sh --base
RUN bash ./install.sh --cmake
RUN bash ./install.sh --ccache
RUN bash ./install.sh --cuda_toolkit
# install.sh reads the TensorRT-related versions from its environment and
# forwards them to install_tensorrt.sh, so they are exported for this step.
RUN export TRT_VER=${TRT_VER} \
&& export CUDA_VER=${CUDA_VER} \
&& export CUDNN_VER=${CUDNN_VER} \
&& export NCCL_VER=${NCCL_VER} \
&& export CUBLAS_VER=${CUBLAS_VER} \
&& bash ./install.sh --tensorrt
RUN bash ./install.sh --polygraphy
RUN bash ./install.sh --mpi4py
RUN bash ./install.sh --pytorch
RUN bash ./install.sh --opencv
RUN bash ./install.sh --protobuf

# Remove the installer scripts that were copied in by the base stage.
RUN rm install.sh \
install_base.sh \
install_cmake.sh \
install_ccache.sh \
install_cuda_toolkit.sh \
install_tensorrt.sh \
install_polygraphy.sh \
install_mpi4py.sh \
install_pytorch.sh

FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton

Expand Down
149 changes: 149 additions & 0 deletions docker/common/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/bin/bash
#
# install.sh - single entry point for the dependency installers that live
# next to this script.
#
# Usage: install.sh [--base] [--cmake] [--ccache] [--cuda_toolkit]
#                   [--tensorrt] [--polygraphy] [--mpi4py] [--pytorch]
#                   [--opencv] [--protobuf] [--all] [-h|--help]
#
# Each flag selects one component and --all selects every component. The
# selected components are installed in a fixed order further down in this
# script, independent of the order of the flags.
#
# Exit status: 0 on success, 1 on an unknown option; any failing command
# aborts the script because of `set -e`.
set -Eeuo pipefail
shopt -s nullglob
trap 'echo "[install.sh] Error on line $LINENO" >&2' ERR

# Resolve script directory for robust relative pathing
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

# Component selection flags: 0 = skip, 1 = install.
base=0
cmake=0
ccache=0
cuda_toolkit=0
tensorrt=0
polygraphy=0
mpi4py=0
pytorch=0
opencv=0
protobuf=0

# Print a short usage summary on stdout.
usage() {
    cat <<'EOF'
Usage: install.sh [OPTION]...
Install the selected development dependencies.

  --base          base dependencies
  --cmake         CMake
  --ccache        ccache
  --cuda_toolkit  CUDA toolkit
  --tensorrt      TensorRT (reads TRT_VER, CUDA_VER, CUDNN_VER, NCCL_VER,
                  CUBLAS_VER from the environment)
  --polygraphy    Polygraphy
  --mpi4py        mpi4py
  --pytorch       PyTorch
  --opencv        OpenCV
  --protobuf      protobuf
  --all           every component listed above
  -h, --help      show this help and exit
EOF
}

# Translate command-line flags into the selection variables above.
# Arguments: the script's command-line arguments.
# Exits with status 1 (diagnostics on stderr) on an unrecognised option and
# with status 0 after printing the usage text for -h/--help.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --base)         base=1 ;;
            --cmake)        cmake=1 ;;
            --ccache)       ccache=1 ;;
            --cuda_toolkit) cuda_toolkit=1 ;;
            --tensorrt)     tensorrt=1 ;;
            --polygraphy)   polygraphy=1 ;;
            --mpi4py)       mpi4py=1 ;;
            --pytorch)      pytorch=1 ;;
            --opencv)       opencv=1 ;;
            --protobuf)     protobuf=1 ;;
            --all)
                base=1
                cmake=1
                ccache=1
                cuda_toolkit=1
                tensorrt=1
                polygraphy=1
                mpi4py=1
                pytorch=1
                opencv=1
                protobuf=1
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                # Diagnostics belong on stderr so they are not lost when
                # stdout is redirected or filtered.
                echo "Unknown option: $1" >&2
                usage >&2
                exit 1
                ;;
        esac
        shift 1
    done
}

# Running without any flag is accepted (nothing gets installed), but it is
# almost certainly a mistake, so say so instead of exiting silently.
if [[ $# -eq 0 ]]; then
    echo "[install.sh] No component selected; nothing will be installed (see --help)." >&2
fi

parse_args "$@"

# Settings forwarded to the individual installers. A value that is already
# present in the environment (for example a Docker build argument or a
# variable passed in by the caller) wins; otherwise the default applies.
GITHUB_MIRROR="${GITHUB_MIRROR:-}"
TORCH_INSTALL_TYPE="${TORCH_INSTALL_TYPE:-skip}"
PYTHON_VERSION="${PYTHON_VERSION:-3.12.3}"

# NB: PyTorch requires this to be < 1.0
# The export only affects this script and the installers it launches; it does
# not persist in the environment of the resulting image or container.
export PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"

if [[ $base -eq 1 ]]; then
    echo "Installing base dependencies..."
    # Clean up the pip constraint file from the base NGC PyTorch image.
    # Best effort: a missing or unwritable file is not an error.
    [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true

    echo "Using Python version: $PYTHON_VERSION"

    GITHUB_MIRROR="$GITHUB_MIRROR" bash "$SCRIPT_DIR/install_base.sh" "$PYTHON_VERSION"
fi

if [[ $cmake -eq 1 ]]; then
    echo "Installing CMake..."
    GITHUB_MIRROR="$GITHUB_MIRROR" bash "$SCRIPT_DIR/install_cmake.sh"
fi

if [[ $ccache -eq 1 ]]; then
    echo "Installing ccache..."
    GITHUB_MIRROR="$GITHUB_MIRROR" bash "$SCRIPT_DIR/install_ccache.sh"
fi

if [[ $cuda_toolkit -eq 1 ]]; then
    echo "Installing CUDA toolkit..."
    GITHUB_MIRROR="$GITHUB_MIRROR" bash "$SCRIPT_DIR/install_cuda_toolkit.sh"
fi

if [[ $tensorrt -eq 1 ]]; then
    echo "Installing TensorRT..."
    # The version variables are optional: an unset variable is forwarded as
    # an empty value instead of aborting the script under `set -u`.
    bash "$SCRIPT_DIR/install_tensorrt.sh" \
        --TRT_VER="${TRT_VER:-}" \
        --CUDA_VER="${CUDA_VER:-}" \
        --CUDNN_VER="${CUDNN_VER:-}" \
        --NCCL_VER="${NCCL_VER:-}" \
        --CUBLAS_VER="${CUBLAS_VER:-}"
fi

if [[ $polygraphy -eq 1 ]]; then
    echo "Installing Polygraphy..."
    bash "$SCRIPT_DIR/install_polygraphy.sh"
fi

if [[ $mpi4py -eq 1 ]]; then
    echo "Installing mpi4py..."
    GITHUB_MIRROR="$GITHUB_MIRROR" bash "$SCRIPT_DIR/install_mpi4py.sh"
fi

if [[ $pytorch -eq 1 ]]; then
    echo "Installing PyTorch..."
    bash "$SCRIPT_DIR/install_pytorch.sh" "$TORCH_INSTALL_TYPE"
fi

if [[ $opencv -eq 1 ]]; then
    echo "Installing OpenCV..."
    # Run the steps as separate commands rather than one `a && b && c` list:
    # inside a && list only a failure of the final command triggers `set -e`
    # and the ERR trap, so an earlier failure would go unnoticed.
    pip3 uninstall -y opencv
    # With nullglob a non-matching pattern expands to nothing; `rm -rf`
    # without operands is a harmless no-op.
    rm -rf /usr/local/lib/python3*/dist-packages/cv2/
    pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
fi

if [[ $protobuf -eq 1 ]]; then
    echo "Installing protobuf..."
    pip3 install --upgrade --no-cache-dir \
        "protobuf>=4.25.8"
fi

25 changes: 25 additions & 0 deletions enroot/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Build a TensorRT LLM development environment as an enroot squashfs image
# by running docker/common/install.sh inside the base container via srun.
#
# User-tunable variables (override on the command line or in the environment):
#   BASE_IMAGE, BASE_TAG  container image to start from; default to the values
#                         of the matching ARG lines in docker/Dockerfile.multi
#   SQSH_PATH             file the resulting squashfs image is saved to
#   SOURCE_DIR            repository checkout mounted at /code/tensorrt_llm
#
# TRT_VER, CUDA_VER, CUDNN_VER, NCCL_VER and CUBLAS_VER are handed to the
# container through --container-env.

# Directory of this makefile (with trailing slash), so that the defaults below
# do not depend on the directory make was invoked from.
enroot_makefile_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
dockerfile_multi := $(enroot_makefile_dir)../docker/Dockerfile.multi

# `?=` would create recursively expanded variables and re-run the grep
# pipeline on every reference; assign once with `:=` when nothing else
# (command line, environment, earlier makefile) has defined the variable.
ifeq ($(origin BASE_IMAGE),undefined)
BASE_IMAGE := $(shell grep '^ARG BASE_IMAGE=' '$(dockerfile_multi)' | grep -o '=.*' | tr -d '="')
endif
ifeq ($(origin BASE_TAG),undefined)
BASE_TAG := $(shell grep '^ARG BASE_TAG=' '$(dockerfile_multi)' | grep -o '=.*' | tr -d '="')
endif
SQSH_PATH ?= tensorrt_llm.devel.sqsh
ifeq ($(origin SOURCE_DIR),undefined)
SOURCE_DIR := $(realpath $(enroot_makefile_dir)..)
endif

# Include guard: define the targets only once.
ifndef MAKEFILE_PYXIS_INCLUDED
MAKEFILE_PYXIS_INCLUDED := 1

# build_sqsh is a command, not a file; keep it runnable even if a file of
# that name exists.
.PHONY: build_sqsh

build_sqsh:
	$(if $(strip $(BASE_IMAGE)),,$(error BASE_IMAGE is empty; set it or check docker/Dockerfile.multi))
	$(if $(strip $(BASE_TAG)),,$(error BASE_TAG is empty; set it or check docker/Dockerfile.multi))
	@echo "Building trtllm sqsh image. Base image: $(BASE_IMAGE):$(BASE_TAG). Location: $(SQSH_PATH)"
	srun \
		--container-image "$(BASE_IMAGE):$(BASE_TAG)" \
		--container-save "$(SQSH_PATH)" \
		--container-mounts "$(SOURCE_DIR):/code/tensorrt_llm" --container-workdir /code/tensorrt_llm/docker/common \
		--container-mount-home --container-remap-root \
		--container-env TRT_VER \
		--container-env CUDA_VER \
		--container-env CUDNN_VER \
		--container-env NCCL_VER \
		--container-env CUBLAS_VER \
		./install.sh --all

endif
Loading