diff --git a/Dockerfile.ubi b/Dockerfile.ubi new file mode 100644 index 000000000000..3226a24ba1ea --- /dev/null +++ b/Dockerfile.ubi @@ -0,0 +1,213 @@ +## Global Args ################################################################# +ARG BASE_UBI_IMAGE_TAG=9.4 +ARG PYTHON_VERSION=3.11 + +ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX" + + +## Base Layer ################################################################## +FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base +ARG PYTHON_VERSION + +RUN microdnf install -y \ + python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \ + && microdnf clean all + +WORKDIR /workspace + +ENV LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 + +# Some utils for dev purposes - tar required for kubectl cp +RUN microdnf install -y \ + which procps findutils tar vim git\ + && microdnf clean all + + +## Python Installer ############################################################ +FROM base as python-install + +ARG PYTHON_VERSION + +ENV VIRTUAL_ENV=/opt/vllm +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN microdnf install -y \ + python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \ + python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel && microdnf clean all + + +## CUDA Base ################################################################### +FROM python-install as cuda-base + +# The Nvidia operator won't allow deploying on CUDA 12.0 hosts if +# this env var is set to 12.2.0, even though it's compatible +#ENV CUDA_VERSION=12.2.0 \ +ENV CUDA_VERSION=12.0.0 \ + NV_CUDA_LIB_VERSION=12.2.0-1 \ + NVIDIA_VISIBLE_DEVICES=all \ + NVIDIA_DRIVER_CAPABILITIES=compute,utility \ + NV_CUDA_CUDART_VERSION=12.2.53-1 \ + NV_CUDA_COMPAT_VERSION=535.104.12 + +RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \ + https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo + +RUN microdnf install -y \ + cuda-cudart-12-2-${NV_CUDA_CUDART_VERSION} \ + cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \ + && microdnf clean all + + +ARG CUDA_HOME="/usr/local/cuda" +ENV CUDA_HOME=${CUDA_HOME}\ + PATH="${CUDA_HOME}/bin:${PATH}" \ + LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}" + + +## CUDA Development ############################################################ +FROM cuda-base as cuda-devel + +ENV NV_CUDA_CUDART_DEV_VERSION=12.2.53-1 \ + NV_NVML_DEV_VERSION=12.2.81-1 \ + NV_LIBCUBLAS_DEV_VERSION=12.2.1.16-1 \ + NV_LIBNPP_DEV_VERSION=12.1.1.14-1 \ + NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.5-1+cuda12.2 + +RUN microdnf install -y \ + cuda-command-line-tools-12-2-${NV_CUDA_LIB_VERSION} \ + cuda-libraries-devel-12-2-${NV_CUDA_LIB_VERSION} \ + cuda-minimal-build-12-2-${NV_CUDA_LIB_VERSION} \ + cuda-cudart-devel-12-2-${NV_CUDA_CUDART_DEV_VERSION} \ + cuda-nvml-devel-12-2-${NV_NVML_DEV_VERSION} \ + libcublas-devel-12-2-${NV_LIBCUBLAS_DEV_VERSION} \ + libnpp-devel-12-2-${NV_LIBNPP_DEV_VERSION} \ + libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \ + && microdnf clean all + +ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs" + +# Workaround for https://github.com/openai/triton/issues/2507 and +# https://github.com/pytorch/pytorch/issues/107960 -- hopefully +# this won't be needed for future versions of this docker image +# or future versions of triton. +RUN ldconfig /usr/local/cuda-12.2/compat/ + +## Python cuda base ################################################################# +FROM cuda-devel AS python-cuda-base + +ENV VIRTUAL_ENV=/opt/vllm +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +# install cuda and common dependencies +RUN --mount=type=cache,target=/root/.cache/pip \ + --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \ + --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \ + pip install \ + -r requirements-cuda.txt + +## Development ################################################################# +FROM python-cuda-base AS dev + +# install build and runtime dependencies +RUN --mount=type=cache,target=/root/.cache/pip \ + --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \ + --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \ + --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \ + pip3 install \ + -r requirements-cuda.txt \ + -r requirements-dev.txt + +## Builder ##################################################################### +FROM dev AS build + +# install build dependencies +RUN --mount=type=cache,target=/root/.cache/pip \ + --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \ + pip install -r requirements-build.txt + +# install compiler cache to speed up compilation leveraging local or remote caching +RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y ccache && microdnf clean all +# install build dependencies + +# copy input files +COPY csrc csrc +COPY setup.py setup.py +COPY cmake cmake +COPY CMakeLists.txt CMakeLists.txt +COPY requirements-common.txt requirements-common.txt +COPY requirements-cuda.txt requirements-cuda.txt +COPY pyproject.toml pyproject.toml + +ARG TORCH_CUDA_ARCH_LIST +ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST + +# max jobs used by Ninja to build extensions +ARG max_jobs=2 +ENV MAX_JOBS=${max_jobs} +# number of threads used by nvcc +ARG nvcc_threads=8 +ENV NVCC_THREADS=$nvcc_threads +# make sure punica kernels are built (for LoRA) +ENV VLLM_INSTALL_PUNICA_KERNELS=1 + +# Make sure the cuda environment is in the PATH +ENV PATH=/usr/local/cuda/bin:$PATH +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH + +# Copy the entire directory before building wheel +COPY vllm vllm + +ENV CCACHE_DIR=/root/.cache/ccache +RUN --mount=type=cache,target=/root/.cache/ccache \ + --mount=type=cache,target=/root/.cache/pip \ + CMAKE_BUILD_TYPE=Release python3 setup.py bdist_wheel --dist-dir=dist + +## Release ##################################################################### +# Note from the non-UBI Dockerfile: +# We used base cuda image because pytorch installs its own cuda libraries. +# However pynccl depends on cuda libraries so we had to switch to the runtime image +# In the future it would be nice to get a container with pytorch and cuda without duplicating cuda +FROM python-install AS vllm-openai + +WORKDIR /workspace + +ENV VIRTUAL_ENV=/opt/vllm +ENV PATH=$VIRTUAL_ENV/bin/:$PATH + +# Triton needs a CC compiler +RUN microdnf install -y gcc \ + && microdnf clean all + +# install vllm wheel first, so that torch etc will be installed +RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \ + --mount=type=cache,target=/root/.cache/pip \ + pip install dist/*.whl --verbose + +# vllm requires a specific nccl version built from source distribution +# See https://github.com/NVIDIA/nccl/issues/1234 +RUN pip install \ + -v \ + --force-reinstall \ + --no-binary="all" \ + --no-cache-dir \ + "vllm-nccl-cu12==2.18.1.0.4.0" && \ + mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /opt/vllm/lib/ && \ + chmod 0755 /opt/vllm/lib/libnccl.so.2.18.1 + + +ENV HF_HUB_OFFLINE=1 \ + PORT=8000 \ + HOME=/home/vllm \ + VLLM_NCCL_SO_PATH=/opt/vllm/lib/libnccl.so.2.18.1 \ + VLLM_USAGE_SOURCE=production-docker-image \ + VLLM_WORKER_MULTIPROC_METHOD=fork + +# setup non-root user for OpenShift +RUN umask 002 \ + && useradd --uid 2000 --gid 0 vllm \ + && chmod g+rwx $HOME /usr/src /workspace + +COPY LICENSE /licenses/vllm.md + +USER 2000 +ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]