From a58d5f263e3c668019f13cff54ca7c01fa6c6618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Mon, 12 Aug 2024 11:09:51 +0200 Subject: [PATCH 1/5] deps: bump vllm-tgis-adapter to 0.2.4 --- Dockerfile.ubi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 75082aa77502..4fe9771ed4bc 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -190,7 +190,7 @@ FROM vllm-openai as vllm-grpc-adapter USER root RUN --mount=type=cache,target=/root/.cache/pip \ - pip install vllm-tgis-adapter==0.2.3 + pip install vllm-tgis-adapter==0.2.4 ENV GRPC_PORT=8033 USER 2000 From 5f322d49d159b578f0a0b998feded9c9fd47ce33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Tue, 13 Aug 2024 00:08:56 +0200 Subject: [PATCH 2/5] Dockerfile: use uv pip everywhere (it's faster) --- Dockerfile.ubi | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 4fe9771ed4bc..7ee47106a9f0 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -32,7 +32,7 @@ ENV VIRTUAL_ENV=/opt/vllm ENV PATH="$VIRTUAL_ENV/bin:$PATH" RUN microdnf install -y \ python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \ - python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel && microdnf clean all + python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all ## CUDA Base ################################################################### @@ -57,9 +57,10 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH" # install cuda and common dependencies RUN --mount=type=cache,target=/root/.cache/pip \ + --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \ --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \ - pip install \ + uv pip install \ -r requirements-cuda.txt ## Development 
################################################################# @@ -67,13 +68,14 @@ FROM python-cuda-base AS dev # install build and runtime dependencies RUN --mount=type=cache,target=/root/.cache/pip \ + --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \ --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \ --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \ --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \ --mount=type=bind,source=requirements-adag.txt,target=requirements-adag.txt \ --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \ - pip3 install \ + uv pip install \ -r requirements-cuda.txt \ -r requirements-dev.txt @@ -82,8 +84,9 @@ FROM dev AS build # install build dependencies RUN --mount=type=cache,target=/root/.cache/pip \ + --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \ - pip install -r requirements-build.txt + uv pip install -r requirements-build.txt # install compiler cache to speed up compilation leveraging local or remote caching # git is required for the cutlass kernels @@ -121,6 +124,7 @@ COPY vllm vllm ENV CCACHE_DIR=/root/.cache/ccache RUN --mount=type=cache,target=/root/.cache/ccache \ --mount=type=cache,target=/root/.cache/pip \ + --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,src=.git,target=/workspace/.git \ env CFLAGS="-march=haswell" \ CXXFLAGS="$CFLAGS $CXXFLAGS" \ @@ -158,7 +162,8 @@ RUN microdnf install -y gcc \ # install vllm wheel first, so that torch etc will be installed RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \ --mount=type=cache,target=/root/.cache/pip \ - pip install $(echo dist/*.whl)'[tensorizer]' --verbose + --mount=type=cache,target=/root/.cache/uv \ + uv pip install $(echo dist/*.whl)'[tensorizer]' --verbose # Install 
libsodium for Tensorizer encryption RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \ @@ -166,7 +171,8 @@ RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/ && make install RUN --mount=type=cache,target=/root/.cache/pip \ - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp311-cp311-linux_x86_64.whl + --mount=type=cache,target=/root/.cache/uv \ + uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp311-cp311-linux_x86_64.whl ENV HF_HUB_OFFLINE=1 \ PORT=8000 \ From 6100606065a56f6eadd34ff46b8b5c85ad8bd16d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Tue, 13 Aug 2024 01:42:17 +0200 Subject: [PATCH 3/5] Dockerfile.ubi: cuda-base: add missing runtime deps --- Dockerfile.ubi | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 7ee47106a9f0..04baad853c07 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -42,8 +42,13 @@ RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \ https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo RUN microdnf install -y \ - cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \ - microdnf clean all + microdnf install -y \ + cuda-nvcc-12-4 \ + cuda-nvtx-12-4 \ + cuda-cudart-12-4 \ + cuda-compat-12-4 \ + cuda-libraries-devel-12-4 \ + && microdnf clean all ENV CUDA_HOME="/usr/local/cuda" \ PATH="${CUDA_HOME}/bin:${PATH}" \ From ad5028efdbec2b7728dc272ffa2f368825186cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Tue, 13 Aug 2024 00:09:59 +0200 Subject: [PATCH 4/5] Dockerfile.ubi: change release stage base to python-cuda-base --- Dockerfile.ubi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 04baad853c07..7620d06b78af 100644 --- a/Dockerfile.ubi 
+++ b/Dockerfile.ubi @@ -153,7 +153,7 @@ RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM RUN ./configure --prefix="/usr/" && make && make check ## Release ##################################################################### -FROM python-install AS vllm-openai +FROM python-cuda-base AS vllm-openai WORKDIR /workspace From c09e8f922652d9609c7741d9df8a3bec88bb45fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Tue, 13 Aug 2024 00:34:24 +0200 Subject: [PATCH 5/5] Dockerfile.ubi: add yum cache mount where possible --- Dockerfile.ubi | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 7620d06b78af..3fca4f419a2f 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -8,8 +8,9 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX" FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base ARG PYTHON_VERSION -RUN microdnf install -y \ - python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \ +RUN --mount=type=cache,target=/var/cache/yum \ + microdnf install -y \ + python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \ && microdnf clean all WORKDIR /workspace @@ -18,7 +19,8 @@ ENV LANG=C.UTF-8 \ LC_ALL=C.UTF-8 # Some utils for dev purposes - tar required for kubectl cp -RUN microdnf install -y \ +RUN --mount=type=cache,target=/var/cache/yum \ + microdnf install -y \ which procps findutils tar vim git\ && microdnf clean all @@ -30,9 +32,12 @@ ARG PYTHON_VERSION ENV VIRTUAL_ENV=/opt/vllm ENV PATH="$VIRTUAL_ENV/bin:$PATH" -RUN microdnf install -y \ - python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \ - python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all + +RUN --mount=type=cache,target=/var/cache/yum \ + --mount=type=cache,target=/root/.cache/pip \ + microdnf install -y \ + python${PYTHON_VERSION}-devel 
python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \ + python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all ## CUDA Base ################################################################### @@ -41,7 +46,7 @@ FROM python-install as cuda-base RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \ https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo -RUN microdnf install -y \ +RUN --mount=type=cache,target=/var/cache/yum \ microdnf install -y \ cuda-nvcc-12-4 \ cuda-nvtx-12-4 \ @@ -95,7 +100,11 @@ RUN --mount=type=cache,target=/root/.cache/pip \ # install compiler cache to speed up compilation leveraging local or remote caching # git is required for the cutlass kernels -RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all +RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \ + rpm -ql epel-release && \ + microdnf install -y git ccache && \ + microdnf clean all + # install build dependencies # copy input files