From a58d5f263e3c668019f13cff54ca7c01fa6c6618 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= <dtrifiro@redhat.com>
Date: Mon, 12 Aug 2024 11:09:51 +0200
Subject: [PATCH 1/5] deps: bump vllm-tgis-adapter to 0.2.4

---
Review notes:
* Moves the vllm-tgis-adapter pin in the vllm-grpc-adapter stage from 0.2.3 to
  0.2.4; nothing else in the stage changes.
* The install runs as root with the /root/.cache/pip cache mount; the stage
  then sets GRPC_PORT and switches to USER 2000.

 Dockerfile.ubi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 75082aa77502..4fe9771ed4bc 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -190,7 +190,7 @@ FROM vllm-openai as vllm-grpc-adapter
 USER root
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install vllm-tgis-adapter==0.2.3
+    pip install vllm-tgis-adapter==0.2.4
 
 ENV GRPC_PORT=8033
 USER 2000

From 5f322d49d159b578f0a0b998feded9c9fd47ce33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= <dtrifiro@redhat.com>
Date: Tue, 13 Aug 2024 00:08:56 +0200
Subject: [PATCH 2/5] Dockerfile: use uv pip everywhere (it's faster)

---
Review notes:
* Adds uv to the packages pip installs into the virtualenv, then replaces
  "pip install" / "pip3 install" with "uv pip install" in the RUN steps
  touched below.
* Each touched step gains a /root/.cache/uv cache mount next to the existing
  /root/.cache/pip mount, which is kept.
* The step at -121 (ccache mount, CFLAGS/CXXFLAGS env) only gains the uv cache
  mount; the command it runs is outside this hunk.
* NOTE(review): the "pip install vllm-tgis-adapter" step edited in patch 1/5
  is not converted by this patch - confirm whether that is intentional given
  the "everywhere" in the subject.

 Dockerfile.ubi | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 4fe9771ed4bc..7ee47106a9f0 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -32,7 +32,7 @@ ENV VIRTUAL_ENV=/opt/vllm
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 RUN microdnf install -y \
     python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
-    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel && microdnf clean all
+    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all
 
 
 ## CUDA Base ###################################################################
@@ -57,9 +57,10 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 # install cuda and common dependencies
 RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
     --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
-    pip install \
+    uv pip install \
         -r requirements-cuda.txt
 
 ## Development #################################################################
@@ -67,13 +68,14 @@ FROM python-cuda-base AS dev
 
 # install build and runtime dependencies
 RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
     --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
     --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
     --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
     --mount=type=bind,source=requirements-adag.txt,target=requirements-adag.txt \
     --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
-    pip3 install \
+    uv pip install \
         -r requirements-cuda.txt \
         -r requirements-dev.txt
 
@@ -82,8 +84,9 @@ FROM dev AS build
 
 # install build dependencies
 RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
-    pip install -r requirements-build.txt
+    uv pip install -r requirements-build.txt
 
 # install compiler cache to speed up compilation leveraging local or remote caching
 # git is required for the cutlass kernels
@@ -121,6 +124,7 @@ COPY vllm vllm
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,src=.git,target=/workspace/.git \
     env CFLAGS="-march=haswell" \
         CXXFLAGS="$CFLAGS $CXXFLAGS" \
@@ -158,7 +162,8 @@ RUN microdnf install -y gcc \
 # install vllm wheel first, so that torch etc will be installed
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
     --mount=type=cache,target=/root/.cache/pip \
-    pip install $(echo dist/*.whl)'[tensorizer]' --verbose
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install $(echo dist/*.whl)'[tensorizer]' --verbose
 
 # Install libsodium for Tensorizer encryption
 RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
@@ -166,7 +171,8 @@ RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/
     && make install
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp311-cp311-linux_x86_64.whl
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp311-cp311-linux_x86_64.whl
 
 ENV HF_HUB_OFFLINE=1 \
     PORT=8000 \

From 6100606065a56f6eadd34ff46b8b5c85ad8bd16d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= <dtrifiro@redhat.com>
Date: Tue, 13 Aug 2024 01:42:17 +0200
Subject: [PATCH 3/5] Dockerfile.ubi: cuda-base: add missing runtime deps

---
Review notes:
* Adds cuda-cudart-12-4 and cuda-compat-12-4 to the CUDA package list and puts
  one package per line.
* The list is folded into the existing "RUN microdnf install -y \" line rather
  than adding a second "microdnf install -y \" beneath it, so the Dockerfile
  produced by this commit no longer runs
  "microdnf install -y microdnf install -y ...".
* Patch 5/5 is adjusted to match; the Dockerfile after the full series is
  unchanged.
* The blob ids on the index lines of patches 3-5 were not regenerated for the
  adjusted intermediate states.

 Dockerfile.ubi | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 7ee47106a9f0..04baad853c07 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -42,8 +42,12 @@ RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
         https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
 
 RUN microdnf install -y \
-        cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
-    microdnf clean all
+        cuda-nvcc-12-4 \
+        cuda-nvtx-12-4 \
+        cuda-cudart-12-4 \
+        cuda-compat-12-4 \
+        cuda-libraries-devel-12-4 \
+    && microdnf clean all
 
 ENV CUDA_HOME="/usr/local/cuda" \
     PATH="${CUDA_HOME}/bin:${PATH}" \

From ad5028efdbec2b7728dc272ffa2f368825186cc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= <dtrifiro@redhat.com>
Date: Tue, 13 Aug 2024 00:09:59 +0200
Subject: [PATCH 4/5] Dockerfile.ubi: change release stage base to
 python-cuda-base

---
Review notes:
* The vllm-openai release stage now starts from python-cuda-base instead of
  python-install. Only the hunk start line differs from the original patch
  (152 instead of 153), following the one-line-shorter result of patch 3/5.

 Dockerfile.ubi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 04baad853c07..7620d06b78af 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -152,7 +152,7 @@ RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM
 RUN ./configure --prefix="/usr/" && make && make check
 
 ## Release #####################################################################
-FROM python-install AS vllm-openai
+FROM python-cuda-base AS vllm-openai
 
 WORKDIR /workspace
 

From c09e8f922652d9609c7741d9df8a3bec88bb45fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= <dtrifiro@redhat.com>
Date: Tue, 13 Aug 2024 00:34:24 +0200
Subject: [PATCH 5/5] Dockerfile.ubi: add yum cache bind mount where possible

---
Review notes:
* The CUDA hunk now adds both the cache-mount line and the
  "microdnf install -y \" line, because patch 3/5 no longer leaves a duplicated
  install line behind to reuse.
* Every step that gains the /var/cache/yum mount still ends with
  "microdnf clean all"; presumably that also empties the mounted cache -
  TODO confirm the mount is actually reused between builds.
* The virtualenv step mounts /root/.cache/pip but still calls
  "pip install --no-cache".
* The epel-release / ccache step is only reflowed onto several lines; it gets
  no cache mount.

 Dockerfile.ubi | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 7620d06b78af..3fca4f419a2f 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -8,8 +8,9 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
 ARG PYTHON_VERSION
 
-RUN microdnf install -y \
-    python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
+RUN --mount=type=cache,target=/var/cache/yum \
+    microdnf install -y \
+        python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
     && microdnf clean all
 
 WORKDIR /workspace
@@ -18,7 +19,8 @@ ENV LANG=C.UTF-8 \
     LC_ALL=C.UTF-8
 
 # Some utils for dev purposes - tar required for kubectl cp
-RUN microdnf install -y \
+RUN --mount=type=cache,target=/var/cache/yum \
+    microdnf install -y \
         which procps findutils tar vim git\
     && microdnf clean all
 
@@ -30,9 +32,12 @@ ARG PYTHON_VERSION
 
 ENV VIRTUAL_ENV=/opt/vllm
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-RUN microdnf install -y \
-    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
-    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all
+
+RUN --mount=type=cache,target=/var/cache/yum \
+    --mount=type=cache,target=/root/.cache/pip \
+    microdnf install -y \
+        python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
+        python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all
 
 
 ## CUDA Base ###################################################################
@@ -41,7 +46,8 @@ FROM python-install as cuda-base
 RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
         https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
 
-RUN microdnf install -y \
+RUN --mount=type=cache,target=/var/cache/yum \
+    microdnf install -y \
         cuda-nvcc-12-4 \
         cuda-nvtx-12-4 \
         cuda-cudart-12-4 \
@@ -94,7 +100,11 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 
 # install compiler cache to speed up compilation leveraging local or remote caching
 # git is required for the cutlass kernels
-RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all
+RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    rpm -ql epel-release && \
+    microdnf install -y git ccache && \
+    microdnf clean all
+
 # install build dependencies
 
 # copy input files