From 3217143bdc3ee532a82ba1f9649f914ac6a3d4b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?=
Date: Tue, 18 Jun 2024 15:44:52 +0200
Subject: [PATCH 1/2] Dockerfile.ubi: add missing requirements-*.txt bind mounts

---
 Dockerfile.ubi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 6c2bd732c5df..6d85a0869622 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -113,6 +113,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
     --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
     --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
+    --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
+    --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
     pip3 install \
         -r requirements-cuda.txt \
         -r requirements-dev.txt

From 3aef43e8a48675f1f6cac964da7bf06c9057060a Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Wed, 29 May 2024 11:01:40 +0000
Subject: [PATCH 2/2] add triton CustomCacheManager

fixes RHOAIENG-8043

Co-authored-by: Chih-Chieh-Yang
Signed-off-by: Thomas Parnell
---
 Dockerfile.ubi                 |  6 +++-
 extras/custom_cache_manager.py | 58 ++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 extras/custom_cache_manager.py

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 6d85a0869622..294399be24c4 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -180,6 +180,9 @@ ENV PATH=$VIRTUAL_ENV/bin/:$PATH
 RUN microdnf install -y gcc \
     && microdnf clean all
 
+# Custom cache manager (fix for https://issues.redhat.com/browse/RHOAIENG-8043)
+COPY extras/custom_cache_manager.py /opt/vllm/lib/python3.11/site-packages/custom_cache_manager.py
+
 # install vllm wheel first, so that torch etc will be installed
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
     --mount=type=cache,target=/root/.cache/pip \
@@ -189,7 +192,8 @@ ENV HF_HUB_OFFLINE=1 \
     PORT=8000 \
     HOME=/home/vllm \
     VLLM_USAGE_SOURCE=production-docker-image \
-    VLLM_WORKER_MULTIPROC_METHOD=fork
+    VLLM_WORKER_MULTIPROC_METHOD=fork \
+    TRITON_CACHE_MANAGER="custom_cache_manager:CustomCacheManager"
 
 # setup non-root user for OpenShift
 RUN umask 002 \
diff --git a/extras/custom_cache_manager.py b/extras/custom_cache_manager.py
new file mode 100644
index 000000000000..c83ed5b6e865
--- /dev/null
+++ b/extras/custom_cache_manager.py
@@ -0,0 +1,58 @@
+import os
+
+from triton.runtime.cache import (FileCacheManager, default_cache_dir,
+                                  default_dump_dir, default_override_dir)
+
+
+class CustomCacheManager(FileCacheManager):
+    """Triton file cache manager that uses a per-process cache directory.
+
+    In the default mode (neither ``dump`` nor ``override``) the base cache
+    directory gets a ``_PID`` suffix, so every process works in its own
+    directory. Presumably this keeps forked vLLM workers from clashing on a
+    shared Triton cache (RHOAIENG-8043) -- TODO confirm against the issue.
+
+    Enabled by setting the ``TRITON_CACHE_MANAGER`` environment variable to
+    ``custom_cache_manager:CustomCacheManager``.
+    """
+
+    def __init__(self, key, override=False, dump=False):
+        """Set ``cache_dir`` and ``lock_path`` for the cache entry ``key``.
+
+        Does not call ``super().__init__()``; all attributes are assigned
+        here.
+
+        Args:
+            key: Name of the per-entry subdirectory.
+            override: Use Triton's override directory. Nothing is created
+                on disk and ``lock_path`` stays ``None``.
+            dump: Use Triton's dump directory. Takes precedence over
+                ``override``.
+
+        Raises:
+            RuntimeError: If no base cache directory can be determined.
+        """
+        self.key = key
+        self.lock_path = None
+        if dump:
+            self.cache_dir = default_dump_dir()
+            self.cache_dir = os.path.join(self.cache_dir, self.key)
+            self.lock_path = os.path.join(self.cache_dir, "lock")
+            os.makedirs(self.cache_dir, exist_ok=True)
+        elif override:
+            self.cache_dir = default_override_dir()
+            self.cache_dir = os.path.join(self.cache_dir, self.key)
+        else:
+            # create cache directory if it doesn't exist
+            self.cache_dir = os.getenv("TRITON_CACHE_DIR",
+                                       "").strip() or default_cache_dir()
+            if self.cache_dir:
+                # per-process suffix: each PID gets its own cache tree
+                self.cache_dir = f"{self.cache_dir}_{os.getpid()}"
+                self.cache_dir = os.path.join(self.cache_dir, self.key)
+                self.lock_path = os.path.join(self.cache_dir, "lock")
+                os.makedirs(self.cache_dir, exist_ok=True)
+            else:
+                raise RuntimeError("Could not create or locate cache dir")
+
+        print(f"Triton cache dir: {self.cache_dir=}")