diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 6c2bd732c5df..294399be24c4 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -113,6 +113,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
     --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
     --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
+    --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
+    --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
     pip3 install \
         -r requirements-cuda.txt \
         -r requirements-dev.txt
@@ -178,6 +180,9 @@ ENV PATH=$VIRTUAL_ENV/bin/:$PATH
 RUN microdnf install -y gcc \
     && microdnf clean all
 
+# Custom cache manager (fix for https://issues.redhat.com/browse/RHOAIENG-8043)
+COPY extras/custom_cache_manager.py /opt/vllm/lib/python3.11/site-packages/custom_cache_manager.py
+
 # install vllm wheel first, so that torch etc will be installed
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
     --mount=type=cache,target=/root/.cache/pip \
@@ -187,7 +192,8 @@ ENV HF_HUB_OFFLINE=1 \
     PORT=8000 \
     HOME=/home/vllm \
     VLLM_USAGE_SOURCE=production-docker-image \
-    VLLM_WORKER_MULTIPROC_METHOD=fork
+    VLLM_WORKER_MULTIPROC_METHOD=fork \
+    TRITON_CACHE_MANAGER="custom_cache_manager:CustomCacheManager"
 
 # setup non-root user for OpenShift
 RUN umask 002 \
diff --git a/extras/custom_cache_manager.py b/extras/custom_cache_manager.py
new file mode 100644
index 000000000000..c83ed5b6e865
--- /dev/null
+++ b/extras/custom_cache_manager.py
@@ -0,0 +1,50 @@
+"""Triton cache manager that gives every process its own cache directory."""
+import os
+
+from triton.runtime.cache import (FileCacheManager, default_cache_dir,
+                                  default_dump_dir, default_override_dir)
+
+
+class CustomCacheManager(FileCacheManager):
+    """``FileCacheManager`` variant with a PID-suffixed default cache dir.
+
+    Enabled by pointing the ``TRITON_CACHE_MANAGER`` environment variable at
+    ``custom_cache_manager:CustomCacheManager``.
+    """
+
+    def __init__(self, key, override=False, dump=False):
+        """Pick the cache directory (and lock file path) used for ``key``.
+
+        * ``dump``: ``<default_dump_dir()>/<key>``; created, with a lock
+          path.
+        * ``override`` (only when ``dump`` is false):
+          ``<default_override_dir()>/<key>``; not created, no lock path.
+        * otherwise: ``<base>_<pid>/<key>``, where ``<base>`` is the
+          stripped ``TRITON_CACHE_DIR`` value or, when that is empty,
+          ``default_cache_dir()``; created, with a lock path.
+
+        Raises:
+            RuntimeError: if no base cache directory could be determined.
+        """
+        self.key = key
+        self.lock_path = None
+
+        if override and not dump:
+            # Override directories get no lock path and are not created.
+            self.cache_dir = os.path.join(default_override_dir(), self.key)
+        else:
+            if dump:
+                parent_dir = default_dump_dir()
+            else:
+                env_dir = os.getenv("TRITON_CACHE_DIR", "").strip()
+                parent_dir = env_dir or default_cache_dir()
+                if not parent_dir:
+                    raise RuntimeError("Could not create or locate cache dir")
+                # Suffix with the PID so that each process uses its own
+                # directory instead of sharing one.
+                parent_dir = f"{parent_dir}_{os.getpid()}"
+            self.cache_dir = os.path.join(parent_dir, self.key)
+            self.lock_path = os.path.join(self.cache_dir, "lock")
+            os.makedirs(self.cache_dir, exist_ok=True)
+
+        print(f"Triton cache dir: {self.cache_dir=}")