From f5d8dd14239bd16341bd4c760a84698586f6edac Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Thu, 12 Mar 2026 23:54:48 -0500 Subject: [PATCH 1/4] [CI] Add persistent cache mounts for all CI test downloads and media URLs Signed-off-by: Andreas Karatzas --- .../scripts/hardware_ci/run-amd-test.sh | 19 ++++++++ tests/conftest.py | 8 ++-- .../openai/test_token_in_token_out.py | 7 +-- tests/evals/gpt_oss/test_gpqa_correctness.py | 4 +- tests/evals/gsm8k/gsm8k_eval.py | 4 +- .../prithvi_io_processor/prithvi_processor.py | 24 ++++++++-- .../test_extraction.py | 2 +- vllm/envs.py | 5 ++ vllm/multimodal/media/connector.py | 47 +++++++++++++++++++ 9 files changed, 103 insertions(+), 17 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 8895771f0a40..c0e566cbe1f4 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -382,6 +382,18 @@ HF_CACHE="$(realpath ~)/huggingface" mkdir -p "${HF_CACHE}" HF_MOUNT="/root/.cache/huggingface" +MODELSCOPE_CACHE="$(realpath ~)/modelscope" +mkdir -p "${MODELSCOPE_CACHE}" +MODELSCOPE_MOUNT="/root/.cache/modelscope" + +VLLM_TEST_CACHE="$(realpath ~)/vllm-test-cache" +mkdir -p "${VLLM_TEST_CACHE}" +VLLM_TEST_CACHE_MOUNT="/root/.cache/vllm-test-cache" + +VLLM_CACHE="$(realpath ~)/vllm-cache" +mkdir -p "${VLLM_CACHE}" +VLLM_CACHE_MOUNT="/root/.cache/vllm" + # ---- Command source selection ---- # Prefer VLLM_TEST_COMMANDS (preserves all inner quoting intact). # Fall back to $* for backward compatibility, but warn that inner @@ -493,7 +505,14 @@ else -e AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY \ -v "${HF_CACHE}:${HF_MOUNT}" \ + -v "${MODELSCOPE_CACHE}:${MODELSCOPE_MOUNT}" \ + -v "${VLLM_TEST_CACHE}:${VLLM_TEST_CACHE_MOUNT}" \ + -v "${VLLM_CACHE}:${VLLM_CACHE_MOUNT}" \ -e "HF_HOME=${HF_MOUNT}" \ + -e "MODELSCOPE_CACHE=${MODELSCOPE_MOUNT}" \ + -e "VLLM_TEST_CACHE=${VLLM_TEST_CACHE_MOUNT}" \ + -e "VLLM_CACHE_ROOT=${VLLM_CACHE_MOUNT}" \ + -e "VLLM_MEDIA_CACHE=${VLLM_CACHE_MOUNT}/media_cache" \ -e "PYTHONPATH=${MYPYTHONPATH}" \ --name "${container_name}" \ "${image_name}" \ diff --git a/tests/conftest.py b/tests/conftest.py index 719bfa5ed1f0..a0f8e24816ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1288,10 +1288,10 @@ def num_gpus_available(): return current_platform.device_count() -temp_dir = tempfile.gettempdir() -_dummy_opt_path = os.path.join(temp_dir, "dummy_opt") -_dummy_llava_path = os.path.join(temp_dir, "dummy_llava") -_dummy_gemma2_embedding_path = os.path.join(temp_dir, "dummy_gemma2_embedding") +_test_cache_dir = os.environ.get("VLLM_TEST_CACHE", tempfile.gettempdir()) +_dummy_opt_path = os.path.join(_test_cache_dir, "dummy_opt") +_dummy_llava_path = os.path.join(_test_cache_dir, "dummy_llava") +_dummy_gemma2_embedding_path = os.path.join(_test_cache_dir, "dummy_gemma2_embedding") @pytest.fixture diff --git a/tests/entrypoints/openai/test_token_in_token_out.py b/tests/entrypoints/openai/test_token_in_token_out.py index c7f8abe27e6e..d3b40655ee25 100644 --- a/tests/entrypoints/openai/test_token_in_token_out.py +++ b/tests/entrypoints/openai/test_token_in_token_out.py @@ -1,9 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import os -import tempfile - import pytest from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf @@ -12,7 +9,7 @@ from ...utils import RemoteOpenAIServer MODEL_NAME = "Qwen/Qwen3-0.6B" -MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b") +MODEL_PATH = MODEL_NAME @pytest.fixture(scope="module") @@ -21,7 +18,7 @@ def server(): MODEL_PATH = download_weights_from_hf( MODEL_NAME, allow_patterns=["*"], - cache_dir=MODEL_PATH, + cache_dir=None, ignore_patterns=["tokenizer*", "vocab*", "*.safetensors"], ) args = [ diff --git a/tests/evals/gpt_oss/test_gpqa_correctness.py b/tests/evals/gpt_oss/test_gpqa_correctness.py index 63188ec40767..3fb6cb9ded3d 100644 --- a/tests/evals/gpt_oss/test_gpqa_correctness.py +++ b/tests/evals/gpt_oss/test_gpqa_correctness.py @@ -23,7 +23,9 @@ TOL = 0.05 # Absolute tolerance for accuracy comparison # Path to tiktoken encoding files -TIKTOKEN_DATA_DIR = Path(__file__).parent / "data" +TIKTOKEN_DATA_DIR = ( + Path(os.environ.get("VLLM_TEST_CACHE", str(Path(__file__).parent))) / "tiktoken" +) # Tiktoken encoding files to download TIKTOKEN_FILES = { diff --git a/tests/evals/gsm8k/gsm8k_eval.py b/tests/evals/gsm8k/gsm8k_eval.py index 647c149ef5fd..db9bbcb96e8a 100644 --- a/tests/evals/gsm8k/gsm8k_eval.py +++ b/tests/evals/gsm8k/gsm8k_eval.py @@ -25,7 +25,9 @@ def download_and_cache_file(url: str, filename: str | None = None) -> str: """Download and cache a file from a URL.""" if filename is None: - filename = os.path.join("/tmp", url.split("/")[-1]) + cache_dir = os.environ.get("VLLM_TEST_CACHE", "/tmp") + os.makedirs(os.path.join(cache_dir, "gsm8k"), exist_ok=True) + filename = os.path.join(cache_dir, "gsm8k", url.split("/")[-1]) if os.path.exists(filename): return filename diff --git a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py index b22239fcc267..ba4c2c6c8c8b 100644 --- a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py +++ b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py @@ -3,6 +3,7 @@ import base64 import datetime +import hashlib import os import tempfile import urllib.request @@ -113,11 +114,24 @@ def read_geotiff( write_to_file = file_data elif file_path is not None and path_type == "url": - resp = urllib.request.urlopen(file_path) - # with tempfile.NamedTemporaryFile() as tmpfile: - # tmpfile.write(resp.read()) - # path = tmpfile.name - write_to_file = resp.read() + # Cache URL downloads to avoid re-downloading in CI + cache_base = os.environ.get("VLLM_TEST_CACHE") + if cache_base: + cache_dir = os.path.join(cache_base, "prithvi") + os.makedirs(cache_dir, exist_ok=True) + url_hash = hashlib.sha256(file_path.encode()).hexdigest()[:16] + ext = os.path.splitext(file_path)[1] or ".tiff" + cached_path = os.path.join(cache_dir, f"{url_hash}{ext}") + if os.path.exists(cached_path): + path = cached_path + else: + resp = urllib.request.urlopen(file_path) + with open(cached_path, "wb") as f: + f.write(resp.read()) + path = cached_path + else: + resp = urllib.request.urlopen(file_path) + write_to_file = resp.read() elif file_path is not None and path_type == "path": path = file_path elif file_path is not None and path_type == "b64_json": diff --git a/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py b/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py index 6a8c64152fec..695e9ea9c00c 100644 --- a/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py +++ b/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py @@ -63,7 +63,7 @@ def predictable_llama_config_path(tmp_path_factory): # Create a simple tokenizer tokenizer = LlamaTokenizerFast.from_pretrained( "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - cache_dir=os.path.expanduser("~/.cache/huggingface"), + cache_dir=os.environ.get("HF_HOME", None), ) tokenizer.save_pretrained(config_dir) diff --git a/vllm/envs.py b/vllm/envs.py index 3b7312a4f378..8facacde4105 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -63,6 +63,7 @@ VLLM_IMAGE_FETCH_TIMEOUT: int = 5 VLLM_VIDEO_FETCH_TIMEOUT: int = 30 VLLM_AUDIO_FETCH_TIMEOUT: int = 10 + VLLM_MEDIA_CACHE: str = "" VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8 VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25 @@ -759,6 +760,9 @@ def _get_or_set_default() -> str: "VLLM_AUDIO_FETCH_TIMEOUT": lambda: int( os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10") ), + # Directory for caching media downloads (images, video, audio fetched + # from URLs during inference). Empty string disables caching. + "VLLM_MEDIA_CACHE": lambda: os.getenv("VLLM_MEDIA_CACHE", ""), # Whether to allow HTTP redirects when fetching from media URLs. # Default to True "VLLM_MEDIA_URL_ALLOW_REDIRECTS": lambda: bool( @@ -1750,6 +1754,7 @@ def compile_factors() -> dict[str, object]: "VLLM_IMAGE_FETCH_TIMEOUT", "VLLM_VIDEO_FETCH_TIMEOUT", "VLLM_AUDIO_FETCH_TIMEOUT", + "VLLM_MEDIA_CACHE", "VLLM_MEDIA_URL_ALLOW_REDIRECTS", "VLLM_MEDIA_LOADING_THREAD_COUNT", "VLLM_MAX_AUDIO_CLIP_FILESIZE_MB", diff --git a/vllm/multimodal/media/connector.py b/vllm/multimodal/media/connector.py index 80aaa2a8293e..4bb66aa35333 100644 --- a/vllm/multimodal/media/connector.py +++ b/vllm/multimodal/media/connector.py @@ -3,6 +3,8 @@ import asyncio import atexit +import hashlib +import os from concurrent.futures import ThreadPoolExecutor from pathlib import Path from typing import Any, TypeVar @@ -16,6 +18,7 @@ import vllm.envs as envs from vllm.connections import HTTPConnection, global_http_connection +from vllm.logger import init_logger from vllm.utils.registry import ExtensionManager from .audio import AudioEmbeddingMediaIO, AudioMediaIO @@ -23,6 +26,8 @@ from .image import ImageEmbeddingMediaIO, ImageMediaIO from .video import VideoMediaIO +logger = init_logger(__name__) + _M = TypeVar("_M") global_thread_pool = ThreadPoolExecutor( @@ -116,6 +121,34 @@ def __init__( allowed_media_domains = [] self.allowed_media_domains = allowed_media_domains + # Media download cache directory (opt-in via VLLM_MEDIA_CACHE) + self._media_cache_dir: str | None = None + media_cache = envs.VLLM_MEDIA_CACHE + if media_cache: + self._media_cache_dir = media_cache + os.makedirs(media_cache, exist_ok=True) + + def _get_cached_bytes(self, url: str) -> bytes | None: + """Return cached bytes for a URL, or None if not cached.""" + if not self._media_cache_dir: + return None + cache_path = self._media_cache_path(url) + if cache_path.exists(): + return cache_path.read_bytes() + return None + + def _put_cached_bytes(self, url: str, data: bytes) -> None: + """Store downloaded bytes in the cache.""" + if not self._media_cache_dir: + return + cache_path = self._media_cache_path(url) + cache_path.write_bytes(data) + + def _media_cache_path(self, url: str) -> Path: + url_hash = hashlib.sha256(url.encode()).hexdigest()[:20] + ext = Path(url.split("?")[0]).suffix or "" + return Path(self._media_cache_dir) / f"{url_hash}{ext}" # type: ignore[arg-type] + def _load_data_url( self, url_spec: Url, @@ -178,6 +211,10 @@ def load_from_url( if url_spec.scheme and url_spec.scheme.startswith("http"): self._assert_url_in_allowed_media_domains(url_spec) + cached = self._get_cached_bytes(url) + if cached is not None: + return media_io.load_bytes(cached) + connection = self.connection data = connection.get_bytes( url_spec.url, @@ -185,6 +222,7 @@ def load_from_url( allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS, ) + self._put_cached_bytes(url, data) return media_io.load_bytes(data) if url_spec.scheme == "data": @@ -209,12 +247,21 @@ async def load_from_url_async( if url_spec.scheme and url_spec.scheme.startswith("http"): self._assert_url_in_allowed_media_domains(url_spec) + cached = self._get_cached_bytes(url) + if cached is not None: + future = loop.run_in_executor( + global_thread_pool, media_io.load_bytes, cached + ) + return await future + connection = self.connection data = await connection.async_get_bytes( url_spec.url, timeout=fetch_timeout, allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS, ) + + self._put_cached_bytes(url, data) future = loop.run_in_executor(global_thread_pool, media_io.load_bytes, data) return await future From e4d3e6cba3b68d1e9c30bfb1536688be19e02c96 Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Fri, 13 Mar 2026 00:02:36 -0500 Subject: [PATCH 2/4] [CI] Add persistent cache mounts for all CI test downloads and media URLs Signed-off-by: Andreas Karatzas --- .../prithvi_io_processor/prithvi_processor.py | 14 ++++++++++++-- vllm/multimodal/media/connector.py | 14 +++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py index ba4c2c6c8c8b..80528fe8ea4e 100644 --- a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py +++ b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py @@ -126,8 +126,18 @@ def read_geotiff( path = cached_path else: resp = urllib.request.urlopen(file_path) - with open(cached_path, "wb") as f: - f.write(resp.read()) + # Write to a temporary file and atomically rename + # to prevent race conditions with parallel tests. + with tempfile.NamedTemporaryFile( + mode="wb", dir=cache_dir, delete=False + ) as tmp_file: + tmp_file.write(resp.read()) + tmp_path = tmp_file.name + try: + os.rename(tmp_path, cached_path) + except OSError: + # Another process may have already written the file. + os.remove(tmp_path) path = cached_path else: resp = urllib.request.urlopen(file_path) diff --git a/vllm/multimodal/media/connector.py b/vllm/multimodal/media/connector.py index 4bb66aa35333..93c805b19604 100644 --- a/vllm/multimodal/media/connector.py +++ b/vllm/multimodal/media/connector.py @@ -5,6 +5,7 @@ import atexit import hashlib import os +import tempfile from concurrent.futures import ThreadPoolExecutor from pathlib import Path from typing import Any, TypeVar @@ -142,7 +143,18 @@ def _put_cached_bytes(self, url: str, data: bytes) -> None: if not self._media_cache_dir: return cache_path = self._media_cache_path(url) - cache_path.write_bytes(data) + # Write to a temporary file and atomically rename to prevent + # race conditions when multiple processes cache the same URL. + with tempfile.NamedTemporaryFile( + mode="wb", dir=self._media_cache_dir, delete=False + ) as tmp_file: + tmp_file.write(data) + tmp_path = tmp_file.name + try: + os.rename(tmp_path, str(cache_path)) + except OSError: + # Another process may have already written the file. + os.remove(tmp_path) def _media_cache_path(self, url: str) -> Path: url_hash = hashlib.sha256(url.encode()).hexdigest()[:20] From a54c8edbeeca7fae7e57560204438e89385c30d4 Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Sun, 15 Mar 2026 15:18:13 -0500 Subject: [PATCH 3/4] [ROCm][CI] Split out media cache changes into separate PR Signed-off-by: Andreas Karatzas --- .../scripts/hardware_ci/run-amd-test.sh | 1 - vllm/envs.py | 5 -- vllm/multimodal/media/connector.py | 59 ------------------- 3 files changed, 65 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index c83f2a34713c..723523e81f2d 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -526,7 +526,6 @@ else -e "MODELSCOPE_CACHE=${MODELSCOPE_MOUNT}" \ -e "VLLM_TEST_CACHE=${VLLM_TEST_CACHE_MOUNT}" \ -e "VLLM_CACHE_ROOT=${VLLM_CACHE_MOUNT}" \ - -e "VLLM_MEDIA_CACHE=${VLLM_CACHE_MOUNT}/media_cache" \ -e "PYTHONPATH=${MYPYTHONPATH}" \ --name "${container_name}" \ "${image_name}" \ diff --git a/vllm/envs.py b/vllm/envs.py index 641affdccf65..d310e9e1307d 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -63,7 +63,6 @@ VLLM_IMAGE_FETCH_TIMEOUT: int = 5 VLLM_VIDEO_FETCH_TIMEOUT: int = 30 VLLM_AUDIO_FETCH_TIMEOUT: int = 10 - VLLM_MEDIA_CACHE: str = "" VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8 VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25 @@ -761,9 +760,6 @@ def _get_or_set_default() -> str: "VLLM_AUDIO_FETCH_TIMEOUT": lambda: int( os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10") ), - # Directory for caching media downloads (images, video, audio fetched - # from URLs during inference). Empty string disables caching. - "VLLM_MEDIA_CACHE": lambda: os.getenv("VLLM_MEDIA_CACHE", ""), # Whether to allow HTTP redirects when fetching from media URLs. # Default to True "VLLM_MEDIA_URL_ALLOW_REDIRECTS": lambda: bool( @@ -1759,7 +1755,6 @@ def compile_factors() -> dict[str, object]: "VLLM_IMAGE_FETCH_TIMEOUT", "VLLM_VIDEO_FETCH_TIMEOUT", "VLLM_AUDIO_FETCH_TIMEOUT", - "VLLM_MEDIA_CACHE", "VLLM_MEDIA_URL_ALLOW_REDIRECTS", "VLLM_MEDIA_LOADING_THREAD_COUNT", "VLLM_MAX_AUDIO_CLIP_FILESIZE_MB", diff --git a/vllm/multimodal/media/connector.py b/vllm/multimodal/media/connector.py index 93c805b19604..80aaa2a8293e 100644 --- a/vllm/multimodal/media/connector.py +++ b/vllm/multimodal/media/connector.py @@ -3,9 +3,6 @@ import asyncio import atexit -import hashlib -import os -import tempfile from concurrent.futures import ThreadPoolExecutor from pathlib import Path from typing import Any, TypeVar @@ -19,7 +16,6 @@ import vllm.envs as envs from vllm.connections import HTTPConnection, global_http_connection -from vllm.logger import init_logger from vllm.utils.registry import ExtensionManager from .audio import AudioEmbeddingMediaIO, AudioMediaIO @@ -27,8 +23,6 @@ from .image import ImageEmbeddingMediaIO, ImageMediaIO from .video import VideoMediaIO -logger = init_logger(__name__) - _M = TypeVar("_M") global_thread_pool = ThreadPoolExecutor( @@ -122,45 +116,6 @@ def __init__( allowed_media_domains = [] self.allowed_media_domains = allowed_media_domains - # Media download cache directory (opt-in via VLLM_MEDIA_CACHE) - self._media_cache_dir: str | None = None - media_cache = envs.VLLM_MEDIA_CACHE - if media_cache: - self._media_cache_dir = media_cache - os.makedirs(media_cache, exist_ok=True) - - def _get_cached_bytes(self, url: str) -> bytes | None: - """Return cached bytes for a URL, or None if not cached.""" - if not self._media_cache_dir: - return None - cache_path = self._media_cache_path(url) - if cache_path.exists(): - return cache_path.read_bytes() - return None - - def _put_cached_bytes(self, url: str, data: bytes) -> None: - """Store downloaded bytes in the cache.""" - if not self._media_cache_dir: - return - cache_path = self._media_cache_path(url) - # Write to a temporary file and atomically rename to prevent - # race conditions when multiple processes cache the same URL. - with tempfile.NamedTemporaryFile( - mode="wb", dir=self._media_cache_dir, delete=False - ) as tmp_file: - tmp_file.write(data) - tmp_path = tmp_file.name - try: - os.rename(tmp_path, str(cache_path)) - except OSError: - # Another process may have already written the file. - os.remove(tmp_path) - - def _media_cache_path(self, url: str) -> Path: - url_hash = hashlib.sha256(url.encode()).hexdigest()[:20] - ext = Path(url.split("?")[0]).suffix or "" - return Path(self._media_cache_dir) / f"{url_hash}{ext}" # type: ignore[arg-type] - def _load_data_url( self, url_spec: Url, @@ -223,10 +178,6 @@ def load_from_url( if url_spec.scheme and url_spec.scheme.startswith("http"): self._assert_url_in_allowed_media_domains(url_spec) - cached = self._get_cached_bytes(url) - if cached is not None: - return media_io.load_bytes(cached) - connection = self.connection data = connection.get_bytes( url_spec.url, @@ -234,7 +185,6 @@ def load_from_url( allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS, ) - self._put_cached_bytes(url, data) return media_io.load_bytes(data) if url_spec.scheme == "data": @@ -259,21 +209,12 @@ async def load_from_url_async( if url_spec.scheme and url_spec.scheme.startswith("http"): self._assert_url_in_allowed_media_domains(url_spec) - cached = self._get_cached_bytes(url) - if cached is not None: - future = loop.run_in_executor( - global_thread_pool, media_io.load_bytes, cached - ) - return await future - connection = self.connection data = await connection.async_get_bytes( url_spec.url, timeout=fetch_timeout, allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS, ) - - self._put_cached_bytes(url, data) future = loop.run_in_executor(global_thread_pool, media_io.load_bytes, data) return await future From 8e2c7151cd16454d7373989dbe4ea861aed1b18f Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Sun, 15 Mar 2026 15:21:06 -0500 Subject: [PATCH 4/4] [ROCm][CI] Split out media cache changes into separate PR Signed-off-by: Andreas Karatzas --- .buildkite/scripts/hardware_ci/run-amd-test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 723523e81f2d..c83f2a34713c 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -526,6 +526,7 @@ else -e "MODELSCOPE_CACHE=${MODELSCOPE_MOUNT}" \ -e "VLLM_TEST_CACHE=${VLLM_TEST_CACHE_MOUNT}" \ -e "VLLM_CACHE_ROOT=${VLLM_CACHE_MOUNT}" \ + -e "VLLM_MEDIA_CACHE=${VLLM_CACHE_MOUNT}/media_cache" \ -e "PYTHONPATH=${MYPYTHONPATH}" \ --name "${container_name}" \ "${image_name}" \