Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .buildkite/scripts/hardware_ci/run-amd-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,18 @@ HF_CACHE="$(realpath ~)/huggingface"
mkdir -p "${HF_CACHE}"
HF_MOUNT="/root/.cache/huggingface"

# Host-side ModelScope cache; bind-mounted into the container at the
# standard ModelScope cache path so downloads survive container teardown.
MODELSCOPE_CACHE="$(realpath ~)/modelscope"
mkdir -p "${MODELSCOPE_CACHE}"
MODELSCOPE_MOUNT="/root/.cache/modelscope"

# Host-side cache for vLLM test artifacts (exposed to tests via the
# VLLM_TEST_CACHE environment variable inside the container).
VLLM_TEST_CACHE="$(realpath ~)/vllm-test-cache"
mkdir -p "${VLLM_TEST_CACHE}"
VLLM_TEST_CACHE_MOUNT="/root/.cache/vllm-test-cache"

# Host-side vLLM runtime cache, mounted at vLLM's default cache root
# (also used for the media cache via VLLM_MEDIA_CACHE).
VLLM_CACHE="$(realpath ~)/vllm-cache"
mkdir -p "${VLLM_CACHE}"
VLLM_CACHE_MOUNT="/root/.cache/vllm"

# ---- Command source selection ----
# Prefer VLLM_TEST_COMMANDS (preserves all inner quoting intact).
# Fall back to $* for backward compatibility, but warn that inner
Expand Down Expand Up @@ -507,7 +519,14 @@ else
-e BUILDKITE_PARALLEL_JOB \
-e BUILDKITE_PARALLEL_JOB_COUNT \
-v "${HF_CACHE}:${HF_MOUNT}" \
-v "${MODELSCOPE_CACHE}:${MODELSCOPE_MOUNT}" \
-v "${VLLM_TEST_CACHE}:${VLLM_TEST_CACHE_MOUNT}" \
-v "${VLLM_CACHE}:${VLLM_CACHE_MOUNT}" \
-e "HF_HOME=${HF_MOUNT}" \
-e "MODELSCOPE_CACHE=${MODELSCOPE_MOUNT}" \
-e "VLLM_TEST_CACHE=${VLLM_TEST_CACHE_MOUNT}" \
-e "VLLM_CACHE_ROOT=${VLLM_CACHE_MOUNT}" \
-e "VLLM_MEDIA_CACHE=${VLLM_CACHE_MOUNT}/media_cache" \
-e "PYTHONPATH=${MYPYTHONPATH}" \
--name "${container_name}" \
"${image_name}" \
Expand Down
8 changes: 4 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,10 +1288,10 @@ def num_gpus_available():
return current_platform.device_count()


temp_dir = tempfile.gettempdir()
_dummy_opt_path = os.path.join(temp_dir, "dummy_opt")
_dummy_llava_path = os.path.join(temp_dir, "dummy_llava")
_dummy_gemma2_embedding_path = os.path.join(temp_dir, "dummy_gemma2_embedding")
# Dummy-model checkpoints created by the test suite live under a cache
# directory that CI can redirect through the VLLM_TEST_CACHE environment
# variable; without it, fall back to the system temporary directory.
_test_cache_dir = os.environ.get("VLLM_TEST_CACHE", tempfile.gettempdir())
_dummy_opt_path, _dummy_llava_path, _dummy_gemma2_embedding_path = (
    os.path.join(_test_cache_dir, subdir)
    for subdir in ("dummy_opt", "dummy_llava", "dummy_gemma2_embedding")
)


@pytest.fixture
Expand Down
7 changes: 2 additions & 5 deletions tests/entrypoints/openai/test_token_in_token_out.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import os
import tempfile

import pytest

from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
Expand All @@ -12,7 +9,7 @@
from ...utils import RemoteOpenAIServer

MODEL_NAME = "Qwen/Qwen3-0.6B"
MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b")
MODEL_PATH = MODEL_NAME


@pytest.fixture(scope="module")
Expand All @@ -21,7 +18,7 @@ def server():
MODEL_PATH = download_weights_from_hf(
MODEL_NAME,
allow_patterns=["*"],
cache_dir=MODEL_PATH,
cache_dir=None,
ignore_patterns=["tokenizer*", "vocab*", "*.safetensors"],
)
args = [
Expand Down
4 changes: 3 additions & 1 deletion tests/evals/gpt_oss/test_gpqa_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
TOL = 0.05 # Absolute tolerance for accuracy comparison

# Path to tiktoken encoding files
TIKTOKEN_DATA_DIR = Path(__file__).parent / "data"
# Where tiktoken encoding files are stored. CI points VLLM_TEST_CACHE at a
# persistent volume so the files are reused; otherwise they live in a
# "tiktoken" folder next to this test module.
_tiktoken_base = os.environ.get("VLLM_TEST_CACHE", str(Path(__file__).parent))
TIKTOKEN_DATA_DIR = Path(_tiktoken_base) / "tiktoken"

# Tiktoken encoding files to download
TIKTOKEN_FILES = {
Expand Down
4 changes: 3 additions & 1 deletion tests/evals/gsm8k/gsm8k_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
def download_and_cache_file(url: str, filename: str | None = None) -> str:
"""Download and cache a file from a URL."""
if filename is None:
filename = os.path.join("/tmp", url.split("/")[-1])
cache_dir = os.environ.get("VLLM_TEST_CACHE", "/tmp")
os.makedirs(os.path.join(cache_dir, "gsm8k"), exist_ok=True)
filename = os.path.join(cache_dir, "gsm8k", url.split("/")[-1])

if os.path.exists(filename):
return filename
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import base64
import datetime
import hashlib
import os
import tempfile
import urllib.request
Expand Down Expand Up @@ -113,11 +114,34 @@ def read_geotiff(

write_to_file = file_data
elif file_path is not None and path_type == "url":
resp = urllib.request.urlopen(file_path)
# with tempfile.NamedTemporaryFile() as tmpfile:
# tmpfile.write(resp.read())
# path = tmpfile.name
write_to_file = resp.read()
# Cache URL downloads to avoid re-downloading in CI
cache_base = os.environ.get("VLLM_TEST_CACHE")
if cache_base:
cache_dir = os.path.join(cache_base, "prithvi")
os.makedirs(cache_dir, exist_ok=True)
url_hash = hashlib.sha256(file_path.encode()).hexdigest()[:16]
ext = os.path.splitext(file_path)[1] or ".tiff"
cached_path = os.path.join(cache_dir, f"{url_hash}{ext}")
if os.path.exists(cached_path):
path = cached_path
else:
resp = urllib.request.urlopen(file_path)
# Write to a temporary file and atomically rename
# to prevent race conditions with parallel tests.
with tempfile.NamedTemporaryFile(
mode="wb", dir=cache_dir, delete=False
) as tmp_file:
tmp_file.write(resp.read())
tmp_path = tmp_file.name
try:
os.rename(tmp_path, cached_path)
except OSError:
# Another process may have already written the file.
os.remove(tmp_path)
path = cached_path
Comment on lines +128 to +141
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There's a potential race condition here. If multiple tests running in parallel attempt to download and cache the same URL, they could write to cached_path simultaneously, resulting in a corrupted file. To ensure atomicity, it's safer to write the downloaded content to a temporary file and then atomically rename it to the final destination.

Suggested change
resp = urllib.request.urlopen(file_path)
with open(cached_path, "wb") as f:
f.write(resp.read())
path = cached_path
resp = urllib.request.urlopen(file_path)
# To prevent race conditions, write to a temporary file and then atomically rename.
with tempfile.NamedTemporaryFile(mode="wb", dir=cache_dir, delete=False) as tmp_file:
tmp_file.write(resp.read())
tmp_path = tmp_file.name
try:
os.rename(tmp_path, cached_path)
except OSError:
# Another process might have already written the file.
os.remove(tmp_path)
path = cached_path

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done :)

else:
resp = urllib.request.urlopen(file_path)
write_to_file = resp.read()
elif file_path is not None and path_type == "path":
path = file_path
elif file_path is not None and path_type == "b64_json":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def predictable_llama_config_path(tmp_path_factory):
# Create a simple tokenizer
tokenizer = LlamaTokenizerFast.from_pretrained(
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
cache_dir=os.path.expanduser("~/.cache/huggingface"),
cache_dir=os.environ.get("HF_HOME", None),
)
tokenizer.save_pretrained(config_dir)

Expand Down
Loading