diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh
index 1c43c404d247..c83f2a34713c 100755
--- a/.buildkite/scripts/hardware_ci/run-amd-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh
@@ -394,6 +394,18 @@
 HF_CACHE="$(realpath ~)/huggingface"
 mkdir -p "${HF_CACHE}"
 HF_MOUNT="/root/.cache/huggingface"
+MODELSCOPE_CACHE="$(realpath ~)/modelscope"
+mkdir -p "${MODELSCOPE_CACHE}"
+MODELSCOPE_MOUNT="/root/.cache/modelscope"
+
+VLLM_TEST_CACHE="$(realpath ~)/vllm-test-cache"
+mkdir -p "${VLLM_TEST_CACHE}"
+VLLM_TEST_CACHE_MOUNT="/root/.cache/vllm-test-cache"
+
+VLLM_CACHE="$(realpath ~)/vllm-cache"
+mkdir -p "${VLLM_CACHE}"
+VLLM_CACHE_MOUNT="/root/.cache/vllm"
+
 # ---- Command source selection ----
 # Prefer VLLM_TEST_COMMANDS (preserves all inner quoting intact).
 # Fall back to $* for backward compatibility, but warn that inner
@@ -507,7 +519,14 @@ else
         -e BUILDKITE_PARALLEL_JOB \
         -e BUILDKITE_PARALLEL_JOB_COUNT \
         -v "${HF_CACHE}:${HF_MOUNT}" \
+        -v "${MODELSCOPE_CACHE}:${MODELSCOPE_MOUNT}" \
+        -v "${VLLM_TEST_CACHE}:${VLLM_TEST_CACHE_MOUNT}" \
+        -v "${VLLM_CACHE}:${VLLM_CACHE_MOUNT}" \
         -e "HF_HOME=${HF_MOUNT}" \
+        -e "MODELSCOPE_CACHE=${MODELSCOPE_MOUNT}" \
+        -e "VLLM_TEST_CACHE=${VLLM_TEST_CACHE_MOUNT}" \
+        -e "VLLM_CACHE_ROOT=${VLLM_CACHE_MOUNT}" \
+        -e "VLLM_MEDIA_CACHE=${VLLM_CACHE_MOUNT}/media_cache" \
         -e "PYTHONPATH=${MYPYTHONPATH}" \
         --name "${container_name}" \
         "${image_name}" \
diff --git a/tests/conftest.py b/tests/conftest.py
index 719bfa5ed1f0..a0f8e24816ef 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1288,10 +1288,10 @@ def num_gpus_available():
     return current_platform.device_count()
 
 
-temp_dir = tempfile.gettempdir()
-_dummy_opt_path = os.path.join(temp_dir, "dummy_opt")
-_dummy_llava_path = os.path.join(temp_dir, "dummy_llava")
-_dummy_gemma2_embedding_path = os.path.join(temp_dir, "dummy_gemma2_embedding")
+_test_cache_dir = os.environ.get("VLLM_TEST_CACHE", tempfile.gettempdir())
+_dummy_opt_path = os.path.join(_test_cache_dir, "dummy_opt")
+_dummy_llava_path = os.path.join(_test_cache_dir, "dummy_llava")
+_dummy_gemma2_embedding_path = os.path.join(_test_cache_dir, "dummy_gemma2_embedding")
 
 
 @pytest.fixture
diff --git a/tests/entrypoints/openai/test_token_in_token_out.py b/tests/entrypoints/openai/test_token_in_token_out.py
index c7f8abe27e6e..d3b40655ee25 100644
--- a/tests/entrypoints/openai/test_token_in_token_out.py
+++ b/tests/entrypoints/openai/test_token_in_token_out.py
@@ -1,9 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import os
-import tempfile
-
 import pytest
 
 from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
@@ -12,7 +9,7 @@
 from ...utils import RemoteOpenAIServer
 
 MODEL_NAME = "Qwen/Qwen3-0.6B"
-MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b")
+MODEL_PATH = MODEL_NAME
 
 
 @pytest.fixture(scope="module")
@@ -21,7 +18,7 @@ def server():
     MODEL_PATH = download_weights_from_hf(
         MODEL_NAME,
         allow_patterns=["*"],
-        cache_dir=MODEL_PATH,
+        cache_dir=None,
         ignore_patterns=["tokenizer*", "vocab*", "*.safetensors"],
     )
     args = [
diff --git a/tests/evals/gpt_oss/test_gpqa_correctness.py b/tests/evals/gpt_oss/test_gpqa_correctness.py
index 63188ec40767..3fb6cb9ded3d 100644
--- a/tests/evals/gpt_oss/test_gpqa_correctness.py
+++ b/tests/evals/gpt_oss/test_gpqa_correctness.py
@@ -23,7 +23,9 @@
 TOL = 0.05  # Absolute tolerance for accuracy comparison
 
 # Path to tiktoken encoding files
-TIKTOKEN_DATA_DIR = Path(__file__).parent / "data"
+TIKTOKEN_DATA_DIR = (
+    Path(os.environ.get("VLLM_TEST_CACHE", str(Path(__file__).parent))) / "tiktoken"
+)
 
 # Tiktoken encoding files to download
 TIKTOKEN_FILES = {
diff --git a/tests/evals/gsm8k/gsm8k_eval.py b/tests/evals/gsm8k/gsm8k_eval.py
index 647c149ef5fd..db9bbcb96e8a 100644
--- a/tests/evals/gsm8k/gsm8k_eval.py
+++ b/tests/evals/gsm8k/gsm8k_eval.py
@@ -25,7 +25,9 @@
 def download_and_cache_file(url: str, filename: str | None = None) -> str:
     """Download and cache a file from a URL."""
     if filename is None:
-        filename = os.path.join("/tmp", url.split("/")[-1])
+        cache_dir = os.environ.get("VLLM_TEST_CACHE", "/tmp")
+        os.makedirs(os.path.join(cache_dir, "gsm8k"), exist_ok=True)
+        filename = os.path.join(cache_dir, "gsm8k", url.split("/")[-1])
 
     if os.path.exists(filename):
         return filename
diff --git a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py
index b22239fcc267..80528fe8ea4e 100644
--- a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py
+++ b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/prithvi_processor.py
@@ -3,6 +3,7 @@
 
 import base64
 import datetime
+import hashlib
 import os
 import tempfile
 import urllib.request
@@ -113,11 +114,34 @@ def read_geotiff(
 
         write_to_file = file_data
     elif file_path is not None and path_type == "url":
-        resp = urllib.request.urlopen(file_path)
-        # with tempfile.NamedTemporaryFile() as tmpfile:
-        #     tmpfile.write(resp.read())
-        #     path = tmpfile.name
-        write_to_file = resp.read()
+        # Cache URL downloads to avoid re-downloading in CI
+        cache_base = os.environ.get("VLLM_TEST_CACHE")
+        if cache_base:
+            cache_dir = os.path.join(cache_base, "prithvi")
+            os.makedirs(cache_dir, exist_ok=True)
+            url_hash = hashlib.sha256(file_path.encode()).hexdigest()[:16]
+            ext = os.path.splitext(file_path)[1] or ".tiff"
+            cached_path = os.path.join(cache_dir, f"{url_hash}{ext}")
+            if os.path.exists(cached_path):
+                path = cached_path
+            else:
+                resp = urllib.request.urlopen(file_path)
+                # Write to a temporary file and atomically rename
+                # to prevent race conditions with parallel tests.
+                with tempfile.NamedTemporaryFile(
+                    mode="wb", dir=cache_dir, delete=False
+                ) as tmp_file:
+                    tmp_file.write(resp.read())
+                    tmp_path = tmp_file.name
+                try:
+                    os.rename(tmp_path, cached_path)
+                except OSError:
+                    # Another process may have already written the file.
+                    os.remove(tmp_path)
+                path = cached_path
+        else:
+            resp = urllib.request.urlopen(file_path)
+            write_to_file = resp.read()
     elif file_path is not None and path_type == "path":
         path = file_path
     elif file_path is not None and path_type == "b64_json":
diff --git a/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py b/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py
index 6a8c64152fec..695e9ea9c00c 100644
--- a/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py
+++ b/tests/v1/kv_connector/extract_hidden_states_integration/test_extraction.py
@@ -63,7 +63,7 @@ def predictable_llama_config_path(tmp_path_factory):
     # Create a simple tokenizer
     tokenizer = LlamaTokenizerFast.from_pretrained(
         "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        cache_dir=os.path.expanduser("~/.cache/huggingface"),
+        cache_dir=os.environ.get("HF_HOME", None),
    )
     tokenizer.save_pretrained(config_dir)
 