Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .buildkite/scripts/hardware_ci/run-amd-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,18 @@ HF_CACHE="$(realpath ~)/huggingface"
mkdir -p "${HF_CACHE}"
HF_MOUNT="/root/.cache/huggingface"

# Host-side ModelScope cache; bind-mounted into the container at the
# standard ModelScope cache path so downloads survive container teardown.
MODELSCOPE_CACHE="$(realpath ~)/modelscope"
mkdir -p "${MODELSCOPE_CACHE}"
MODELSCOPE_MOUNT="/root/.cache/modelscope"

# Host-side cache for vLLM test artifacts (exposed to tests via the
# VLLM_TEST_CACHE environment variable inside the container).
VLLM_TEST_CACHE="$(realpath ~)/vllm-test-cache"
mkdir -p "${VLLM_TEST_CACHE}"
VLLM_TEST_CACHE_MOUNT="/root/.cache/vllm-test-cache"

# Host-side vLLM runtime cache, mounted at vLLM's default cache root
# (also used for the media cache via VLLM_MEDIA_CACHE).
VLLM_CACHE="$(realpath ~)/vllm-cache"
mkdir -p "${VLLM_CACHE}"
VLLM_CACHE_MOUNT="/root/.cache/vllm"

# ---- Command source selection ----
# Prefer VLLM_TEST_COMMANDS (preserves all inner quoting intact).
# Fall back to $* for backward compatibility, but warn that inner
Expand Down Expand Up @@ -507,7 +519,14 @@ else
-e BUILDKITE_PARALLEL_JOB \
-e BUILDKITE_PARALLEL_JOB_COUNT \
-v "${HF_CACHE}:${HF_MOUNT}" \
-v "${MODELSCOPE_CACHE}:${MODELSCOPE_MOUNT}" \
-v "${VLLM_TEST_CACHE}:${VLLM_TEST_CACHE_MOUNT}" \
-v "${VLLM_CACHE}:${VLLM_CACHE_MOUNT}" \
-e "HF_HOME=${HF_MOUNT}" \
-e "MODELSCOPE_CACHE=${MODELSCOPE_MOUNT}" \
-e "VLLM_TEST_CACHE=${VLLM_TEST_CACHE_MOUNT}" \
-e "VLLM_CACHE_ROOT=${VLLM_CACHE_MOUNT}" \
-e "VLLM_MEDIA_CACHE=${VLLM_CACHE_MOUNT}/media_cache" \
-e "PYTHONPATH=${MYPYTHONPATH}" \
--name "${container_name}" \
"${image_name}" \
Expand Down
8 changes: 4 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,10 +1288,10 @@ def num_gpus_available():
return current_platform.device_count()


temp_dir = tempfile.gettempdir()
_dummy_opt_path = os.path.join(temp_dir, "dummy_opt")
_dummy_llava_path = os.path.join(temp_dir, "dummy_llava")
_dummy_gemma2_embedding_path = os.path.join(temp_dir, "dummy_gemma2_embedding")
# Dummy-model checkpoints created by the test suite live under a cache
# directory that CI can redirect through the VLLM_TEST_CACHE environment
# variable; without it, fall back to the system temporary directory.
_test_cache_dir = os.environ.get("VLLM_TEST_CACHE", tempfile.gettempdir())
_dummy_opt_path, _dummy_llava_path, _dummy_gemma2_embedding_path = (
    os.path.join(_test_cache_dir, subdir)
    for subdir in ("dummy_opt", "dummy_llava", "dummy_gemma2_embedding")
)


@pytest.fixture
Expand Down
7 changes: 2 additions & 5 deletions tests/entrypoints/openai/test_token_in_token_out.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import os
import tempfile

import pytest

from vllm.model_executor.model_loader.weight_utils import download_weights_from_hf
Expand All @@ -12,7 +9,7 @@
from ...utils import RemoteOpenAIServer

MODEL_NAME = "Qwen/Qwen3-0.6B"
MODEL_PATH = os.path.join(tempfile.gettempdir(), "qwen3_06b")
MODEL_PATH = MODEL_NAME


@pytest.fixture(scope="module")
Expand All @@ -21,7 +18,7 @@ def server():
MODEL_PATH = download_weights_from_hf(
MODEL_NAME,
allow_patterns=["*"],
cache_dir=MODEL_PATH,
cache_dir=None,
ignore_patterns=["tokenizer*", "vocab*", "*.safetensors"],
)
args = [
Expand Down
4 changes: 3 additions & 1 deletion tests/evals/gpt_oss/test_gpqa_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
TOL = 0.05 # Absolute tolerance for accuracy comparison

# Path to tiktoken encoding files
TIKTOKEN_DATA_DIR = Path(__file__).parent / "data"
# Where tiktoken encoding files are stored. CI points VLLM_TEST_CACHE at a
# persistent volume so the files are reused; otherwise they live in a
# "tiktoken" folder next to this test module.
_tiktoken_base = os.environ.get("VLLM_TEST_CACHE", str(Path(__file__).parent))
TIKTOKEN_DATA_DIR = Path(_tiktoken_base) / "tiktoken"

# Tiktoken encoding files to download
TIKTOKEN_FILES = {
Expand Down
4 changes: 3 additions & 1 deletion tests/evals/gsm8k/gsm8k_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
def download_and_cache_file(url: str, filename: str | None = None) -> str:
"""Download and cache a file from a URL."""
if filename is None:
filename = os.path.join("/tmp", url.split("/")[-1])
cache_dir = os.environ.get("VLLM_TEST_CACHE", "/tmp")
os.makedirs(os.path.join(cache_dir, "gsm8k"), exist_ok=True)
filename = os.path.join(cache_dir, "gsm8k", url.split("/")[-1])

if os.path.exists(filename):
return filename
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import base64
import datetime
import hashlib
import os
import tempfile
import urllib.request
Expand Down Expand Up @@ -113,11 +114,34 @@ def read_geotiff(

write_to_file = file_data
elif file_path is not None and path_type == "url":
resp = urllib.request.urlopen(file_path)
# with tempfile.NamedTemporaryFile() as tmpfile:
# tmpfile.write(resp.read())
# path = tmpfile.name
write_to_file = resp.read()
# Cache URL downloads to avoid re-downloading in CI
cache_base = os.environ.get("VLLM_TEST_CACHE")
if cache_base:
cache_dir = os.path.join(cache_base, "prithvi")
os.makedirs(cache_dir, exist_ok=True)
url_hash = hashlib.sha256(file_path.encode()).hexdigest()[:16]
ext = os.path.splitext(file_path)[1] or ".tiff"
cached_path = os.path.join(cache_dir, f"{url_hash}{ext}")
if os.path.exists(cached_path):
path = cached_path
else:
resp = urllib.request.urlopen(file_path)
# Write to a temporary file and atomically rename
# to prevent race conditions with parallel tests.
with tempfile.NamedTemporaryFile(
mode="wb", dir=cache_dir, delete=False
) as tmp_file:
tmp_file.write(resp.read())
tmp_path = tmp_file.name
try:
os.rename(tmp_path, cached_path)
except OSError:
# Another process may have already written the file.
os.remove(tmp_path)
path = cached_path
Comment on lines +128 to +141
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There's a potential race condition here. If multiple tests running in parallel attempt to download and cache the same URL, they could write to cached_path simultaneously, resulting in a corrupted file. To ensure atomicity, it's safer to write the downloaded content to a temporary file and then atomically rename it to the final destination.

Suggested change
resp = urllib.request.urlopen(file_path)
with open(cached_path, "wb") as f:
f.write(resp.read())
path = cached_path
resp = urllib.request.urlopen(file_path)
# To prevent race conditions, write to a temporary file and then atomically rename.
with tempfile.NamedTemporaryFile(mode="wb", dir=cache_dir, delete=False) as tmp_file:
tmp_file.write(resp.read())
tmp_path = tmp_file.name
try:
os.rename(tmp_path, cached_path)
except OSError:
# Another process might have already written the file.
os.remove(tmp_path)
path = cached_path

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done :)

else:
resp = urllib.request.urlopen(file_path)
write_to_file = resp.read()
elif file_path is not None and path_type == "path":
path = file_path
elif file_path is not None and path_type == "b64_json":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def predictable_llama_config_path(tmp_path_factory):
# Create a simple tokenizer
tokenizer = LlamaTokenizerFast.from_pretrained(
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
cache_dir=os.path.expanduser("~/.cache/huggingface"),
cache_dir=os.environ.get("HF_HOME", None),
)
tokenizer.save_pretrained(config_dir)

Expand Down
Loading