Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH

# Install requirements
$python_executable -m pip install -r requirements/build.txt -r requirements/cuda.txt
$python_executable -m pip install nvidia-cudnn-cu12==9.5.1.17 --force-reinstall

# Limit the number of parallel jobs to avoid OOM
export MAX_JOBS=1
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ repos:
rev: 0.6.17
hooks:
- id: pip-compile
args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu128]
args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --extra-index-url, https://download.pytorch.org/whl/test/cu128]
files: ^requirements/test\.(in|txt)$
- repo: local
hooks:
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from docker/Dockerfile.rocm
#
set(TORCH_SUPPORTED_VERSION_CUDA "2.7.1")
set(TORCH_SUPPORTED_VERSION_ROCM "2.7.0")
set(TORCH_SUPPORTED_VERSION_CUDA "2.8.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.8.0")

#
# Try to find python package with an executable that exactly matches
Expand Down
17 changes: 16 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ ARG UV_INDEX_URL=${PIP_INDEX_URL}
ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}

# PyTorch provides its own indexes for standard and nightly builds
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl/test
ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly

# PIP supports multiple authentication schemes, including keyring
Expand Down Expand Up @@ -157,6 +157,8 @@ COPY requirements/cuda.txt requirements/cuda.txt
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -r requirements/cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system nvidia-cudnn-cu12==9.5.1.17 --force-reinstall

# cuda arch list used by torch
# can be useful for both `dev` and `test`
Expand Down Expand Up @@ -376,6 +378,16 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
uv pip install --system dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')

# TODO (huydhn): Remove this once xformers is released for 2.8.0
# https://pytorch.s3.us-east-1.amazonaws.com/whl/test/cu128/xformers/xformers-0.0.30%2B4cf69f09.d20250708-cp312-cp312-linux_x86_64.whl
RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
. /etc/environment
export TORCH_CUDA_ARCH_LIST='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
uv pip install --system --no-build-isolation "git+https://github.com/facebookresearch/xformers@v0.0.30"
# DEBUG
python3 -m xformers.info
Comment on lines +387 to +388
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The DEBUG comment and the python3 -m xformers.info command appear to be for debugging purposes. Please remove them before merging to keep the Dockerfile clean.

BASH

# If we need to build FlashInfer wheel before its release:
# $ # Note we remove 7.0 from the arch list compared to the list below, since FlashInfer only supports sm75+
# $ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a 12.0'
Expand All @@ -388,6 +400,9 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
# -rw-rw-r-- 1 mgoin mgoin 205M Jun 9 18:03 flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl
# $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/v0.2.6.post1/flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl

# Allow specifying a version, Git revision or local .whl file
ARG FLASHINFER_CUDA128_INDEX_URL="https://download.pytorch.org/whl/test/cu128/flashinfer"
ARG FLASHINFER_CUDA128_WHEEL="flashinfer_python-0.2.6.post1%2Bcu128torch2.8-cp39-abi3-linux_x86_64.whl"
# Install FlashInfer from source
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
ARG FLASHINFER_GIT_REF="v0.2.8rc1"
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ requires = [
"packaging>=24.2",
"setuptools>=77.0.3,<80.0.0",
"setuptools-scm>=8.0",
"torch == 2.7.1",
"torch == 2.8.0",
"nvidia-cudnn-cu12==9.5.1.17",
"wheel",
"jinja2",
]
Expand Down
3 changes: 2 additions & 1 deletion requirements/build.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ ninja
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
torch==2.7.1
torch==2.8.0
wheel
jinja2>=3.1.6
regex
build
10 changes: 5 additions & 5 deletions requirements/cpu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@ numba == 0.61.2; python_version > '3.9'
# Dependencies for CPUs
packaging>=24.2
setuptools>=77.0.3,<80.0.0
--extra-index-url https://download.pytorch.org/whl/cpu
--extra-index-url https://download.pytorch.org/whl/test/cpu
torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
torch==2.7.0; platform_system == "Darwin"
torch==2.7.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
torch==2.8.0; platform_system == "Darwin"
torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"

# required for the image processor of minicpm-o-2_6, this must be updated alongside torch
torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
torchaudio==2.7.0; platform_machine == "ppc64le"
torchaudio==2.8.0; platform_machine == "ppc64le"

# required for the image processor of phi3v, this must be updated alongside torch
torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
torchvision==0.22.0; platform_machine == "ppc64le"
torchvision==0.23.0; platform_machine == "ppc64le"
datasets # for benchmark scripts

# Intel Extension for PyTorch, only for x86_64 CPUs
Expand Down
9 changes: 5 additions & 4 deletions requirements/cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ numba == 0.61.2; python_version > '3.9'

# Dependencies for NVIDIA GPUs
ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
torch==2.7.1
torchaudio==2.7.1
torch==2.8.0
torchaudio==2.8.0
# These must be updated alongside torch
torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
torchvision==0.23.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# TODO (huydhn): Re-enable this once xformers is released for 2.8.0
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
# xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
8 changes: 4 additions & 4 deletions requirements/rocm-build.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Common dependencies
-r common.txt

--extra-index-url https://download.pytorch.org/whl/rocm6.2.4
torch==2.7.0
torchvision==0.22.0
torchaudio==2.7.0
--extra-index-url https://download.pytorch.org/whl/test/rocm6.3
torch==2.8.0
torchvision==0.23.0
torchaudio==2.8.0

triton==3.2
cmake>=3.26.1,<4
Expand Down
8 changes: 5 additions & 3 deletions requirements/test.in
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ sentence-transformers # required for embedding tests
soundfile # required for audio tests
jiwer # required for audio tests
timm # required for internvl test
torch==2.7.1
torchaudio==2.7.1
torchvision==0.22.1
--extra-index-url https://download.pytorch.org/whl/test/cu128
torch==2.8.0
torchaudio==2.8.0
torchvision==0.23.0
nvidia-cudnn-cu12==9.5.1.17
transformers_stream_generator # required for qwen-vl test
mamba_ssm # required for plamo2 test
matplotlib # required for qwen-vl test
Expand Down
38 changes: 19 additions & 19 deletions requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file was autogenerated by uv via the following command:
# uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu128
# uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match
absl-py==2.1.0
# via rouge-score
accelerate==1.0.1
Expand Down Expand Up @@ -380,42 +380,42 @@ numpy==1.26.4
# transformers
# tritonclient
# vocos
nvidia-cublas-cu12==12.8.3.14
nvidia-cublas-cu12==12.8.4.1
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
nvidia-cuda-cupti-cu12==12.8.57
nvidia-cuda-cupti-cu12==12.8.90
# via torch
nvidia-cuda-nvrtc-cu12==12.8.61
nvidia-cuda-nvrtc-cu12==12.8.93
# via torch
nvidia-cuda-runtime-cu12==12.8.57
nvidia-cuda-runtime-cu12==12.8.90
# via torch
nvidia-cudnn-cu12==9.7.1.26
nvidia-cudnn-cu12==9.5.1.17
# via torch
nvidia-cufft-cu12==11.3.3.41
nvidia-cufft-cu12==11.3.3.83
# via torch
nvidia-cufile-cu12==1.13.0.11
nvidia-cufile-cu12==1.13.1.3
# via torch
nvidia-curand-cu12==10.3.9.55
nvidia-curand-cu12==10.3.9.90
# via torch
nvidia-cusolver-cu12==11.7.2.55
nvidia-cusolver-cu12==11.7.3.90
# via torch
nvidia-cusparse-cu12==12.5.7.53
nvidia-cusparse-cu12==12.5.8.93
# via
# nvidia-cusolver-cu12
# torch
nvidia-cusparselt-cu12==0.6.3
nvidia-cusparselt-cu12==0.7.1
# via torch
nvidia-nccl-cu12==2.26.2
nvidia-nccl-cu12==2.27.3
# via torch
nvidia-nvjitlink-cu12==12.8.61
nvidia-nvjitlink-cu12==12.8.93
# via
# nvidia-cufft-cu12
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
# torch
nvidia-nvtx-cu12==12.8.55
nvidia-nvtx-cu12==12.8.90
# via torch
open-clip-torch==2.32.0
# via -r requirements/test.in
Expand Down Expand Up @@ -771,7 +771,7 @@ tomli==2.2.1
# via schemathesis
tomli-w==1.2.0
# via schemathesis
torch==2.7.1+cu128
torch==2.8.0+cu128
# via
# -r requirements/test.in
# accelerate
Expand All @@ -791,12 +791,12 @@ torch==2.7.1+cu128
# torchvision
# vector-quantize-pytorch
# vocos
torchaudio==2.7.1+cu128
torchaudio==2.8.0+cu128
# via
# -r requirements/test.in
# encodec
# vocos
torchvision==0.22.1+cu128
torchvision==0.23.0+cu128
# via
# -r requirements/test.in
# open-clip-torch
Expand Down Expand Up @@ -828,7 +828,7 @@ transformers==4.53.2
# transformers-stream-generator
transformers-stream-generator==0.0.5
# via -r requirements/test.in
triton==3.3.1
triton==3.4.0
# via torch
tritonclient==2.51.0
# via
Expand Down
2 changes: 1 addition & 1 deletion tests/distributed/test_sequence_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def _compare_sp(
# TODO support other models
# [LANGUAGE GENERATION]
"meta-llama/Llama-3.2-1B-Instruct",
"RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"
"RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
]


Expand Down
4 changes: 2 additions & 2 deletions tests/entrypoints/openai/test_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@
],
[
"The image shows a Venn diagram with three over",
"The image shows a Venn diagram with three intersect",
"This image shows a Venn diagram with three over",
],
[
"This image displays a gradient of colors ranging from",
"The image displays a gradient of colors ranging from",
"This image displays a gradient of colors transitioning from",
],
]

Expand Down
6 changes: 5 additions & 1 deletion tests/standalone_tests/python_only_compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ apt autoremove -y

echo 'import os; os.system("touch /tmp/changed.file")' >> vllm/__init__.py

VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .
# TESTING, TO BE REMOVED
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e . \
--extra-index-url https://download.pytorch.org/whl/test/cu128

pip3 install nvidia-cudnn-cu12==9.5.1.17 --force-reinstall
Comment on lines +21 to +25
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

These changes are marked as temporary for testing and should be removed. Merging this would introduce test-specific logic into a test script that should reflect the final installation process. Please revert these changes to the original pip3 install command.

Suggested change
# TESTING, TO BE REMOVED
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e . \
--extra-index-url https://download.pytorch.org/whl/test/cu128
pip3 install nvidia-cudnn-cu12==9.5.1.17 --force-reinstall
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .


# Run the script
python3 -c 'import vllm'
Expand Down
Loading