vllm-project · atalman · Jul 2, 2025 · Jul 2, 2025 · Jul 8, 2025 · Jul 8, 2025
diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
@@ -10,6 +10,7 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
 
 # Install requirements
 $python_executable -m pip install -r requirements/build.txt -r requirements/cuda.txt
+$python_executable -m pip install nvidia-cudnn-cu12==9.5.1.17 --force-reinstall
 
 # Limit the number of parallel jobs to avoid OOM
 export MAX_JOBS=1

@@ -49,7 +49,7 @@ repos:
   rev: 0.6.17
   hooks:
     - id: pip-compile
-      args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu128]
+      args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --extra-index-url, https://download.pytorch.org/whl/test/cu128]
       files: ^requirements/test\.(in|txt)$
 - repo: local
   hooks:

@@ -45,8 +45,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1
 # requirements.txt files and should be kept consistent.  The ROCm torch
 # versions are derived from docker/Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.7.1")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.7.0")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.8.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.8.0")
 
 #
 # Try to find python package with an executable that exactly matches

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -50,7 +50,7 @@ ARG UV_INDEX_URL=${PIP_INDEX_URL}
 ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 
 # PyTorch provides its own indexes for standard and nightly builds
-ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
+ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl/test
 ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
 
 # PIP supports multiple authentication schemes, including keyring
@@ -157,6 +157,8 @@ COPY requirements/cuda.txt requirements/cuda.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -r requirements/cuda.txt \
     --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system nvidia-cudnn-cu12==9.5.1.17 --force-reinstall
 
 # cuda arch list used by torch
 # can be useful for both `dev` and `test`
@@ -376,6 +378,16 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
     uv pip install --system dist/*.whl --verbose \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
+# TODO (huydhn): Remove this source build once xformers publishes a release for torch 2.8.0.
+# Prebuilt cu128 test wheel (cp312): https://pytorch.s3.us-east-1.amazonaws.com/whl/test/cu128/xformers/xformers-0.0.30%2B4cf69f09.d20250708-cp312-cp312-linux_x86_64.whl
+RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
+    . /etc/environment
+    export TORCH_CUDA_ARCH_LIST='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
+    uv pip install --system --no-build-isolation "git+https://github.com/facebookresearch/xformers@v0.0.30"
+    # DEBUG: print the xformers build info to verify the install
+    python3 -m xformers.info
+BASH
+
 # If we need to build FlashInfer wheel before its release:
 # $ # Note we remove 7.0 from the arch list compared to the list below, since FlashInfer only supports sm75+
 # $ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a 12.0'
@@ -388,6 +400,9 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 # -rw-rw-r-- 1 mgoin mgoin 205M Jun  9 18:03 flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl
 # $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/v0.2.6.post1/flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl
 
+# Prebuilt FlashInfer wheel for CUDA 12.8 / torch 2.8; override the index URL or wheel filename via --build-arg
+ARG FLASHINFER_CUDA128_INDEX_URL="https://download.pytorch.org/whl/test/cu128/flashinfer"
+ARG FLASHINFER_CUDA128_WHEEL="flashinfer_python-0.2.6.post1%2Bcu128torch2.8-cp39-abi3-linux_x86_64.whl"
 # Install FlashInfer from source
 ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
 ARG FLASHINFER_GIT_REF="v0.2.8rc1"

diff --git a/pyproject.toml b/pyproject.toml
@@ -6,7 +6,8 @@ requires = [
     "packaging>=24.2",
     "setuptools>=77.0.3,<80.0.0",
     "setuptools-scm>=8.0",
-    "torch == 2.7.1",
+    "torch == 2.8.0",
+    "nvidia-cudnn-cu12==9.5.1.17",
     "wheel",
     "jinja2",
 ]

diff --git a/requirements/build.txt b/requirements/build.txt
@@ -4,7 +4,8 @@ ninja
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0
 setuptools-scm>=8
-torch==2.7.1
+torch==2.8.0
 wheel
 jinja2>=3.1.6
 regex
+build
diff --git a/requirements/cpu.txt b/requirements/cpu.txt
@@ -7,18 +7,18 @@ numba == 0.61.2; python_version > '3.9'
 # Dependencies for CPUs
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0
---extra-index-url https://download.pytorch.org/whl/cpu
+--extra-index-url https://download.pytorch.org/whl/test/cpu
 torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
-torch==2.7.0; platform_system == "Darwin"
-torch==2.7.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
+torch==2.8.0; platform_system == "Darwin"
+torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
 
 # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
 torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
-torchaudio==2.7.0; platform_machine == "ppc64le"
+torchaudio==2.8.0; platform_machine == "ppc64le"
 
 # required for the image processor of phi3v, this must be updated alongside torch
 torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
-torchvision==0.22.0; platform_machine == "ppc64le"
+torchvision==0.23.0; platform_machine == "ppc64le"
 datasets # for benchmark scripts
 
 # Intel Extension for PyTorch, only for x86_64 CPUs

diff --git a/requirements/cuda.txt b/requirements/cuda.txt
@@ -6,9 +6,10 @@ numba == 0.61.2; python_version > '3.9'
 
 # Dependencies for NVIDIA GPUs
 ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch==2.7.1
-torchaudio==2.7.1
+torch==2.8.0
+torchaudio==2.8.0
 # These must be updated alongside torch
-torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+torchvision==0.23.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+# TODO (huydhn): Re-enable the xformers pin below once xformers publishes a release for torch 2.8.0
 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
-xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
+# xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
@@ -1,10 +1,10 @@
 # Common dependencies
 -r common.txt
 
---extra-index-url https://download.pytorch.org/whl/rocm6.2.4
-torch==2.7.0
-torchvision==0.22.0
-torchaudio==2.7.0
+--extra-index-url https://download.pytorch.org/whl/test/rocm6.3
+torch==2.8.0
+torchvision==0.23.0
+torchaudio==2.8.0
 
 triton==3.2
 cmake>=3.26.1,<4

diff --git a/requirements/test.in b/requirements/test.in
@@ -22,9 +22,11 @@ sentence-transformers # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
 timm # required for internvl test
-torch==2.7.1
-torchaudio==2.7.1
-torchvision==0.22.1
+--extra-index-url https://download.pytorch.org/whl/test/cu128
+torch==2.8.0
+torchaudio==2.8.0
+torchvision==0.23.0
+nvidia-cudnn-cu12==9.5.1.17
 transformers_stream_generator # required for qwen-vl test
 mamba_ssm # required for plamo2 test
 matplotlib # required for qwen-vl test

diff --git a/requirements/test.txt b/requirements/test.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu128
+#    uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match
 absl-py==2.1.0
     # via rouge-score
 accelerate==1.0.1
@@ -380,42 +380,42 @@ numpy==1.26.4
     #   transformers
     #   tritonclient
     #   vocos
-nvidia-cublas-cu12==12.8.3.14
+nvidia-cublas-cu12==12.8.4.1
     # via
     #   nvidia-cudnn-cu12
     #   nvidia-cusolver-cu12
     #   torch
-nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-cupti-cu12==12.8.90
     # via torch
-nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-nvrtc-cu12==12.8.93
     # via torch
-nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cuda-runtime-cu12==12.8.90
     # via torch
-nvidia-cudnn-cu12==9.7.1.26
+nvidia-cudnn-cu12==9.5.1.17
     # via torch
-nvidia-cufft-cu12==11.3.3.41
+nvidia-cufft-cu12==11.3.3.83
     # via torch
-nvidia-cufile-cu12==1.13.0.11
+nvidia-cufile-cu12==1.13.1.3
     # via torch
-nvidia-curand-cu12==10.3.9.55
+nvidia-curand-cu12==10.3.9.90
     # via torch
-nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusolver-cu12==11.7.3.90
     # via torch
-nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparse-cu12==12.5.8.93
     # via
     #   nvidia-cusolver-cu12
     #   torch
-nvidia-cusparselt-cu12==0.6.3
+nvidia-cusparselt-cu12==0.7.1
     # via torch
-nvidia-nccl-cu12==2.26.2
+nvidia-nccl-cu12==2.27.3
     # via torch
-nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvjitlink-cu12==12.8.93
     # via
     #   nvidia-cufft-cu12
     #   nvidia-cusolver-cu12
     #   nvidia-cusparse-cu12
     #   torch
-nvidia-nvtx-cu12==12.8.55
+nvidia-nvtx-cu12==12.8.90
     # via torch
 open-clip-torch==2.32.0
     # via -r requirements/test.in
@@ -771,7 +771,7 @@ tomli==2.2.1
     # via schemathesis
 tomli-w==1.2.0
     # via schemathesis
-torch==2.7.1+cu128
+torch==2.8.0+cu128
     # via
     #   -r requirements/test.in
     #   accelerate
@@ -791,12 +791,12 @@ torch==2.7.1+cu128
     #   torchvision
     #   vector-quantize-pytorch
     #   vocos
-torchaudio==2.7.1+cu128
+torchaudio==2.8.0+cu128
     # via
     #   -r requirements/test.in
     #   encodec
     #   vocos
-torchvision==0.22.1+cu128
+torchvision==0.23.0+cu128
     # via
     #   -r requirements/test.in
     #   open-clip-torch
@@ -828,7 +828,7 @@ transformers==4.53.2
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
-triton==3.3.1
+triton==3.4.0
     # via torch
 tritonclient==2.51.0
     # via

diff --git a/tests/distributed/test_sequence_parallel.py b/tests/distributed/test_sequence_parallel.py
@@ -292,7 +292,7 @@ def _compare_sp(
     # TODO support other models
     # [LANGUAGE GENERATION]
     "meta-llama/Llama-3.2-1B-Instruct",
-    "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"
+    "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
 ]
 
 

@@ -36,11 +36,11 @@
     ],
     [
         "The image shows a Venn diagram with three over",
-        "The image shows a Venn diagram with three intersect",
+        "This image shows a Venn diagram with three over",
     ],
     [
         "This image displays a gradient of colors ranging from",
-        "The image displays a gradient of colors ranging from",
+        "This image displays a gradient of colors transitioning from",
     ],
 ]
 

diff --git a/tests/standalone_tests/python_only_compile.sh b/tests/standalone_tests/python_only_compile.sh
@@ -18,7 +18,11 @@ apt autoremove -y
 
 echo 'import os; os.system("touch /tmp/changed.file")' >> vllm/__init__.py
 
-VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .
+# TESTING, TO BE REMOVED
+VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e . \
+  --extra-index-url https://download.pytorch.org/whl/test/cu128
+
+pip3 install nvidia-cudnn-cu12==9.5.1.17 --force-reinstall
-# TESTING, TO BE REMOVED
-VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e . \
-  --extra-index-url https://download.pytorch.org/whl/test/cu128
-
-pip3 install nvidia-cudnn-cu12==9.5.1.17 --force-reinstall
+VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .
-# TESTING, TO BE REMOVED
-VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e . \
-  --extra-index-url https://download.pytorch.org/whl/test/cu128
-
-pip3 install nvidia-cudnn-cu12==9.5.1.17 --force-reinstall
+VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL=1 VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .
 
 # Run the script
 python3 -c 'import vllm'