vllm-project · atalman · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
@@ -1,10 +1,11 @@
 #!/bin/bash
 set -euox pipefail
 
-export VLLM_CPU_KVCACHE_SPACE=1 
+export VLLM_CPU_KVCACHE_SPACE=1
 export VLLM_CPU_CI_ENV=1
-# Reduce sub-processes for acceleration
-export TORCH_COMPILE_DISABLE=1 
+# Skip torch.compile with vLLM's --enforce-eager flag (passed to each run below)
+# instead of TORCH_COMPILE_DISABLE=1: under torch 2.12 that variable no longer
+# makes torch.compile a silent no-op when a caller specifies fullgraph=True.
 export VLLM_ENABLE_V1_MULTIPROCESSING=0
 
 SDE_ARCHIVE="sde-external-10.7.0-2026-02-18-lin.tar.xz"
@@ -49,15 +50,15 @@ wait_for_pid_and_check_log() {
 }
 
 # Test Sky Lake (AVX512F)
-./sde/sde64 -skl -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 > test_0.log 2>&1 &
+./sde/sde64 -skl -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 --enforce-eager > test_0.log 2>&1 &
 PID_TEST_0=$!
 
 # Test Cascade Lake (AVX512F + VNNI)
-./sde/sde64 -clx -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 > test_1.log 2>&1 &
+./sde/sde64 -clx -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 --enforce-eager > test_1.log 2>&1 &
 PID_TEST_1=$!
 
 # Test Cooper Lake (AVX512F + VNNI + BF16)
-./sde/sde64 -cpx -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 > test_2.log 2>&1 &
+./sde/sde64 -cpx -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 --enforce-eager > test_2.log 2>&1 &
 PID_TEST_2=$!
 
 wait_for_pid_and_check_log $PID_TEST_0 test_0.log

@@ -17,7 +17,7 @@ steps:
   # https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
   # we can only upgrade after this is resolved
   # TODO(jerryzh168): resolve the above comment
-  - uv pip install --system torchao==0.17.0 --index-url https://download.pytorch.org/whl/cu130
+  - uv pip install --system torchao==0.17.0 --index-url https://download.pytorch.org/whl/test/cu130
   - uv pip install --system conch-triton-kernels
   - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
 

@@ -60,8 +60,8 @@ endif()
 # requirements.txt files and should be kept consistent.  The ROCm torch
 # versions are derived from docker/Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.11.0")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.11.0")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.12.1")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.12.1")
 
 #
 # Try to find python package with an executable that exactly matches

@@ -80,7 +80,7 @@ ARG UV_INDEX_URL=${PIP_INDEX_URL}
 ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 
 # PyTorch provides its own indexes for standard and nightly builds
-ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
+ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl/test
 
 # PIP supports multiple authentication schemes, including keyring
 # By parameterizing the PIP_KEYRING_PROVIDER variable and setting it to

@@ -25,7 +25,7 @@ FROM ubuntu:22.04 AS base-common
 WORKDIR /workspace
 
 ARG PYTHON_VERSION=3.12
-ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/test/cpu"
 
 ARG max_jobs=32
 ENV MAX_JOBS=${max_jobs}
@@ -198,12 +198,14 @@ RUN \
     esac; \
     }; \
     remove_packages_not_supported_on_aarch64 && \
-    sed -i 's/^torch==.*/torch==2.11.0/g' requirements/test/cpu.in && \
+    # cpu.in is seeded from cuda.in, which points at whl/test/cu130; redirect it to the CPU test channel
+    sed -i 's|https://download.pytorch.org/whl/test/cu130|https://download.pytorch.org/whl/test/cpu|g' requirements/test/cpu.in && \
+    sed -i 's/^torch==.*/torch==2.12.1/g' requirements/test/cpu.in && \
     sed -i 's/torchaudio.*/torchaudio/g' requirements/test/cpu.in && \
     sed -i 's/torchvision.*/torchvision/g' requirements/test/cpu.in && \
     # Related issue: https://github.com/vllm-project/vllm/pull/38800#issuecomment-4228314305
     sed -i 's/^sentence-transformers.*/sentence-transformers==5.3.0/g' requirements/test/cpu.in && \
-    uv pip compile requirements/test/cpu.in -o requirements/test/cpu.txt --index-strategy unsafe-best-match --torch-backend cpu
+    uv pip compile requirements/test/cpu.in -o requirements/test/cpu.txt --index-strategy unsafe-best-match
 
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install -r requirements/test/cpu.txt

diff --git a/docker/Dockerfile.s390x b/docker/Dockerfile.s390x
@@ -92,13 +92,13 @@ ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
 
 FROM python-install AS torch-vision
 # Install torchvision
-ARG TORCH_VISION_VERSION=v0.26.0
+ARG TORCH_VISION_VERSION=v0.27.1
 WORKDIR /tmp
 RUN --mount=type=cache,target=/root/.cache/uv \
     git clone https://github.com/pytorch/vision.git && \
     cd vision && \
     git checkout $TORCH_VISION_VERSION && \
-    uv pip install torch==2.11.0 --index-url https://download.pytorch.org/whl/cpu && \
+    uv pip install torch==2.12.1 --index-url https://download.pytorch.org/whl/test/cpu && \
     python setup.py bdist_wheel
 
 FROM python-install AS hf-xet-builder
@@ -210,7 +210,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # # Final build stage
 FROM python-install AS vllm-cpu
 ARG PYTHON_VERSION
-ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/test/cpu"
 # Set correct library path for torch and numactl
 ENV LD_LIBRARY_PATH="/opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/torch/lib:/usr/local/lib:/opt/rh/gcc-toolset-14/root/usr/lib64:$LD_LIBRARY_PATH"
 ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"

diff --git a/docker/versions.json b/docker/versions.json
@@ -23,7 +23,7 @@
       "default": "https://bootstrap.pypa.io/get-pip.py"
     },
     "PYTORCH_CUDA_INDEX_BASE_URL": {
-      "default": "https://download.pytorch.org/whl"
+      "default": "https://download.pytorch.org/whl/test"
     },
     "PIP_KEYRING_PROVIDER": {
       "default": "disabled"

@@ -7,7 +7,7 @@ requires = [
     "setuptools>=77.0.3,<81.0.0",
     "setuptools-scm>=8.0",
     "setuptools-rust>=1.9.0",
-    "torch == 2.11.0",
+    "torch == 2.12.1",
     "wheel",
     "jinja2",
 ]

diff --git a/requirements/build/cpu.txt b/requirements/build/cpu.txt
@@ -1,12 +1,12 @@
---extra-index-url https://download.pytorch.org/whl/cpu
+--extra-index-url https://download.pytorch.org/whl/test/cpu
 cmake>=3.26.1
 ninja
 packaging>=24.2
 setuptools==77.0.3 # this version can reuse CMake build dir
 setuptools-scm>=8
 setuptools-rust>=1.9.0
-torch==2.11.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" or platform_machine == "aarch64"
-torch==2.11.0; platform_system == "Darwin" or platform_machine == "ppc64le"  or platform_machine == "riscv64"
+torch==2.12.1+cpu; platform_machine == "x86_64" or platform_machine == "s390x" or platform_machine == "aarch64"
+torch==2.12.1; platform_system == "Darwin" or platform_machine == "ppc64le"  or platform_machine == "riscv64"
 wheel
 jinja2>=3.1.6
 regex
diff --git a/requirements/build/cuda.txt b/requirements/build/cuda.txt
@@ -1,11 +1,12 @@
+--extra-index-url https://download.pytorch.org/whl/test/cu130
 # Should be mirrored in pyproject.toml
 cmake>=3.26.1
 ninja
 packaging>=24.2
 setuptools>=77.0.3,<81.0.0
 setuptools-scm>=8
 setuptools-rust>=1.9.0
-torch==2.11.0
+torch==2.12.1
 wheel
 jinja2>=3.1.6
 regex

diff --git a/requirements/build/rocm.txt b/requirements/build/rocm.txt
@@ -3,10 +3,10 @@
 
 
 --extra-index-url https://download.pytorch.org/whl/rocm7.1
-torch==2.11.0
-torchvision==0.26.0
+torch==2.12.1
+torchvision==0.27.1
 torchaudio==2.11.0
-triton==3.6.0
+triton==3.7.1
 cmake>=3.26.1,<4
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0

diff --git a/requirements/cpu.txt b/requirements/cpu.txt
@@ -1,4 +1,4 @@
---extra-index-url https://download.pytorch.org/whl/cpu
+--extra-index-url https://download.pytorch.org/whl/test/cpu
 # Common dependencies
 -r common.txt
 
@@ -7,8 +7,8 @@ setuptools==77.0.3 # this version can reuse CMake build dir
 numba == 0.65.0; platform_machine != "s390x" # Required for N-gram speculative decoding
 
 # Dependencies for CPUs
-torch==2.11.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" or platform_machine == "aarch64"
-torch==2.11.0; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"
+torch==2.12.1+cpu; platform_machine == "x86_64" or platform_machine == "s390x" or platform_machine == "aarch64"
+torch==2.12.1; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"
 
 # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
 torchaudio; platform_machine != "s390x" and platform_machine != "riscv64"

diff --git a/requirements/cuda.txt b/requirements/cuda.txt
@@ -1,13 +1,14 @@
+--extra-index-url https://download.pytorch.org/whl/test/cu130
 # Common dependencies
 -r common.txt
 
 numba == 0.65.0 # Required for N-gram speculative decoding
 
 # Dependencies for NVIDIA GPUs
-torch==2.11.0
+torch==2.12.1
 torchaudio==2.11.0
 # These must be updated alongside torch
-torchvision==0.26.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+torchvision==0.27.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # FlashInfer should be updated together with the Dockerfile
 flashinfer-python==0.6.12
 flashinfer-cubin==0.6.12

diff --git a/requirements/test/cuda.in b/requirements/test/cuda.in
@@ -1,3 +1,4 @@
+--extra-index-url https://download.pytorch.org/whl/test/cu130
 # testing
 pytest
 tensorizer==2.10.1
@@ -26,9 +27,9 @@ soundfile # required for audio tests
 jiwer # required for audio tests
 tblib # for pickling test exceptions
 timm >=1.0.17 # required for internvl and gemma3n-mm test
-torch==2.11.0
+torch==2.12.1
 torchaudio==2.11.0
-torchvision==0.26.0
+torchvision==0.27.1
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.11.3 # required for voxtral test

diff --git a/requirements/test/cuda.txt b/requirements/test/cuda.txt
@@ -509,7 +509,7 @@ nvidia-cuda-nvrtc==13.0.88
     # via cuda-toolkit
 nvidia-cuda-runtime==13.0.96
     # via cuda-toolkit
-nvidia-cudnn-cu13==9.19.0.56
+nvidia-cudnn-cu13==9.20.0.48
     # via torch
 nvidia-cufft==12.0.0.61
     # via cuda-toolkit
@@ -523,9 +523,9 @@ nvidia-cusparse==12.6.3.3
     # via
     #   cuda-toolkit
     #   nvidia-cusolver
-nvidia-cusparselt-cu13==0.8.0
+nvidia-cusparselt-cu13==0.8.1
     # via torch
-nvidia-nccl-cu13==2.28.9
+nvidia-nccl-cu13==2.29.7
     # via torch
 nvidia-nvjitlink==13.0.88
     # via
@@ -993,7 +993,7 @@ tomli==2.2.1
     # via schemathesis
 tomli-w==1.2.0
     # via schemathesis
-torch==2.11.0+cu130
+torch==2.12.1+cu130
     # via
     #   -c requirements/cuda.txt
     #   -r requirements/test/cuda.in
@@ -1018,7 +1018,7 @@ torchaudio==2.11.0+cu130
     #   -r requirements/test/cuda.in
     #   encodec
     #   vocos
-torchvision==0.26.0+cu130
+torchvision==0.27.1+cu130
     # via
     #   -c requirements/cuda.txt
     #   -r requirements/test/cuda.in
@@ -1050,7 +1050,7 @@ transformers==5.5.3
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test/cuda.in
-triton==3.6.0
+triton==3.7.1
     # via torch
 tritonclient==2.64.0
     # via -r requirements/test/cuda.in

@@ -142,8 +142,19 @@ def qwen2audio_aligned_content_and_embeds_b64() -> tuple[str, str]:
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "audio_first",
-    [True, False],
-    ids=["audio_embeds-then-text", "text-then-audio_embeds"],
+    [
+        pytest.param(True, id="audio_embeds-then-text"),
+        pytest.param(
+            False,
+            id="text-then-audio_embeds",
+            marks=pytest.mark.xfail(
+                reason="torch 2.12 regression: prompt_embeds output diverges "
+                "from raw-text when text precedes audio; "
+                "https://github.com/pytorch/pytorch/issues/184431",
+                strict=True,
+            ),
+        ),
+    ],
 )
 async def test_text_content_and_prompt_embeds_match_with_audio_embeds(
     qwen2audio_client: openai.AsyncOpenAI,

diff --git a/tests/standalone_tests/python_only_compile.sh b/tests/standalone_tests/python_only_compile.sh
@@ -67,7 +67,11 @@ apt autoremove -y
 
 echo 'import os; os.system("touch /tmp/changed.file")' >> vllm/__init__.py
 
-VLLM_PRECOMPILED_WHEEL_COMMIT=$merge_base_commit VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e .
+# RELEASE-ONLY: torch==2.12.1 is a pre-release that is not on PyPI yet, so pull
+# it from the PyTorch test channel (matches docker/Dockerfile and the other CI
+# install paths). Drop this once torch 2.12.1 is published to PyPI.
+VLLM_PRECOMPILED_WHEEL_COMMIT=$merge_base_commit VLLM_USE_PRECOMPILED=1 pip3 install -vvv -e . \
+    --extra-index-url https://download.pytorch.org/whl/test/cu130
 # Run the script
 python3 -c 'import vllm'