Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions .buildkite/test-amd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2165,7 +2165,15 @@ steps:
- vllm/platforms/rocm.py
- tests/quantization
commands:
- uv pip install --system torchao==0.14.1

# temporary install here since we need nightly, will move to requirements/test.in
# after torchao 0.12 release, and pin a working version of torchao nightly here

# since torchao nightly is only compatible with torch nightly currently
# https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
# we can only upgrade after this is resolved
# TODO(jerryzh168): resolve the above comment
- uv pip install --system torchao==0.17.0
- uv pip install --system conch-triton-kernels
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py

Expand Down Expand Up @@ -2924,10 +2932,10 @@ steps:
- bash .buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh 0.8 1319 8040

##### .buildkite/test_areas/compile.yaml #####
# Slowly setting up the tests so that it is also easier for the
# Slowly setting up the tests so that it is also easier for the
# CI team to review and upstream to the pipelinev2.
# The following tests are important for vLLM IR Ops refactoring,
# which affects fusion passes on ROCm. So we have to
# which affects fusion passes on ROCm. So we have to
# enable them as as soon as possible.

## TODO: Enable the test in this group
Expand Down Expand Up @@ -3006,7 +3014,7 @@ steps:

## There are no ops on ROCm for these tests.
## The test still passes but the logs are not useful.
## fused ops just call torch.ops.symm_mem which
## fused ops just call torch.ops.symm_mem which
## exists in ROCm even though they don't work
# - label: AsyncTP Correctness Tests (2xH100-2xMI325)
# - label: Fusion E2E TP2 Quick (H100-MI325)
Expand Down Expand Up @@ -3338,7 +3346,7 @@ steps:
- vllm/_aiter_ops.py
- vllm/platforms/rocm.py
commands:
- uv pip install --system torchao==0.14.1
- uv pip install --system torchao==0.17.0
- uv pip install --system conch-triton-kernels
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py

Expand Down
4 changes: 2 additions & 2 deletions .buildkite/test_areas/models_language.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ steps:
# Install fast path packages for testing against transformers
# Note: also needed to run plamo2 model in vLLM
- uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.3.0'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also change this version in test-amd.yaml? Btw was there a problem with 1.5.2?

# Shard hybrid language model tests
- pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
parallelism: 2
Expand All @@ -53,7 +53,7 @@ steps:
# Install fast path packages for testing against transformers
# Note: also needed to run plamo2 model in vLLM
- uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.3.0'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
- pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
mirror:
amd:
Expand Down
4 changes: 2 additions & 2 deletions .buildkite/test_areas/quantization.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
group: Quantization
depends_on:
depends_on:
- image-build
steps:
- label: Quantization
Expand All @@ -16,7 +16,7 @@ steps:
# https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
# we can only upgrade after this is resolved
# TODO(jerryzh168): resolve the above comment
- uv pip install --system torchao==0.14.1 --index-url https://download.pytorch.org/whl/cu129
- uv pip install --system torchao==0.17.0 --index-url https://download.pytorch.org/whl/cu130
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

- uv pip install --system conch-triton-kernels
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py

Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ repos:
rev: 0.11.1
hooks:
- id: pip-compile
args: [requirements/test.in, -c, requirements/common.txt, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu129, --python-platform, x86_64-manylinux_2_28, --python-version, "3.12"]
args: [requirements/test.in, -c, requirements/common.txt, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu130, --python-platform, x86_64-manylinux_2_28, --python-version, "3.12"]
files: ^requirements/test\.(in|txt)$
- id: pip-compile
alias: pip-compile-rocm
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ endif()
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from docker/Dockerfile.rocm
#
set(TORCH_SUPPORTED_VERSION_CUDA "2.10.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.10.0")
set(TORCH_SUPPORTED_VERSION_CUDA "2.11.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.11.0")

#
# Try to find python package with an executable that exactly matches
Expand Down
3 changes: 2 additions & 1 deletion csrc/cpu/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ struct Counter {

inline int64_t get_available_l2_size() {
static int64_t size = []() {
const uint32_t l2_cache_size = at::cpu::L2_cache_size();
auto caps = at::cpu::get_cpu_capabilities();
const uint32_t l2_cache_size = caps.at("l2_cache_size").toInt();
return l2_cache_size >> 1; // use 50% of L2 cache
}();
return size;
Expand Down
20 changes: 12 additions & 8 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# docker buildx bake -f docker/docker-bake.hcl -f docker/versions.json
# =============================================================================

ARG CUDA_VERSION=12.9.1
ARG CUDA_VERSION=13.0.0
ARG PYTHON_VERSION=3.12
ARG UBUNTU_VERSION=22.04

Expand All @@ -37,7 +37,7 @@ ARG UBUNTU_VERSION=22.04
# compatibility with other Linux OSes. The main reason for this is that the
# glibc version is baked into the distro, and binaries built with one glibc
# version are not backwards compatible with OSes that use an earlier version.
ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updating the build base image Ubuntu version will likely affect the glibc support, as noted in the comment above. Are there no CUDA 13 images available for 20.04? Cc @tlrmchlsmth

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# Using cuda base image with minimal dependencies necessary for JIT compilation (FlashInfer, DeepGEMM, EP kernels)
ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}

Expand Down Expand Up @@ -546,17 +546,21 @@ RUN apt-get update -y \
# Install CUDA development tools for runtime JIT compilation
# (FlashInfer, DeepGEMM, EP kernels all require compilation at runtime)
RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
CUDA_VERSION_SHORT=$(echo $CUDA_VERSION | cut -d. -f1,2) && \
apt-get update -y && \
apt-get install -y --no-install-recommends \
apt-get install -y --no-install-recommends --allow-change-held-packages \
cuda-nvcc-${CUDA_VERSION_DASH} \
cuda-cudart-${CUDA_VERSION_DASH} \
cuda-nvrtc-${CUDA_VERSION_DASH} \
cuda-cuobjdump-${CUDA_VERSION_DASH} \
libcurand-dev-${CUDA_VERSION_DASH} \
libcublas-${CUDA_VERSION_DASH} \
# Fixes nccl_allocator requiring nccl.h at runtime
# https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
libnccl-dev && \
libcublas-${CUDA_VERSION_DASH} && \
# Fixes nccl_allocator requiring nccl.h at runtime
# https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
# NCCL packages don't use the cuda-MAJOR-MINOR naming convention,
# so we pin the version to match our CUDA version
NCCL_VER=$(apt-cache madison libnccl-dev | grep "+cuda${CUDA_VERSION_SHORT}" | head -1 | awk -F'|' '{gsub(/^ +| +$/, "", $2); print $2}') && \
apt-get install -y --no-install-recommends --allow-change-held-packages libnccl-dev=${NCCL_VER} libnccl2=${NCCL_VER} && \
rm -rf /var/lib/apt/lists/*

# Install uv for faster pip installs
Expand Down Expand Up @@ -822,7 +826,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -r /tmp/kv_connectors.txt --no-build || ( \
# if the above fails, install from source
apt-get update -y && \
apt-get install -y --no-install-recommends ${BUILD_PKGS} && \
apt-get install -y --no-install-recommends --allow-change-held-packages ${BUILD_PKGS} && \
uv pip install --system -r /tmp/kv_connectors.txt --no-build-isolation && \
apt-get purge -y ${BUILD_PKGS} && \
# clean up -dev packages, keep runtime libraries
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ RUN \
esac; \
}; \
remove_packages_not_supported_on_aarch64 && \
sed -i 's/^torch==.*/torch==2.10.0/g' requirements/cpu-test.in && \
sed -i 's/^torch==.*/torch==2.11.0/g' requirements/cpu-test.in && \
sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu
Expand Down
6 changes: 3 additions & 3 deletions docker/Dockerfile.s390x
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,13 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && \

FROM python-install AS torch-vision
# Install torchvision
ARG TORCH_VISION_VERSION=v0.25.0
ARG TORCH_VISION_VERSION=v0.26.0
WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \
git clone https://github.com/pytorch/vision.git && \
cd vision && \
git checkout $TORCH_VISION_VERSION && \
uv pip install torch==2.10.0 --index-url https://download.pytorch.org/whl/cpu && \
uv pip install torch==2.11.0 --index-url https://download.pytorch.org/whl/cpu && \
python setup.py bdist_wheel

FROM python-install AS hf-xet-builder
Expand Down Expand Up @@ -253,7 +253,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
NUMBA_WHL_FILE=$(ls /tmp/numba-wheels/*.whl) && \
OPENCV_WHL_FILE=$(ls /tmp/opencv-wheels/*.whl) && \
OUTLINES_CORE_WHL_FILE=$(ls /tmp/outlines-core/dist/*.whl) && \
uv pip install -v \
uv pip install -v \
$ARROW_WHL_FILE \
$VISION_WHL_FILE \
$HF_XET_WHL_FILE \
Expand Down
6 changes: 3 additions & 3 deletions docker/versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"_comment": "Auto-generated from Dockerfile ARGs. Do not edit manually. Run: python tools/generate_versions_json.py",
"variable": {
"CUDA_VERSION": {
"default": "12.9.1"
"default": "13.0.0"
},
"PYTHON_VERSION": {
"default": "3.12"
Expand All @@ -11,10 +11,10 @@
"default": "22.04"
},
"BUILD_BASE_IMAGE": {
"default": "nvidia/cuda:12.9.1-devel-ubuntu20.04"
"default": "nvidia/cuda:13.0.0-devel-ubuntu22.04"
},
"FINAL_BASE_IMAGE": {
"default": "nvidia/cuda:12.9.1-base-ubuntu22.04"
"default": "nvidia/cuda:13.0.0-base-ubuntu22.04"
},
"GET_PIP_URL": {
"default": "https://bootstrap.pypa.io/get-pip.py"
Expand Down
Binary file modified docs/assets/contributing/dockerfile-stages-dependency.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ requires = [
"packaging>=24.2",
"setuptools>=77.0.3,<81.0.0",
"setuptools-scm>=8.0",
"torch == 2.10.0",
"torch == 2.11.0",
"wheel",
"jinja2",
]
Expand Down
2 changes: 1 addition & 1 deletion requirements/build.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ninja
packaging>=24.2
setuptools>=77.0.3,<81.0.0
setuptools-scm>=8
torch==2.10.0
torch==2.11.0
wheel
jinja2>=3.1.6
regex
Expand Down
5 changes: 3 additions & 2 deletions requirements/cpu-build.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
--extra-index-url https://download.pytorch.org/whl/cpu
cmake>=3.26.1
ninja
packaging>=24.2
setuptools==77.0.3 # this version can reuse CMake build dir
setuptools-scm>=8
torch==2.10.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le"
torch==2.11.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" or platform_machine == "aarch64"
torch==2.11.0; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"
wheel
jinja2>=3.1.6
regex
5 changes: 3 additions & 2 deletions requirements/cpu.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cpu
# Common dependencies
-r common.txt

Expand All @@ -6,8 +7,8 @@ setuptools==77.0.3 # this version can reuse CMake build dir
numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative decoding

# Dependencies for CPUs
torch==2.10.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"
torch==2.11.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" or platform_machine == "aarch64"
torch==2.11.0; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"

# required for the image processor of minicpm-o-2_6, this must be updated alongside torch
torchaudio; platform_machine != "s390x" and platform_machine != "riscv64"
Expand Down
6 changes: 3 additions & 3 deletions requirements/cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
numba == 0.61.2 # Required for N-gram speculative decoding

# Dependencies for NVIDIA GPUs
torch==2.10.0
torchaudio==2.10.0
torch==2.11.0
torchaudio==2.11.0
# These must be updated alongside torch
torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
torchvision==0.26.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# FlashInfer should be updated together with the Dockerfile
flashinfer-python==0.6.7
flashinfer-cubin==0.6.7
Expand Down
2 changes: 1 addition & 1 deletion requirements/kv_connectors.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
lmcache >= 0.3.9
nixl >= 0.7.1, < 0.10.0 # Required for disaggregated prefill
nixl[cu13] >= 0.7.1, < 0.10.0 # Required for disaggregated prefill
mooncake-transfer-engine >= 0.3.8
7 changes: 4 additions & 3 deletions requirements/rocm-build.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Common dependencies
-r common.txt


--extra-index-url https://download.pytorch.org/whl/rocm7.1
torch==2.10.0
torchvision==0.25.0
torchaudio==2.10.0
torch==2.11.0
torchvision==0.26.0
torchaudio==2.11.0
triton==3.6.0
cmake>=3.26.1,<4
packaging>=24.2
Expand Down
6 changes: 3 additions & 3 deletions requirements/test.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ soundfile # required for audio tests
jiwer # required for audio tests
tblib # for pickling test exceptions
timm >=1.0.17 # required for internvl and gemma3n-mm test
torch==2.10.0
torchaudio==2.10.0
torchvision==0.25.0
torch==2.11.0
torchaudio==2.11.0
torchvision==0.26.0
transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[image,audio] >= 1.11.0 # required for voxtral test
Expand Down
Loading
Loading