conda-forge · shermansiu · Sep 2, 2025 · Jul 30, 2025 · Jul 30, 2025 · Aug 31, 2025
@@ -1,17 +1,17 @@
-From 7d8537c0d09b0817053f05f30a871bd7f3f9cca7 Mon Sep 17 00:00:00 2001
+From b4e6f75bcd35bfe2513be1a1c6c82c0720a08d8d Mon Sep 17 00:00:00 2001
 From: Sherman Siu <s8siu@uwaterloo.ca>
 Date: Mon, 21 Jul 2025 05:01:32 -0400
-Subject: [PATCH 1/4] Search for the CUDA package in CMakeLists
+Subject: [PATCH 1/6] Search for the CUDA package in CMakeLists
 
 ---
  CMakeLists.txt | 1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 15db4a4f4..d86fab3f4 100644
+index 0129f8512..564445522 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -67,6 +67,7 @@ endif()
+@@ -66,6 +66,7 @@ endif()
  append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
 
  # Ensure the 'nvcc' command is in the PATH

@@ -1,18 +1,19 @@
-From 69d16023885cab08141882461ab01b3bf951d9de Mon Sep 17 00:00:00 2001
+From 643db40d812d663f825f99b028e5b32765ad266b Mon Sep 17 00:00:00 2001
 From: Sherman Siu <s8siu@uwaterloo.ca>
 Date: Mon, 21 Jul 2025 05:04:55 -0400
-Subject: [PATCH 2/4] Remove ninja pip requirement
+Subject: [PATCH 2/6] Remove ninja pip requirement
 
 ---
  requirements/common.txt | 1 -
  1 file changed, 1 deletion(-)
 
 diff --git a/requirements/common.txt b/requirements/common.txt
-index 24a1e6d67..0f9cac6c8 100644
+index 8bc0be777..5d0fb5235 100644
 --- a/requirements/common.txt
 +++ b/requirements/common.txt
-@@ -42,4 +42,3 @@ cloudpickle # allows pickling lambda functions in model_executor/models/registry
+@@ -43,5 +43,4 @@ cloudpickle # allows pickling lambda functions in model_executor/models/registry
  watchfiles # required for http server to monitor the updates of TLS files
- python-json-logger # Used by logging as per examples/other/logging_configuration.md
+ python-json-logger # Used by logging as per examples/others/logging_configuration.md
  scipy # Required for phi-4-multimodal-instruct
 -ninja # Required for xgrammar, rocm, tpu, xpu
+ pybase64 # fast base64 implementation
@@ -1,18 +1,18 @@
-From 9fe9a9d7e6dc4cf38e8b5c29f386d1b445c73af8 Mon Sep 17 00:00:00 2001
+From 8135446bc484b8edda531c1f81fd0ef9c4d314be Mon Sep 17 00:00:00 2001
 From: Sherman Siu <s8siu@uwaterloo.ca>
 Date: Mon, 21 Jul 2025 05:05:36 -0400
-Subject: [PATCH 3/4] Manually define gettid
+Subject: [PATCH 3/6] Manually define gettid
 
 - `gettid` is not defined by default until glibc 2.30: see https://stackoverflow.com/questions/30680550/c-gettid-was-not-declared-in-this-scope for details
 ---
  csrc/cpu/utils.cpp | 3 +++
  1 file changed, 3 insertions(+)
 
 diff --git a/csrc/cpu/utils.cpp b/csrc/cpu/utils.cpp
-index 79771ecd9..e21efc92a 100644
+index 02514edce..e605da905 100644
 --- a/csrc/cpu/utils.cpp
 +++ b/csrc/cpu/utils.cpp
-@@ -7,6 +7,9 @@
+@@ -12,6 +12,9 @@
 
  #include "cpu_types.hpp"
 

@@ -1,18 +1,18 @@
-From 940ed92c475e6b14e94acba9bd376dde9c47733a Mon Sep 17 00:00:00 2001
+From 5e1f64fe4ca4a4a6fc9e0f2adcad3d04469b74dd Mon Sep 17 00:00:00 2001
 From: Sherman Siu <s8siu@uwaterloo.ca>
 Date: Mon, 21 Jul 2025 05:08:38 -0400
-Subject: [PATCH 4/4] Factor in the cmake args when building, e.g. for
+Subject: [PATCH 4/6] Factor in the cmake args when building, e.g. for
  cross-compilation
 
 ---
  setup.py | 3 +++
  1 file changed, 3 insertions(+)
 
 diff --git a/setup.py b/setup.py
-index b0cc2f481..4068094eb 100755
+index ea7cd0169..42a1a8e8a 100644
 --- a/setup.py
 +++ b/setup.py
-@@ -204,6 +204,9 @@ class cmake_build_ext(build_ext):
+@@ -205,6 +205,9 @@ class cmake_build_ext(build_ext):
          # Make sure we use the nvcc from CUDA_HOME
          if _is_cuda():
              cmake_args += [f'-DCMAKE_CUDA_COMPILER={CUDA_HOME}/bin/nvcc']

@@ -1,18 +1,18 @@
-From 49056e431b67064eec4f284a301c92ab26a9dbd8 Mon Sep 17 00:00:00 2001
+From 0edd58495c6962b1f478a7040bbad7f18e2e20f8 Mon Sep 17 00:00:00 2001
 From: Sherman Siu <s8siu@uwaterloo.ca>
-Date: Fri, 1 Aug 2025 05:10:59 -0400
-Subject: [PATCH 5/5] Configure build to target aarch64 even though CMake finds
- x86_64 hardware
+Date: Sun, 31 Aug 2025 20:37:56 -0400
+Subject: [PATCH 5/6] Configure build to target aarch64 even though CMake finds
+  x86_64 hardware
 
 ---
  cmake/cpu_extension.cmake | 5 +++++
  1 file changed, 5 insertions(+)
 
 diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake
-index fdc03a795..3142a1452 100644
+index fc7291972..c745e20c5 100644
 --- a/cmake/cpu_extension.cmake
 +++ b/cmake/cpu_extension.cmake
-@@ -85,6 +85,11 @@ else()
+@@ -83,6 +83,11 @@ else()
  endif()
 
 

@@ -0,0 +1,37 @@
+From 9c532a9b58b65017ae517bacbe438d9d71f71891 Mon Sep 17 00:00:00 2001
+From: Sherman Siu <s8siu@uwaterloo.ca>
+Date: Sun, 31 Aug 2025 18:08:54 -0400
+Subject: [PATCH 6/6] Use PyTorch 2.7.0 to keep version number consistent
+ across builds
+
+---
+ requirements/cpu-build.txt | 2 +-
+ requirements/cpu.txt       | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/requirements/cpu-build.txt b/requirements/cpu-build.txt
+index 37f072202..91f6cf297 100644
+--- a/requirements/cpu-build.txt
++++ b/requirements/cpu-build.txt
+@@ -6,7 +6,7 @@ packaging>=24.2
+ setuptools>=77.0.3,<80.0.0
+ setuptools-scm>=8
+ --extra-index-url https://download.pytorch.org/whl/cpu
+-torch==2.6.0+cpu
++torch==2.7.0
+ wheel
+ jinja2>=3.1.6
+ regex
+diff --git a/requirements/cpu.txt b/requirements/cpu.txt
+index df3a33935..29e53b997 100644
+--- a/requirements/cpu.txt
++++ b/requirements/cpu.txt
+@@ -8,7 +8,7 @@ numba == 0.61.2; python_version > '3.9'
+ packaging>=24.2
+ setuptools>=77.0.3,<80.0.0
+ --extra-index-url https://download.pytorch.org/whl/cpu
+-torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
++torch==2.7.0; platform_machine == "x86_64" # upstream pins 2.6.0+cpu here because torch>2.6.0+cpu has a performance regression on x86, see https://github.com/pytorch/pytorch/pull/151218; overridden to 2.7.0 to keep the version consistent across builds
+ torch==2.7.0; platform_system == "Darwin"
+ torch==2.7.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
+
@@ -1,7 +1,7 @@
 context:
-  version: 0.8.3
-  pytorch_version: 2.6.0
+  version: 0.9.2
   use_cuda: ${{ cuda_compiler_version != "None" }}
+  pytorch_version: 2.7.1  # The solver refuses to pick up 2.7.0 for some unknown reason
   vllm_target_device: ${{ "cuda" if use_cuda else "cpu" }}
   cuda_build_string: cuda_${{ cuda_compiler_version | version_to_buildstring }}
   string_prefix: ${{ cuda_build_string if cuda_compiler_version != "None" else "cpu_" }}
@@ -13,7 +13,7 @@ package:
 
 source:
 - url: https://pypi.org/packages/source/v/vllm/vllm-${{ version }}.tar.gz
-  sha256: 475a39d1093b8ef8a905d63eafe0c6c9b8f4f4c2ae2d23f1f3d0fae5e37bb4bd
+  sha256: 6b0d855ea8ba18d76364c9b82ea94bfcaa9c9e724055438b5733e4716ed104e1
   patches:
   - patches/0001-Search-for-the-CUDA-package-in-CMakeLists.patch
   - patches/0002-Remove-ninja-pip-requirement.patch
@@ -26,14 +26,15 @@ source:
   - if: aarch64
     then:
     - patches/0005-Configure-build-to-target-aarch64-even-though-CMake-.patch
+  - patches/0006-Use-PyTorch-2.7.0-to-keep-version-number-consistent-.patch
   target_directory: vllm
 # Needs to be vendored because vLLM uses a modified version of the flash attention primitives that supports KV-caching.
-- url: https://github.com/vllm-project/flash-attention/archive/d637d8927a35922ce6f6c0dff6dd3f765ed71f3c.tar.gz
-  sha256: 3099add00c9938735b84319d176c5b239c0165e3f9be6540a7a3505cd897c7cd
+- url: https://github.com/vllm-project/flash-attention/archive/1c2624e53c078854e0637ee566c72fe2107e75f4.tar.gz
+  sha256: cca19d7e53af08aa6d6f0c4fd9dd78d30314497e38fb03b1368b3d5a77ab4b5c
   target_directory: flash-attention
 
 build:
-  number: 4
+  number: 0
   string: ${{ string_prefix }}py${{ python | version_to_buildstring }}h${{ hash }}_${{ build_number }}
   script:
     - sed -i.bak 's/set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")/set(TORCH_SUPPORTED_VERSION_CUDA "${{ pytorch_version }}")/g' flash-attention/CMakeLists.txt
@@ -65,7 +66,7 @@ build:
 
 requirements:
   build:
-  - cmake >=3.26
+  - cmake >=3.26.1
   - git
   - ninja
   - zlib
@@ -86,10 +87,11 @@ requirements:
   host:
   - python
   - jinja2 >=3.1.6
-  - packaging
+  - packaging >=24.2
   - pip
   - pytorch ==${{ pytorch_version }}
-  - setuptools >=61
+  - regex
+  - setuptools >=77.0.3,<80.0.0
   - setuptools-scm >=8
   - wheel
   - if: linux
@@ -112,22 +114,22 @@ requirements:
   - blake3
   - cachetools
   - cloudpickle
-  - compressed-tensors ==0.9.2
+  - compressed-tensors ==0.10.2
   - depyf ==0.18.0
   - einops
   - fastapi >=0.115.0
   - filelock >=3.16.1
-  - gguf ==0.10.0
+  - gguf >=0.13.0
   - importlib-metadata
-  - hf-xet >=0.1.4
-  - huggingface_hub >=0.30.0
+  - hf-xet >=1.1.2,<2.0.0
+  - huggingface_hub >=0.33.0
   - lark ==1.2.2
-  - llguidance >=0.7.9,<0.8.0
   - lm-format-enforcer >=0.10.11,<0.11
-  - mistral-common >=1.5.4
+  - mistral-common >=1.6.2
   - msgspec
+  - numba ==0.61.2
   - numpy
-  - openai >=1.52.0
+  - openai >=1.52.0,<=1.90.0
   - opencv >=4.11.0
   - outlines ==0.1.11
   - partial-json-parser
@@ -137,44 +139,53 @@ requirements:
   - protobuf
   - psutil
   - py-cpuinfo
-  - pydantic >=2.9
+  - pybase64
+  - pydantic >=2.10
   - python-json-logger
   - pytorch ==${{ pytorch_version }}
   - pyyaml
-  - pyzmq
+  - pyzmq >=25.0.0
+  - regex
   - requests >=2.26.0
   - scipy
   - sentencepiece
   - tiktoken >=0.6.0
-  - tokenizers >=0.19.1
+  - tokenizers >=0.21.1
   - tqdm
   # Newer versions of transformers already define the aimv2 config, so we can't use them for now
   # See https://github.com/vllm-project/vllm-ascend/issues/2046#issuecomment-3123639101 for more details.
   # The required fix: https://github.com/vllm-project/vllm/commit/3fc964433a84bad785d9d0656fd56195462321b8
-  - transformers >=4.51.0,<4.54.0
+  - transformers >=4.51.1,<4.54.0
   - typing_extensions >=4.10
   - uvicorn-standard
   - watchfiles
-  - if: x86_64 or aarch64
+  - if: x86_64 or arm64 or aarch64
     then:
-    - xgrammar ==0.1.17
+    - llguidance >=0.7.11,< 0.8.0
+    - xgrammar ==0.1.19
   - if: match(python, ">3.11")
     then:
     - six >=1.16.0
-    - setuptools >=74.1.1
+    - setuptools >=77.0.3,<80
   - if: use_cuda
     then:
-    - numba ==0.61
     - ray-cgraph >=2.43.0,!=2.44
     - torchaudio ==${{ pytorch_version }}
-    - torchvision ==0.21.0
+    - torchvision ==0.22.0
     - if: linux64
       then:
-      - xformers ==0.0.29.post2
+      - xformers ==0.0.30  # platform_system == "Linux" and platform_machine == "x86_64"
     else:
     - torchaudio
     - torchvision
+    - if: x86_64
+      then:
+      - triton ==3.2.0
   run_constraints:
+  # Fixes issue with incompatibility between old `datasets` versions and `pyarrow` v21+
+  # See https://github.com/apache/arrow/issues/47155 for more details.
+  # The required PR is: https://github.com/huggingface/datasets/pull/6404
+  - datasets >=2.15
   - if: use_cuda
     then:
     - pytorch * [build=cuda*]
@@ -189,11 +200,13 @@ tests:
     - if: linux and use_cuda
       then:
       - vllm.vllm_flash_attn
-    # Disable until opentelemetry-prometheus-exporter has fixed constraints
-    # https://github.com/conda-forge/opentelemetry-exporter-prometheus-feedstock/pull/24
     pip_check: false
 - script:
-  - vllm --version
+  # As of vllm v0.9, the CLI in CUDA builds appears to require a real libcuda.so.1 (stub libraries don't work).
+  # That library isn't available on the CPU runners we use to build the wheel, so the CLI check only runs for CPU builds.
+  - if: not use_cuda
+    then:
+    - vllm --version
 - script:
     # Pick an arbitrary test to run: some of the other ones rely on a bunch of external packages
   - pytest ./vllm/tests/core/test_scheduler.py