diff --git a/.azure-pipelines/azure-pipelines-osx.yml b/.azure-pipelines/azure-pipelines-osx.yml
index 628d2b5..3a4811f 100755
--- a/.azure-pipelines/azure-pipelines-osx.yml
+++ b/.azure-pipelines/azure-pipelines-osx.yml
@@ -5,7 +5,7 @@
 jobs:
 - job: osx
   pool:
-    vmImage: macOS-13
+    vmImage: macOS-15
   strategy:
     matrix:
       osx_arm64_python3.10.____cpython:
diff --git a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version12.9python3.10.____cpython.yaml
similarity index 91%
rename from .ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_version12.9python3.10.____cpython.yaml
index 33d6d5c..abef889 100644
--- a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_version12.9python3.10.____cpython.yaml
@@ -1,7 +1,7 @@
 c_compiler:
 - gcc
 c_compiler_version:
-- '13'
+- '14'
 c_stdlib:
 - sysroot
 c_stdlib_version:
@@ -15,15 +15,15 @@ channel_targets:
 cuda_compiler:
 - cuda-nvcc
 cuda_compiler_version:
-- '12.6'
+- '12.9'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '13'
+- '14'
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version12.9python3.11.____cpython.yaml
similarity index 91%
rename from .ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_version12.9python3.11.____cpython.yaml
index 2ad3e17..05f62f4 100644
--- a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_version12.9python3.11.____cpython.yaml
@@ -1,7 +1,7 @@
 c_compiler:
 - gcc
 c_compiler_version:
-- '13'
+- '14'
 c_stdlib:
 - sysroot
 c_stdlib_version:
@@ -15,15 +15,15 @@ channel_targets:
 cuda_compiler:
 - cuda-nvcc
 cuda_compiler_version:
-- '12.6'
+- '12.9'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '13'
+- '14'
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version12.9python3.12.____cpython.yaml
similarity index 91%
rename from .ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_version12.9python3.12.____cpython.yaml
index e9e8299..0736054 100644
--- a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_version12.9python3.12.____cpython.yaml
@@ -1,7 +1,7 @@
 c_compiler:
 - gcc
 c_compiler_version:
-- '13'
+- '14'
 c_stdlib:
 - sysroot
 c_stdlib_version:
@@ -15,15 +15,15 @@ channel_targets:
 cuda_compiler:
 - cuda-nvcc
 cuda_compiler_version:
-- '12.6'
+- '12.9'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '13'
+- '14'
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonepython3.10.____cpython.yaml
similarity index 95%
rename from .ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_versionNonepython3.10.____cpython.yaml
index c7421d9..9fa1fd2 100644
--- a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_versionNonepython3.10.____cpython.yaml
@@ -23,7 +23,7 @@ cxx_compiler_version:
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonepython3.11.____cpython.yaml
similarity index 95%
rename from .ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_versionNonepython3.11.____cpython.yaml
index f671561..a16f769 100644
--- a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_versionNonepython3.11.____cpython.yaml
@@ -23,7 +23,7 @@ cxx_compiler_version:
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonepython3.12.____cpython.yaml
similarity index 95%
rename from .ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml
rename to .ci_support/linux_64_cuda_compiler_versionNonepython3.12.____cpython.yaml
index 068522c..c1ad00a 100644
--- a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml
+++ b/.ci_support/linux_64_cuda_compiler_versionNonepython3.12.____cpython.yaml
@@ -23,7 +23,7 @@ cxx_compiler_version:
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython.yaml
similarity index 95%
rename from .ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml
rename to .ci_support/linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython.yaml
index 043d12b..05f5233 100644
--- a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml
+++ b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython.yaml
@@ -23,7 +23,7 @@ cxx_compiler_version:
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython.yaml
similarity index 95%
rename from .ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml
rename to .ci_support/linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython.yaml
index 263f911..c0aee0d 100644
--- a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml
+++ b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython.yaml
@@ -23,7 +23,7 @@ cxx_compiler_version:
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython.yaml
similarity index 95%
rename from .ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml
rename to .ci_support/linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython.yaml
index 7a36782..e0c1d91 100644
--- a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml
+++ b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython.yaml
@@ -23,7 +23,7 @@ cxx_compiler_version:
 docker_image:
 - quay.io/condaforge/linux-anvil-x86_64:alma9
 github_actions_labels:
-- cirun-openstack-cpu-xlarge
+- cirun-openstack-cpu-4xlarge
 pin_run_as_build:
   python:
     min_pin: x.x
diff --git a/.ci_support/migrations/cuda129.yaml b/.ci_support/migrations/cuda129.yaml
new file mode 100644
index 0000000..5074cd1
--- /dev/null
+++ b/.ci_support/migrations/cuda129.yaml
@@ -0,0 +1,57 @@
+migrator_ts: 1738229377
+__migrator:
+  kind:
+    version
+  migration_number:
+    1
+  build_number:
+    1
+  paused: false
+  override_cbc_keys:
+    - cuda_compiler_stub
+  check_solvable: false
+  primary_key: cuda_compiler_version
+  ordering:
+    cuda_compiler_version:
+      - 12.4
+      - 12.6
+      - 12.8
+      - None
+      - 12.9
+      # to allow manual opt-in for CUDA 11.8, see
+      # https://github.com/conda-forge/conda-forge-pinning-feedstock/pull/7472
+      # must be last due to how cuda_compiler ordering in that migrator works
+      - 11.8
+  commit_message: |
+    Upgrade to CUDA 12.9
+    
+    CUDA 12.8 added support for architectures `sm_100`, `sm_101` and `sm_120`,
+    while CUDA 12.9 further added `sm_103` and `sm_121`. To build for these,
+    maintainers will need to modify their existing list of specified architectures
+    (e.g. `CMAKE_CUDA_ARCHITECTURES`, `TORCH_CUDA_ARCH_LIST`, etc.)
+    for their package. A good balance between broad support and storage
+    footprint (resp. compilation time) is to add `sm_100` and `sm_120`.
+    
+    Since CUDA 12.8, the conda-forge nvcc package now sets `CUDAARCHS` and
+    `TORCH_CUDA_ARCH_LIST` in its activation script to a string containing all
+    of the supported real architectures plus the virtual architecture of the
+    latest. Recipes for packages who use these variables to control their build
+    but do not want to build for all supported architectures will need to override
+    these variables in their build script.
+    
+    ref: https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#new-features
+
+cuda_compiler_version:         # [((linux and (x86_64 or aarch64)) or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+  - 12.9                       # [((linux and (x86_64 or aarch64)) or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+
+cuda_compiler_version_min:     # [((linux and (x86_64 or aarch64)) or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+  - 12.9                       # [((linux and (x86_64 or aarch64)) or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+
+c_compiler_version:            # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+  - 14                         # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+
+cxx_compiler_version:          # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+  - 14                         # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+
+fortran_compiler_version:      # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
+  - 14                         # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"]
diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml
index dd79ac4..02674a9 100644
--- a/.github/workflows/conda-build.yml
+++ b/.github/workflows/conda-build.yml
@@ -21,50 +21,50 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - CONFIG: linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython
+          - CONFIG: linux_64_cuda_compiler_version12.9python3.10.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version13cuda_compil_h351c87d5', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_version12.9python_h6e6b5039', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
-          - CONFIG: linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython
+          - CONFIG: linux_64_cuda_compiler_version12.9python3.11.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version13cuda_compil_ha71f7a93', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_version12.9python_h9224ed27', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
-          - CONFIG: linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython
+          - CONFIG: linux_64_cuda_compiler_version12.9python3.12.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version13cuda_compil_h5eb56615', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_version12.9python_h33f95ef4', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
-          - CONFIG: linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython
+          - CONFIG: linux_64_cuda_compiler_versionNonepython3.10.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version14cuda_compil_h85f9ff1d', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_versionNonepython_hb6a3b480', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
-          - CONFIG: linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython
+          - CONFIG: linux_64_cuda_compiler_versionNonepython3.11.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version14cuda_compil_h5acdecea', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_versionNonepython_h3c1a96fc', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
-          - CONFIG: linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython
+          - CONFIG: linux_64_cuda_compiler_versionNonepython3.12.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version14cuda_compil_h678875e6', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_versionNonepython_h7883cd14', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
-          - CONFIG: linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython
+          - CONFIG: linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version14cuda_c_h519023ab', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_aarch64_cuda_compiler_versionNonep_h9166da89', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
-          - CONFIG: linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython
+          - CONFIG: linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version14cuda_c_h0bce4f42', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_aarch64_cuda_compiler_versionNonep_h61f3f778', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
-          - CONFIG: linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython
+          - CONFIG: linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython
             UPLOAD_PACKAGES: True
             os: ubuntu
-            runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version14cuda_c_hc176500d', 'linux', 'x64', 'self-hosted']
+            runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_aarch64_cuda_compiler_versionNonep_h72ca1b73', 'linux', 'x64', 'self-hosted']
             DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9
     steps:
 
diff --git a/README.md b/README.md
index 4af119e..f9be0ea 100644
--- a/README.md
+++ b/README.md
@@ -31,66 +31,66 @@ Current build status
         <table>
           <thead><tr><th>Variant</th><th>Status</th></tr></thead>
           <tbody><tr>
-              <td>linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython</td>
+              <td>linux_64_cuda_compiler_version12.9python3.10.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_cuda_compiler_version12.9python3.10.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython</td>
+              <td>linux_64_cuda_compiler_version12.9python3.11.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_cuda_compiler_version12.9python3.11.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython</td>
+              <td>linux_64_cuda_compiler_version12.9python3.12.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_cuda_compiler_version12.9python3.12.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython</td>
+              <td>linux_64_cuda_compiler_versionNonepython3.10.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_cuda_compiler_versionNonepython3.10.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython</td>
+              <td>linux_64_cuda_compiler_versionNonepython3.11.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_cuda_compiler_versionNonepython3.11.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython</td>
+              <td>linux_64_cuda_compiler_versionNonepython3.12.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_64_cuda_compiler_versionNonepython3.12.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython</td>
+              <td>linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython</td>
+              <td>linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
-              <td>linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython</td>
+              <td>linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython</td>
               <td>
                 <a href="https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=26279&branchName=main">
-                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython" alt="variant">
+                  <img src="https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/vllm-feedstock?branchName=main&jobName=linux&configuration=linux%20linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython" alt="variant">
                 </a>
               </td>
             </tr><tr>
diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index 02b6f4c..21d8462 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -1,3 +1,3 @@
 # https://github.com/conda-forge/.cirun
 github_actions_labels:  # [linux]
-- cirun-openstack-cpu-xlarge  # [linux]
+- cirun-openstack-cpu-4xlarge  # [linux]
diff --git a/recipe/recipe.yaml b/recipe/recipe.yaml
index b5d53e0..99b5002 100644
--- a/recipe/recipe.yaml
+++ b/recipe/recipe.yaml
@@ -12,29 +12,29 @@ package:
   version: ${{ version }}
 
 source:
-- url: https://pypi.org/packages/source/v/vllm/vllm-${{ version }}.tar.gz
-  sha256: 6b0d855ea8ba18d76364c9b82ea94bfcaa9c9e724055438b5733e4716ed104e1
-  patches:
-  - patches/0001-Search-for-the-CUDA-package-in-CMakeLists.patch
-  - patches/0002-Remove-ninja-pip-requirement.patch
-  - if: linux
-    then:
-    - patches/0003-Manually-define-gettid.patch
-  - if: is_cross_compiling
-    then:
-    - patches/0004-Factor-in-the-cmake-args-when-building-e.g.-for-cros.patch
-  - if: aarch64
-    then:
-    - patches/0005-Configure-build-to-target-aarch64-even-though-CMake-.patch
-  - patches/0006-Use-PyTorch-2.7.0-to-keep-version-number-consistent-.patch
-  target_directory: vllm
-# Needs to be vendored because vLLM uses a modified version of the flash attention primitives that supports KV-caching.
-- url: https://github.com/vllm-project/flash-attention/archive/1c2624e53c078854e0637ee566c72fe2107e75f4.tar.gz
-  sha256: cca19d7e53af08aa6d6f0c4fd9dd78d30314497e38fb03b1368b3d5a77ab4b5c
-  target_directory: flash-attention
+  - url: https://pypi.org/packages/source/v/vllm/vllm-${{ version }}.tar.gz
+    sha256: 6b0d855ea8ba18d76364c9b82ea94bfcaa9c9e724055438b5733e4716ed104e1
+    patches:
+      - patches/0001-Search-for-the-CUDA-package-in-CMakeLists.patch
+      - patches/0002-Remove-ninja-pip-requirement.patch
+      - if: linux
+        then:
+          - patches/0003-Manually-define-gettid.patch
+      - if: is_cross_compiling
+        then:
+          - patches/0004-Factor-in-the-cmake-args-when-building-e.g.-for-cros.patch
+      - if: aarch64
+        then:
+          - patches/0005-Configure-build-to-target-aarch64-even-though-CMake-.patch
+      - patches/0006-Use-PyTorch-2.7.0-to-keep-version-number-consistent-.patch
+    target_directory: vllm
+  # Needs to be vendored because vLLM uses a modified version of the flash attention primitives that supports KV-caching.
+  - url: https://github.com/vllm-project/flash-attention/archive/1c2624e53c078854e0637ee566c72fe2107e75f4.tar.gz
+    sha256: cca19d7e53af08aa6d6f0c4fd9dd78d30314497e38fb03b1368b3d5a77ab4b5c
+    target_directory: flash-attention
 
 build:
-  number: 0
+  number: 1
   string: ${{ string_prefix }}py${{ python | version_to_buildstring }}h${{ hash }}_${{ build_number }}
   script:
     - sed -i.bak 's/set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")/set(TORCH_SUPPORTED_VERSION_CUDA "${{ pytorch_version }}")/g' flash-attention/CMakeLists.txt
@@ -45,177 +45,185 @@ build:
     - ln -s $PREFIX/include $SRC_DIR/vllm/third_party/NVTX/c/include
     - export VERBOSE=1
     - export VLLM_TARGET_DEVICE=${{ vllm_target_device }}
+    - if: use_cuda
+      then:
+        - export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+        # Building vLLM is memory-intensive: see https://github.com/Dao-AILab/flash-attention/issues/1043#issuecomment-2770635000
+        - export MAX_JOBS=3
+        # Override the CUDA architectures configured in the conda-forge nvcc package: https://github.com/conda-forge/cuda-nvcc-feedstock/blob/7843e9f1b9ea6bc555cd70c247d774189fc34110/recipe/conda_build_config.yaml#L21-L28
+        - export CUDAARCHS="50-real;60-real;70-real;75-real;80-real;86-real;89-real;90a-real;100f-real;120a-real"
+        - export TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;8.9;9.0;10.0;12.0+PTX"
     # CMake is unable to automatically locate the Python include dir for aarch64 for some reason
     - if: aarch64
       then:
-      - export CMAKE_ARGS="$CMAKE_ARGS -DPython_INCLUDE_DIR="$(python -c 'import sysconfig; print(sysconfig.get_path("include"))')""
+        - export CMAKE_ARGS="$CMAKE_ARGS -DPython_INCLUDE_DIR="$(python -c 'import sysconfig; print(sysconfig.get_path("include"))')""
     - ${{ PYTHON }} -m pip install . -vv --no-build-isolation --no-deps
 
   python:
     entry_points:
-    - vllm = vllm.entrypoints.cli.main:main
+      - vllm = vllm.entrypoints.cli.main:main
 
   skip:
     - win
     - osx and x86_64
-    # conda-forge torchaudio dropped support for Python 3.9 (llvmlite fix only available for Python >=3.10)
-    # Also, we don't have Python 3.13 support until https://github.com/vllm-project/vllm/commit/21dce80ea96bcf033d159c0f952fb274567b315c is released
-    - match(python, "<3.10") or match(python, ">=3.13")
+    # We don't have Python 3.13 support until https://github.com/vllm-project/vllm/commit/21dce80ea96bcf033d159c0f952fb274567b315c is released
+    - match(python, ">=3.13")
     - aarch64 and use_cuda  # Still have issues locating CUDA for the aarch64 build
-    # - match(python, "!=3.12")  # Until the build works
+    # - not use_cuda  # Just build CUDA for now
+    # - match(python, "!=3.10")  # Until all the builds succeed
 
 requirements:
   build:
-  - cmake >=3.26.1
-  - git
-  - ninja
-  - zlib
-  - ${{ stdlib('c') }}
-  - ${{ compiler('c') }}
-  - ${{ compiler('cxx') }}
-  - if: use_cuda
-    then:
-    - ${{ compiler('cuda') }}
-  - if: is_cross_compiling
-    then:
+    - cmake >=3.26.1
+    - git
+    - ninja
+    - zlib
+    - ${{ stdlib('c') }}
+    - ${{ compiler('c') }}
+    - ${{ compiler('cxx') }}
+    - if: use_cuda
+      then:
+        - ${{ compiler('cuda') }}
+    - if: is_cross_compiling
+      then:
+        - python
+        - cross-python_${{ target_platform }}
+        - pytorch ==${{ pytorch_version }}
+        - if: use_cuda
+          then:
+            - pytorch * [build=cuda*]
+  host:
     - python
-    - cross-python_${{ target_platform }}
+    - jinja2 >=3.1.6
+    - packaging >=24.2
+    - pip
     - pytorch ==${{ pytorch_version }}
+    - regex
+    - setuptools >=77.0.3,<80.0.0
+    - setuptools-scm >=8
+    - wheel
+    - if: linux
+      then:
+        - libnuma
     - if: use_cuda
       then:
-      - pytorch * [build=cuda*]
-  host:
-  - python
-  - jinja2 >=3.1.6
-  - packaging >=24.2
-  - pip
-  - pytorch ==${{ pytorch_version }}
-  - regex
-  - setuptools >=77.0.3,<80.0.0
-  - setuptools-scm >=8
-  - wheel
-  - if: linux
-    then:
-    - libnuma
-  - if: use_cuda
-    then:
-    - pytorch * [build=cuda*]
-    - cuda
-    - cuda-cudart-dev
-    - cuda-nvrtc-dev
-    - cuda-nvrtc-static
-    - cuda-version ==${{ cuda_compiler_version }}
-    - cutlass <4  # Cutlass 4 introduces some major changes to the API that causes it to not compile
-    - libcublas-dev
-    - nvtx-c
+        - pytorch * [build=cuda*]
+        - cuda
+        - cuda-cudart-dev
+        - cuda-nvrtc-dev
+        - cuda-nvrtc-static
+        - cuda-version ==${{ cuda_compiler_version }}
+        - cutlass <4  # Cutlass 4 introduces some major changes to the API that cause it to not compile
+        - libcublas-dev
+        - nvtx-c
   run:
-  - python
-  - aiohttp
-  - blake3
-  - cachetools
-  - cloudpickle
-  - compressed-tensors ==0.10.2
-  - depyf ==0.18.0
-  - einops
-  - fastapi >=0.115.0
-  - filelock >=3.16.1
-  - gguf >=0.13.0
-  - importlib-metadata
-  - hf-xet >=1.1.2,<2.0.0
-  - huggingface_hub >=0.33.0
-  - lark ==1.2.2
-  - lm-format-enforcer >=0.10.11,<0.11
-  - mistral-common >=1.6.2
-  - msgspec
-  - numba ==0.61.2
-  - numpy
-  - openai >=1.52.0,<=1.90.0
-  - opencv >=4.11.0
-  - outlines ==0.1.11
-  - partial-json-parser
-  - pillow
-  - prometheus_client >=0.18.0
-  - prometheus-fastapi-instrumentator >=7.0.0
-  - protobuf
-  - psutil
-  - py-cpuinfo
-  - pybase64
-  - pydantic >=2.10
-  - python-json-logger
-  - pytorch ==${{ pytorch_version }}
-  - pyyaml
-  - pyzmq >=25.0.0
-  - regex
-  - requests >=2.26.0
-  - scipy
-  - sentencepiece
-  - tiktoken >=0.6.0
-  - tokenizers >=0.21.1
-  - tqdm
-  # Newer versions of transformers already define the aimv2 config, so we can't use it for now
-  # See https://github.com/vllm-project/vllm-ascend/issues/2046#issuecomment-3123639101 for more details.
-  # The required fix: https://github.com/vllm-project/vllm/commit/3fc964433a84bad785d9d0656fd56195462321b8
-  - transformers >=4.51.1,<4.54.0
-  - typing_extensions >=4.10
-  - uvicorn-standard
-  - watchfiles
-  - if: x86_64 or arm64 or aarch64
-    then:
-    - llguidance >=0.7.11,< 0.8.0
-    - xgrammar ==0.1.19
-  - if: match(python, ">3.11")
-    then:
-    - six >=1.16.0
-    - setuptools >=77.0.3,<80
-  - if: use_cuda
-    then:
-    - ray-cgraph >=2.43.0,!=2.44
-    - torchaudio ==${{ pytorch_version }}
-    - torchvision ==0.22.0
-    - if: linux64
+    - python
+    - aiohttp
+    - blake3
+    - cachetools
+    - cloudpickle
+    - compressed-tensors ==0.10.2
+    - depyf ==0.18.0
+    - einops
+    - fastapi >=0.115.0
+    - filelock >=3.16.1
+    - gguf >=0.13.0
+    - importlib-metadata
+    - hf-xet >=1.1.2,<2.0.0
+    - huggingface_hub >=0.33.0
+    - lark ==1.2.2
+    - lm-format-enforcer >=0.10.11,<0.11
+    - mistral-common >=1.6.2
+    - msgspec
+    - numba ==0.61.2
+    - numpy
+    - openai >=1.52.0,<=1.90.0
+    - opencv >=4.11.0
+    - outlines ==0.1.11
+    - partial-json-parser
+    - pillow
+    - prometheus_client >=0.18.0
+    - prometheus-fastapi-instrumentator >=7.0.0
+    - protobuf
+    - psutil
+    - py-cpuinfo
+    - pybase64
+    - pydantic >=2.10
+    - python-json-logger
+    - pytorch ==${{ pytorch_version }}
+    - pyyaml
+    - pyzmq >=25.0.0
+    - regex
+    - requests >=2.26.0
+    - scipy
+    - sentencepiece
+    - tiktoken >=0.6.0
+    - tokenizers >=0.21.1
+    - tqdm
+    # Newer versions of transformers already define the aimv2 config, so we can't use it for now
+    # See https://github.com/vllm-project/vllm-ascend/issues/2046#issuecomment-3123639101 for more details.
+    # The required fix: https://github.com/vllm-project/vllm/commit/3fc964433a84bad785d9d0656fd56195462321b8
+    - transformers >=4.51.1,<4.54.0
+    - typing_extensions >=4.10
+    - uvicorn-standard
+    - watchfiles
+    - if: x86_64 or arm64 or aarch64
       then:
-      - xformers ==0.0.30  # platform_system == "Linux" and platform_machine == "x86_64"
-    else:
-    - torchaudio
-    - torchvision
-    - if: x86_64
+        - llguidance >=0.7.11,< 0.8.0
+        - xgrammar ==0.1.19
+    - if: match(python, ">3.11")
       then:
-      - triton ==3.2.0
+        - six >=1.16.0
+        - setuptools >=77.0.3,<80
+    - if: use_cuda
+      then:
+        - ray-cgraph >=2.43.0,!=2.44
+        - torchaudio ==${{ pytorch_version }}
+        - torchvision ==0.22.0
+        - if: linux64
+          then:
+            - xformers ==0.0.30  # platform_system == "Linux" and platform_machine == "x86_64"
+      else:
+        - torchaudio
+        - torchvision
+        - if: x86_64
+          then:
+            - triton ==3.2.0
   run_constraints:
-  # Fixes issue with incompatibility between old `datasets` versions and `pyarrow` v21+
-  # See https://github.com/apache/arrow/issues/47155 for more details.
-  # The required PR is: https://github.com/huggingface/datasets/pull/6404
-  - datasets >=2.15
-  - if: use_cuda
-    then:
-    - pytorch * [build=cuda*]
+    # Fixes issue with incompatibility between old `datasets` versions and `pyarrow` v21+
+    # See https://github.com/apache/arrow/issues/47155 for more details.
+    # The required PR is: https://github.com/huggingface/datasets/pull/6404
+    - datasets >=2.15
+    - if: use_cuda
+      then:
+        - pytorch * [build=cuda*]
   ignore_run_exports:
     from_package:
-    - cuda-nvrtc-dev
-    - libcublas-dev
+      - cuda-nvrtc-dev
+      - libcublas-dev
 tests:
-- python:
-    imports:
-    - vllm
-    - if: linux and use_cuda
+  - python:
+      imports:
+        - vllm
+        - if: linux and use_cuda
+          then:
+            - vllm.vllm_flash_attn
+      pip_check: false
+  - script:
+    # From vLLM v0.9 onward, the CLI appears to require libcuda.so.1 for CUDA builds (stub libraries don't work)
+    # We can't test this on the CPU runners, which is what we're using to build the wheel
+    - if: not use_cuda
       then:
-      - vllm.vllm_flash_attn
-    pip_check: false
-- script:
-  # As of vllm v0.9 and later, it seems like libcuda.so.1 is required for the CLI for CUDA builds (stub libraries don't work)
-  # We can't test this on the CPU runners, which is what we're using to build the wheel
-  - if: not use_cuda
-    then:
-    - vllm --version
-- script:
-    # Pick an arbitrary test to run: some of the other ones rely on a bunch of external packages
-  - pytest ./vllm/tests/core/test_scheduler.py
-  requirements:
-    run:
-    - pytest
-  files:
-    source:
-    - vllm/tests
+        - vllm --version
+  - script:
+      # Pick an arbitrary test to run: some of the other ones rely on a bunch of external packages
+      - pytest ./vllm/tests/core/test_scheduler.py
+    requirements:
+      run:
+        - pytest
+    files:
+      source:
+        - vllm/tests
 
 about:
   homepage: https://github.com/vllm-project/vllm
@@ -223,9 +231,9 @@ about:
   description: Easy, fast, and cheap LLM serving for everyone
   license: Apache-2.0 AND BSD-3-Clause
   license_file:
-  - vllm/LICENSE
-  - flash-attention/LICENSE
-  - LICENSE_CUTLASS.txt
+    - vllm/LICENSE
+    - flash-attention/LICENSE
+    - LICENSE_CUTLASS.txt
   documentation: https://vllm.readthedocs.io/en/latest/
 
 extra: