diff --git a/.azure-pipelines/azure-pipelines-osx.yml b/.azure-pipelines/azure-pipelines-osx.yml index 628d2b5..3a4811f 100755 --- a/.azure-pipelines/azure-pipelines-osx.yml +++ b/.azure-pipelines/azure-pipelines-osx.yml @@ -5,7 +5,7 @@ jobs: - job: osx pool: - vmImage: macOS-13 + vmImage: macOS-15 strategy: matrix: osx_arm64_python3.10.____cpython: diff --git a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version12.9python3.10.____cpython.yaml similarity index 91% rename from .ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml rename to .ci_support/linux_64_cuda_compiler_version12.9python3.10.____cpython.yaml index 33d6d5c..abef889 100644 --- a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython.yaml +++ b/.ci_support/linux_64_cuda_compiler_version12.9python3.10.____cpython.yaml @@ -1,7 +1,7 @@ c_compiler: - gcc c_compiler_version: -- '13' +- '14' c_stdlib: - sysroot c_stdlib_version: @@ -15,15 +15,15 @@ channel_targets: cuda_compiler: - cuda-nvcc cuda_compiler_version: -- '12.6' +- '12.9' cxx_compiler: - gxx cxx_compiler_version: -- '13' +- '14' docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version12.9python3.11.____cpython.yaml similarity index 91% rename from .ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml rename to .ci_support/linux_64_cuda_compiler_version12.9python3.11.____cpython.yaml index 2ad3e17..05f62f4 100644 --- a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython.yaml +++ b/.ci_support/linux_64_cuda_compiler_version12.9python3.11.____cpython.yaml @@ -1,7 +1,7 @@ c_compiler: - gcc c_compiler_version: -- '13' +- '14' c_stdlib: - sysroot c_stdlib_version: @@ -15,15 +15,15 @@ channel_targets: cuda_compiler: - cuda-nvcc cuda_compiler_version: -- '12.6' +- '12.9' cxx_compiler: - gxx cxx_compiler_version: -- '13' +- '14' docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_version12.9python3.12.____cpython.yaml similarity index 91% rename from .ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml rename to .ci_support/linux_64_cuda_compiler_version12.9python3.12.____cpython.yaml index e9e8299..0736054 100644 --- a/.ci_support/linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython.yaml +++ b/.ci_support/linux_64_cuda_compiler_version12.9python3.12.____cpython.yaml @@ -1,7 +1,7 @@ c_compiler: - gcc c_compiler_version: -- '13' +- '14' c_stdlib: - sysroot c_stdlib_version: @@ -15,15 +15,15 @@ channel_targets: cuda_compiler: - cuda-nvcc cuda_compiler_version: -- '12.6' +- '12.9' cxx_compiler: - gxx cxx_compiler_version: -- '13' +- '14' docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonepython3.10.____cpython.yaml similarity index 95% rename from .ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml rename to .ci_support/linux_64_cuda_compiler_versionNonepython3.10.____cpython.yaml index c7421d9..9fa1fd2 100644 --- a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml +++ b/.ci_support/linux_64_cuda_compiler_versionNonepython3.10.____cpython.yaml @@ -23,7 +23,7 @@ cxx_compiler_version: docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonepython3.11.____cpython.yaml similarity index 95% rename from .ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml rename to .ci_support/linux_64_cuda_compiler_versionNonepython3.11.____cpython.yaml index f671561..a16f769 100644 --- a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml +++ b/.ci_support/linux_64_cuda_compiler_versionNonepython3.11.____cpython.yaml @@ -23,7 +23,7 @@ cxx_compiler_version: docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml b/.ci_support/linux_64_cuda_compiler_versionNonepython3.12.____cpython.yaml similarity index 95% rename from .ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml rename to .ci_support/linux_64_cuda_compiler_versionNonepython3.12.____cpython.yaml index 068522c..c1ad00a 100644 --- a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml +++ b/.ci_support/linux_64_cuda_compiler_versionNonepython3.12.____cpython.yaml @@ -23,7 +23,7 @@ cxx_compiler_version: docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython.yaml similarity index 95% rename from .ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml rename to .ci_support/linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython.yaml index 043d12b..05f5233 100644 --- a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython.yaml +++ b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython.yaml @@ -23,7 +23,7 @@ cxx_compiler_version: docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython.yaml similarity index 95% rename from .ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml rename to .ci_support/linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython.yaml index 263f911..c0aee0d 100644 --- a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython.yaml +++ b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython.yaml @@ -23,7 +23,7 @@ cxx_compiler_version: docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython.yaml similarity index 95% rename from .ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml rename to .ci_support/linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython.yaml index 7a36782..e0c1d91 100644 --- a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython.yaml +++ b/.ci_support/linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython.yaml @@ -23,7 +23,7 @@ cxx_compiler_version: docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 github_actions_labels: -- cirun-openstack-cpu-xlarge +- cirun-openstack-cpu-4xlarge pin_run_as_build: python: min_pin: x.x diff --git a/.ci_support/migrations/cuda129.yaml b/.ci_support/migrations/cuda129.yaml new file mode 100644 index 0000000..5074cd1 --- /dev/null +++ b/.ci_support/migrations/cuda129.yaml @@ -0,0 +1,57 @@ +migrator_ts: 1738229377 +__migrator: + kind: + version + migration_number: + 1 + build_number: + 1 + paused: false + override_cbc_keys: + - cuda_compiler_stub + check_solvable: false + primary_key: cuda_compiler_version + ordering: + cuda_compiler_version: + - 12.4 + - 12.6 + - 12.8 + - None + - 12.9 + # to allow manual opt-in for CUDA 11.8, see + # https://github.com/conda-forge/conda-forge-pinning-feedstock/pull/7472 + # must be last due to how cuda_compiler ordering in that migrator works + - 11.8 + commit_message: | + Upgrade to CUDA 12.9 + + CUDA 12.8 added support for architectures `sm_100`, `sm_101` and `sm_120`, + while CUDA 12.9 further added `sm_103` and `sm_121`. To build for these, + maintainers will need to modify their existing list of specified architectures + (e.g. `CMAKE_CUDA_ARCHITECTURES`, `TORCH_CUDA_ARCH_LIST`, etc.) + for their package. A good balance between broad support and storage + footprint (resp. compilation time) is to add `sm_100` and `sm_120`. + + Since CUDA 12.8, the conda-forge nvcc package now sets `CUDAARCHS` and + `TORCH_CUDA_ARCH_LIST` in its activation script to a string containing all + of the supported real architectures plus the virtual architecture of the + latest. Recipes for packages who use these variables to control their build + but do not want to build for all supported architectures will need to override + these variables in their build script. + + ref: https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#new-features + +cuda_compiler_version: # [((linux and (x86_64 or aarch64)) or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + - 12.9 # [((linux and (x86_64 or aarch64)) or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + +cuda_compiler_version_min: # [((linux and (x86_64 or aarch64)) or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + - 12.9 # [((linux and (x86_64 or aarch64)) or win64) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + +c_compiler_version: # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + - 14 # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + +cxx_compiler_version: # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + - 14 # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + +fortran_compiler_version: # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] + - 14 # [(linux and (x86_64 or aarch64)) and os.environ.get("CF_CUDA_ENABLED", "False") == "True"] diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml index dd79ac4..02674a9 100644 --- a/.github/workflows/conda-build.yml +++ b/.github/workflows/conda-build.yml @@ -21,50 +21,50 @@ jobs: fail-fast: false matrix: include: - - CONFIG: linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpython + - CONFIG: linux_64_cuda_compiler_version12.9python3.10.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version13cuda_compil_h351c87d5', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_version12.9python_h6e6b5039', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - - CONFIG: linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpython + - CONFIG: linux_64_cuda_compiler_version12.9python3.11.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version13cuda_compil_ha71f7a93', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_version12.9python_h9224ed27', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - - CONFIG: linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpython + - CONFIG: linux_64_cuda_compiler_version12.9python3.12.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version13cuda_compil_h5eb56615', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_version12.9python_h33f95ef4', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - - CONFIG: linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython + - CONFIG: linux_64_cuda_compiler_versionNonepython3.10.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version14cuda_compil_h85f9ff1d', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_versionNonepython_hb6a3b480', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - - CONFIG: linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython + - CONFIG: linux_64_cuda_compiler_versionNonepython3.11.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version14cuda_compil_h5acdecea', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_versionNonepython_h3c1a96fc', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - - CONFIG: linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython + - CONFIG: linux_64_cuda_compiler_versionNonepython3.12.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_64_c_compiler_version14cuda_compil_h678875e6', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_64_cuda_compiler_versionNonepython_h7883cd14', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - - CONFIG: linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpython + - CONFIG: linux_aarch64_cuda_compiler_versionNonepython3.10.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version14cuda_c_h519023ab', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_aarch64_cuda_compiler_versionNonep_h9166da89', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - - CONFIG: linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpython + - CONFIG: linux_aarch64_cuda_compiler_versionNonepython3.11.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version14cuda_c_h0bce4f42', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_aarch64_cuda_compiler_versionNonep_h61f3f778', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - - CONFIG: linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpython + - CONFIG: linux_aarch64_cuda_compiler_versionNonepython3.12.____cpython UPLOAD_PACKAGES: True os: ubuntu - runs_on: ['cirun-openstack-cpu-xlarge--${{ github.run_id }}-linux_aarch64_c_compiler_version14cuda_c_hc176500d', 'linux', 'x64', 'self-hosted'] + runs_on: ['cirun-openstack-cpu-4xlarge--${{ github.run_id }}-linux_aarch64_cuda_compiler_versionNonep_h72ca1b73', 'linux', 'x64', 'self-hosted'] DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 steps: diff --git a/README.md b/README.md index 4af119e..f9be0ea 100644 --- a/README.md +++ b/README.md @@ -31,66 +31,66 @@ Current build status - + - + - + - + - + - + - + - + - + diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml index 02b6f4c..21d8462 100644 --- a/recipe/conda_build_config.yaml +++ b/recipe/conda_build_config.yaml @@ -1,3 +1,3 @@ # https://github.com/conda-forge/.cirun github_actions_labels: # [linux] -- cirun-openstack-cpu-xlarge # [linux] +- cirun-openstack-cpu-4xlarge # [linux] diff --git a/recipe/recipe.yaml b/recipe/recipe.yaml index b5d53e0..99b5002 100644 --- a/recipe/recipe.yaml +++ b/recipe/recipe.yaml @@ -12,29 +12,29 @@ package: version: ${{ version }} source: -- url: https://pypi.org/packages/source/v/vllm/vllm-${{ version }}.tar.gz - sha256: 6b0d855ea8ba18d76364c9b82ea94bfcaa9c9e724055438b5733e4716ed104e1 - patches: - - patches/0001-Search-for-the-CUDA-package-in-CMakeLists.patch - - patches/0002-Remove-ninja-pip-requirement.patch - - if: linux - then: - - patches/0003-Manually-define-gettid.patch - - if: is_cross_compiling - then: - - patches/0004-Factor-in-the-cmake-args-when-building-e.g.-for-cros.patch - - if: aarch64 - then: - - patches/0005-Configure-build-to-target-aarch64-even-though-CMake-.patch - - patches/0006-Use-PyTorch-2.7.0-to-keep-version-number-consistent-.patch - target_directory: vllm -# Needs to be vendored because vLLM uses a modified version of the flash attention primitives that supports KV-caching. -- url: https://github.com/vllm-project/flash-attention/archive/1c2624e53c078854e0637ee566c72fe2107e75f4.tar.gz - sha256: cca19d7e53af08aa6d6f0c4fd9dd78d30314497e38fb03b1368b3d5a77ab4b5c - target_directory: flash-attention + - url: https://pypi.org/packages/source/v/vllm/vllm-${{ version }}.tar.gz + sha256: 6b0d855ea8ba18d76364c9b82ea94bfcaa9c9e724055438b5733e4716ed104e1 + patches: + - patches/0001-Search-for-the-CUDA-package-in-CMakeLists.patch + - patches/0002-Remove-ninja-pip-requirement.patch + - if: linux + then: + - patches/0003-Manually-define-gettid.patch + - if: is_cross_compiling + then: + - patches/0004-Factor-in-the-cmake-args-when-building-e.g.-for-cros.patch + - if: aarch64 + then: + - patches/0005-Configure-build-to-target-aarch64-even-though-CMake-.patch + - patches/0006-Use-PyTorch-2.7.0-to-keep-version-number-consistent-.patch + target_directory: vllm + # Needs to be vendored because vLLM uses a modified version of the flash attention primitives that supports KV-caching. + - url: https://github.com/vllm-project/flash-attention/archive/1c2624e53c078854e0637ee566c72fe2107e75f4.tar.gz + sha256: cca19d7e53af08aa6d6f0c4fd9dd78d30314497e38fb03b1368b3d5a77ab4b5c + target_directory: flash-attention build: - number: 0 + number: 1 string: ${{ string_prefix }}py${{ python | version_to_buildstring }}h${{ hash }}_${{ build_number }} script: - sed -i.bak 's/set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")/set(TORCH_SUPPORTED_VERSION_CUDA "${{ pytorch_version }}")/g' flash-attention/CMakeLists.txt @@ -45,177 +45,185 @@ build: - ln -s $PREFIX/include $SRC_DIR/vllm/third_party/NVTX/c/include - export VERBOSE=1 - export VLLM_TARGET_DEVICE=${{ vllm_target_device }} + - if: use_cuda + then: + - export TORCH_NVCC_FLAGS="-Xfatbin -compress-all" + # Building vLLM is memory-intensive: see https://github.com/Dao-AILab/flash-attention/issues/1043#issuecomment-2770635000 + - export MAX_JOBS=3 + # Override the CUDA architectures configured in the conda-forge nvcc package: https://github.com/conda-forge/cuda-nvcc-feedstock/blob/7843e9f1b9ea6bc555cd70c247d774189fc34110/recipe/conda_build_config.yaml#L21-L28 + - export CUDAARCHS="50-real;60-real;70-real;75-real;80-real;86-real;89-real;90a-real;100f-real;120a-real" + - export TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;8.9;9.0;10.0;12.0+PTX" # CMake is unable to automatically locate the Python include dir for aarch64 for some reason - if: aarch64 then: - - export CMAKE_ARGS="$CMAKE_ARGS -DPython_INCLUDE_DIR="$(python -c 'import sysconfig; print(sysconfig.get_path("include"))')"" + - export CMAKE_ARGS="$CMAKE_ARGS -DPython_INCLUDE_DIR="$(python -c 'import sysconfig; print(sysconfig.get_path("include"))')"" - ${{ PYTHON }} -m pip install . -vv --no-build-isolation --no-deps python: entry_points: - - vllm = vllm.entrypoints.cli.main:main + - vllm = vllm.entrypoints.cli.main:main skip: - win - osx and x86_64 - # conda-forge torchaudio dropped support for Python 3.9 (llvmlite fix only available for Python >=3.10) - # Also, we don't have Python 3.13 support until https://github.com/vllm-project/vllm/commit/21dce80ea96bcf033d159c0f952fb274567b315c is released - - match(python, "<3.10") or match(python, ">=3.13") + # We don't have Python 3.13 support until https://github.com/vllm-project/vllm/commit/21dce80ea96bcf033d159c0f952fb274567b315c is released + - match(python, ">=3.13") - aarch64 and use_cuda # Still have issues locating CUDA for the aarch64 build - # - match(python, "!=3.12") # Until the build works + # - not use_cuda # Just build CUDA for now + # - match(python, "!=3.10") # Until all the builds succeed requirements: build: - - cmake >=3.26.1 - - git - - ninja - - zlib - - ${{ stdlib('c') }} - - ${{ compiler('c') }} - - ${{ compiler('cxx') }} - - if: use_cuda - then: - - ${{ compiler('cuda') }} - - if: is_cross_compiling - then: + - cmake >=3.26.1 + - git + - ninja + - zlib + - ${{ stdlib('c') }} + - ${{ compiler('c') }} + - ${{ compiler('cxx') }} + - if: use_cuda + then: + - ${{ compiler('cuda') }} + - if: is_cross_compiling + then: + - python + - cross-python_${{ target_platform }} + - pytorch ==${{ pytorch_version }} + - if: use_cuda + then: + - pytorch * [build=cuda*] + host: - python - - cross-python_${{ target_platform }} + - jinja2 >=3.1.6 + - packaging >=24.2 + - pip - pytorch ==${{ pytorch_version }} + - regex + - setuptools >=77.0.3,<80.0.0 + - setuptools-scm >=8 + - wheel + - if: linux + then: + - libnuma - if: use_cuda then: - - pytorch * [build=cuda*] - host: - - python - - jinja2 >=3.1.6 - - packaging >=24.2 - - pip - - pytorch ==${{ pytorch_version }} - - regex - - setuptools >=77.0.3,<80.0.0 - - setuptools-scm >=8 - - wheel - - if: linux - then: - - libnuma - - if: use_cuda - then: - - pytorch * [build=cuda*] - - cuda - - cuda-cudart-dev - - cuda-nvrtc-dev - - cuda-nvrtc-static - - cuda-version ==${{ cuda_compiler_version }} - - cutlass <4 # Cutlass 4 introduces some major changes to the API that causes it to not compile - - libcublas-dev - - nvtx-c + - pytorch * [build=cuda*] + - cuda + - cuda-cudart-dev + - cuda-nvrtc-dev + - cuda-nvrtc-static + - cuda-version ==${{ cuda_compiler_version }} + - cutlass <4 # Cutlass 4 introduces some major changes to the API that causes it to not compile + - libcublas-dev + - nvtx-c run: - - python - - aiohttp - - blake3 - - cachetools - - cloudpickle - - compressed-tensors ==0.10.2 - - depyf ==0.18.0 - - einops - - fastapi >=0.115.0 - - filelock >=3.16.1 - - gguf >=0.13.0 - - importlib-metadata - - hf-xet >=1.1.2,<2.0.0 - - huggingface_hub >=0.33.0 - - lark ==1.2.2 - - lm-format-enforcer >=0.10.11,<0.11 - - mistral-common >=1.6.2 - - msgspec - - numba ==0.61.2 - - numpy - - openai >=1.52.0,<=1.90.0 - - opencv >=4.11.0 - - outlines ==0.1.11 - - partial-json-parser - - pillow - - prometheus_client >=0.18.0 - - prometheus-fastapi-instrumentator >=7.0.0 - - protobuf - - psutil - - py-cpuinfo - - pybase64 - - pydantic >=2.10 - - python-json-logger - - pytorch ==${{ pytorch_version }} - - pyyaml - - pyzmq >=25.0.0 - - regex - - requests >=2.26.0 - - scipy - - sentencepiece - - tiktoken >=0.6.0 - - tokenizers >=0.21.1 - - tqdm - # Newer versions of transformers already define the aimv2 config, so we can't use it for now - # See https://github.com/vllm-project/vllm-ascend/issues/2046#issuecomment-3123639101 for more details. - # The required fix: https://github.com/vllm-project/vllm/commit/3fc964433a84bad785d9d0656fd56195462321b8 - - transformers >=4.51.1,<4.54.0 - - typing_extensions >=4.10 - - uvicorn-standard - - watchfiles - - if: x86_64 or arm64 or aarch64 - then: - - llguidance >=0.7.11,< 0.8.0 - - xgrammar ==0.1.19 - - if: match(python, ">3.11") - then: - - six >=1.16.0 - - setuptools >=77.0.3,<80 - - if: use_cuda - then: - - ray-cgraph >=2.43.0,!=2.44 - - torchaudio ==${{ pytorch_version }} - - torchvision ==0.22.0 - - if: linux64 + - python + - aiohttp + - blake3 + - cachetools + - cloudpickle + - compressed-tensors ==0.10.2 + - depyf ==0.18.0 + - einops + - fastapi >=0.115.0 + - filelock >=3.16.1 + - gguf >=0.13.0 + - importlib-metadata + - hf-xet >=1.1.2,<2.0.0 + - huggingface_hub >=0.33.0 + - lark ==1.2.2 + - lm-format-enforcer >=0.10.11,<0.11 + - mistral-common >=1.6.2 + - msgspec + - numba ==0.61.2 + - numpy + - openai >=1.52.0,<=1.90.0 + - opencv >=4.11.0 + - outlines ==0.1.11 + - partial-json-parser + - pillow + - prometheus_client >=0.18.0 + - prometheus-fastapi-instrumentator >=7.0.0 + - protobuf + - psutil + - py-cpuinfo + - pybase64 + - pydantic >=2.10 + - python-json-logger + - pytorch ==${{ pytorch_version }} + - pyyaml + - pyzmq >=25.0.0 + - regex + - requests >=2.26.0 + - scipy + - sentencepiece + - tiktoken >=0.6.0 + - tokenizers >=0.21.1 + - tqdm + # Newer versions of transformers already define the aimv2 config, so we can't use it for now + # See https://github.com/vllm-project/vllm-ascend/issues/2046#issuecomment-3123639101 for more details. + # The required fix: https://github.com/vllm-project/vllm/commit/3fc964433a84bad785d9d0656fd56195462321b8 + - transformers >=4.51.1,<4.54.0 + - typing_extensions >=4.10 + - uvicorn-standard + - watchfiles + - if: x86_64 or arm64 or aarch64 then: - - xformers ==0.0.30 # platform_system == "Linux" and platform_machine == "x86_64" - else: - - torchaudio - - torchvision - - if: x86_64 + - llguidance >=0.7.11,< 0.8.0 + - xgrammar ==0.1.19 + - if: match(python, ">3.11") then: - - triton ==3.2.0 + - six >=1.16.0 + - setuptools >=77.0.3,<80 + - if: use_cuda + then: + - ray-cgraph >=2.43.0,!=2.44 + - torchaudio ==${{ pytorch_version }} + - torchvision ==0.22.0 + - if: linux64 + then: + - xformers ==0.0.30 # platform_system == "Linux" and platform_machine == "x86_64" + else: + - torchaudio + - torchvision + - if: x86_64 + then: + - triton ==3.2.0 run_constraints: - # Fixes issue with incompatibility between old `datasets` versions and `pyarrow` v21+ - # See https://github.com/apache/arrow/issues/47155 for more details. - # The required PR is: https://github.com/huggingface/datasets/pull/6404 - - datasets >=2.15 - - if: use_cuda - then: - - pytorch * [build=cuda*] + # Fixes issue with incompatibility between old `datasets` versions and `pyarrow` v21+ + # See https://github.com/apache/arrow/issues/47155 for more details. + # The required PR is: https://github.com/huggingface/datasets/pull/6404 + - datasets >=2.15 + - if: use_cuda + then: + - pytorch * [build=cuda*] ignore_run_exports: from_package: - - cuda-nvrtc-dev - - libcublas-dev + - cuda-nvrtc-dev + - libcublas-dev tests: -- python: - imports: - - vllm - - if: linux and use_cuda + - python: + imports: + - vllm + - if: linux and use_cuda + then: + - vllm.vllm_flash_attn + pip_check: false + - script: + # As of vllm v0.9 and later, it seems like libcuda.so.1 is required for the CLI for CUDA builds (stub libraries don't work) + # We can't test this on the CPU runners, which is what we're using to build the wheel + - if: not use_cuda then: - - vllm.vllm_flash_attn - pip_check: false -- script: - # As of vllm v0.9 and later, it seems like libcuda.so.1 is required for the CLI for CUDA builds (stub libraries don't work) - # We can't test this on the CPU runners, which is what we're using to build the wheel - - if: not use_cuda - then: - - vllm --version -- script: - # Pick an arbitrary test to run: some of the other ones rely on a bunch of external packages - - pytest ./vllm/tests/core/test_scheduler.py - requirements: - run: - - pytest - files: - source: - - vllm/tests + - vllm --version + - script: + # Pick an arbitrary test to run: some of the other ones rely on a bunch of external packages + - pytest ./vllm/tests/core/test_scheduler.py + requirements: + run: + - pytest + files: + source: + - vllm/tests about: homepage: https://github.com/vllm-project/vllm @@ -223,9 +231,9 @@ about: description: Easy, fast, and cheap LLM serving for everyone license: Apache-2.0 AND BSD-3-Clause license_file: - - vllm/LICENSE - - flash-attention/LICENSE - - LICENSE_CUTLASS.txt + - vllm/LICENSE + - flash-attention/LICENSE + - LICENSE_CUTLASS.txt documentation: https://vllm.readthedocs.io/en/latest/ extra:
VariantStatus
linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.10.____cpythonlinux_64_cuda_compiler_version12.9python3.10.____cpython - variant + variant
linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.11.____cpythonlinux_64_cuda_compiler_version12.9python3.11.____cpython - variant + variant
linux_64_c_compiler_version13cuda_compiler_version12.6cxx_compiler_version13python3.12.____cpythonlinux_64_cuda_compiler_version12.9python3.12.____cpython - variant + variant
linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpythonlinux_64_cuda_compiler_versionNonepython3.10.____cpython - variant + variant
linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpythonlinux_64_cuda_compiler_versionNonepython3.11.____cpython - variant + variant
linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpythonlinux_64_cuda_compiler_versionNonepython3.12.____cpython - variant + variant
linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.10.____cpythonlinux_aarch64_cuda_compiler_versionNonepython3.10.____cpython - variant + variant
linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.11.____cpythonlinux_aarch64_cuda_compiler_versionNonepython3.11.____cpython - variant + variant
linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14python3.12.____cpythonlinux_aarch64_cuda_compiler_versionNonepython3.12.____cpython - variant + variant