From 368bab991d4f6f3f442497c1c34b32d1da4d6cfd Mon Sep 17 00:00:00 2001 From: Daniel Fahey Date: Tue, 7 Oct 2025 16:17:45 +0100 Subject: [PATCH 1/2] python3Packages.vllm: add update.py - Removes `passthru.skipBulkUpdate` since automated updates are now safe - Adds `passthru.updateScript` that coordinates updates of vLLM and its external dependencies (CUTLASS, FlashMLA, flash-attention). The script parses upstream CMake configurations to extract git revisions, computes nix hashes, and updates the derivation file - Adds Pythonic script with the `sh` library to invoke external tools (`sd`, `nix-prefetch-github`, `nix`, `update-source-version`) while handling HTTP and JSON natively. Regex patterns are pre-computed in configuration for maintainability - Script structure is designed for reusability: package-specific configuration is isolated in top-level constants, making it straightforward to adapt for other packages with similar bespoke update requirements --- .../python-modules/vllm/default.nix | 3 +- .../development/python-modules/vllm/update.py | 119 ++++++++++++++++++ 2 files changed, 120 insertions(+), 2 deletions(-) create mode 100755 pkgs/development/python-modules/vllm/update.py diff --git a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix index b8b2e7f59d3b4..8e5361fc60db6 100644 --- a/pkgs/development/python-modules/vllm/default.nix +++ b/pkgs/development/python-modules/vllm/default.nix @@ -500,8 +500,7 @@ buildPythonPackage rec { passthru = { # make internal dependency available to overlays vllm-flash-attn = vllm-flash-attn'; - # updates the cutlass fetcher instead - skipBulkUpdate = true; + updateScript = ./update.py; }; meta = { diff --git a/pkgs/development/python-modules/vllm/update.py b/pkgs/development/python-modules/vllm/update.py new file mode 100755 index 0000000000000..4a428a1cc850a --- /dev/null +++ b/pkgs/development/python-modules/vllm/update.py @@ -0,0 +1,119 @@ +#!/usr/bin/env nix-shell +#! 
nix-shell -i python3 -p python3 python3Packages.sh sd nix-prefetch-github common-updater-scripts + +""" +Updates the main vLLM package and three external dependencies. +""" + +import argparse +import json +import os +import re +from pathlib import Path +from urllib.request import Request, urlopen +import sh + + +API_BASE = 'https://api.github.com/repos' +RAW_BASE = 'https://raw.githubusercontent.com' + +NIX_FILE = str(Path(__file__).resolve().parent / 'default.nix') +GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '') +HEADERS = {'Accept': 'application/vnd.github.v3+json'} | ( + {'Authorization': f'bearer {GITHUB_TOKEN}'} if GITHUB_TOKEN else {} +) + +PKG_NAME = 'python3Packages.vllm' +PKG_REPO = 'vllm-project/vllm' +TAG_PATTERN = r'(repo = "vllm";\s+)tag = "v\$\{version\}";' +REV_PATTERN = r'(repo = "vllm";\s+)rev = "[a-f0-9]{40}";' + +DEPENDENCIES = { + 'NVIDIA/cutlass': { + 'upstream_file': 'CMakeLists.txt', + 'upstream_pattern': r'CUTLASS_REVISION "([^"]+)"', + 'update_pattern': r'(cutlass = fetchFromGitHub.*?tag = ")[^"]+(";.*?hash = ")[^"]+(";)', + }, + 'vllm-project/FlashMLA': { + 'upstream_file': 'cmake/external_projects/flashmla.cmake', + 'upstream_pattern': r'GIT_TAG ([a-f0-9]+)', + 'update_pattern': r'(flashmla = stdenv\.mkDerivation.*?rev = ")[^"]+(";.*?hash = ")[^"]+(";)', + 'version_config': { + 'source_file': 'setup.py', + 'version_pattern': r'"([0-9]+\.[0-9]+\.[0-9]+)"', + 'update_pattern': r'(flashmla = stdenv\.mkDerivation.*?version = ")[^"]+(";)', + }, + }, + 'vllm-project/flash-attention': { + 'upstream_file': 'cmake/external_projects/vllm_flash_attn.cmake', + 'upstream_pattern': r'GIT_TAG ([a-f0-9]+)', + 'update_pattern': r"(vllm-flash-attn' = lib\.defaultTo.*?rev = \")[^\"]+(\";.*?hash = \")[^\"]+(\";)", + 'version_config': { + 'source_file': 'vllm_flash_attn/__init__.py', + 'version_pattern': r'__version__\s*=\s*"([^"]+)"', + 'update_pattern': r"(vllm-flash-attn' = lib\.defaultTo.*?version = \")[^\"]+(\";)", + }, + }, +} + + +def 
fetch_json(url: str) -> dict: + with urlopen(Request(url, headers=HEADERS)) as r: + return json.loads(r.read()) + +def fetch_text(url: str) -> str: + with urlopen(Request(url, headers=HEADERS)) as r: + return r.read().decode('utf-8') + + +def update_git_dep(github_repo: str, config: dict, pkg_ref: str) -> None: + upstream_url = f'{RAW_BASE}/{PKG_REPO}/{pkg_ref}/{config["upstream_file"]}' + new_revision = re.search(config['upstream_pattern'], fetch_text(upstream_url)).group(1) + sri_hash = json.loads(sh.nix_prefetch_github(*github_repo.split('/'), '--rev', new_revision).strip())['hash'] + sh.sd('--flags', 'ms', config['update_pattern'], rf'${{1}}{new_revision}${{2}}{sri_hash}${{3}}', NIX_FILE) + if version_config := config.get('version_config'): + source_url = f'{RAW_BASE}/{github_repo}/{new_revision}/{version_config["source_file"]}' + version = re.search(version_config['version_pattern'], fetch_text(source_url)).group(1) + sh.sd('--flags', 'ms', version_config['update_pattern'], rf'${{1}}{version}${{2}}', NIX_FILE) + + +def update_primary_package(mode: str) -> str: + release_data = fetch_json(f'{API_BASE}/{PKG_REPO}/releases/latest') + main_data = fetch_json(f'{API_BASE}/{PKG_REPO}/commits/main') + rc_data = next(tag for tag in fetch_json(f'{API_BASE}/{PKG_REPO}/tags') if 'rc' in tag['name']) + rc_version = rc_data['name'].lstrip('v') + dev_commit_count = fetch_json(f'{API_BASE}/{PKG_REPO}/compare/{rc_data['name']}...{main_data['sha']}')['ahead_by'] + stable_version = release_data['tag_name'].lstrip('v') + + match mode: + case "dev": + version, ref = f"{rc_version}.dev{dev_commit_count}", 'main' + sh.sd('--flags', 'ms', TAG_PATTERN, rf'${{1}}rev = "{main_data["sha"]}";', NIX_FILE) + case "rc": + version, ref = rc_version, rc_data['name'] + case _: + version, ref = stable_version, release_data['tag_name'] + + if mode != "dev": + sh.sd('--flags', 'ms', REV_PATTERN, r'${1}tag = "v$${version}";', NIX_FILE) + + sh.update_source_version(PKG_NAME, version, 
'--ignore-same-version') + return ref + + +def main(): + parser = argparse.ArgumentParser(description='Update vLLM package and dependencies') + group = parser.add_mutually_exclusive_group() + group.add_argument('--dev', action='store_true', help='Update to main branch dev version') + group.add_argument('--rc', action='store_true', help='Update to latest release candidate') + args = parser.parse_args() + if not GITHUB_TOKEN: + print("Warning: No GITHUB_TOKEN set - may hit GitHub API rate limits") + mode = "dev" if args.dev else "rc" if args.rc else "stable" + pkg_ref = update_primary_package(mode) + for repo, config in DEPENDENCIES.items(): + update_git_dep(repo, config, pkg_ref) + + +if __name__ == '__main__': + main() From 4d45e9d901b15422d43c7b1181e5a1ac7e1e0904 Mon Sep 17 00:00:00 2001 From: Daniel Fahey Date: Thu, 20 Nov 2025 10:11:58 +0000 Subject: [PATCH 2/2] python3Packages.vllm: 0.11.0 -> 0.11.2 Diffs: https://github.com/vllm-project/vllm/compare/v0.11.0...v0.11.1 https://github.com/vllm-project/vllm/compare/v0.11.1...v0.11.2 Changelogs: https://github.com/vllm-project/vllm/releases/tag/v0.11.1 https://github.com/vllm-project/vllm/releases/tag/v0.11.2 --- ...up.py-nix-support-respect-cmakeFlags.patch | 40 ++++------- .../vllm/0003-propagate-pythonpath.patch | 13 ++-- .../vllm/0005-drop-intel-reqs.patch | 4 +- .../python-modules/vllm/default.nix | 70 ++++++++++++------- .../development/python-modules/vllm/update.py | 12 +++- 5 files changed, 79 insertions(+), 60 deletions(-) diff --git a/pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch b/pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch index e7a4bcd457617..6012a5aa03b70 100644 --- a/pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch +++ b/pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch @@ -1,40 +1,26 @@ -From 10b7e8330bdba319a4162cceb8e5dd4280215b04 Mon 
Sep 17 00:00:00 2001 -From: SomeoneSerge -Date: Wed, 31 Jul 2024 12:06:15 +0000 -Subject: [PATCH 2/2] setup.py: nix-support (respect cmakeFlags) - ---- - setup.py | 10 ++++++++++ - 1 file changed, 10 insertions(+) - diff --git a/setup.py b/setup.py -index 01e006f9..14762146 100644 +index 990fe4cde3ca7c605b40ff2a9078feed2dedde48..91ce3bba0b2dcddd540aa76ebf2cb4a83c200eb6 100644 --- a/setup.py +++ b/setup.py -@@ -15,6 +15,15 @@ from setuptools import Extension, find_packages, setup - from setuptools.command.build_ext import build_ext - from torch.utils.cpp_extension import CUDA_HOME +@@ -20,6 +20,12 @@ from setuptools.command.build_ext import build_ext + from setuptools_scm import get_version + from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME -+import os -+import json -+ +if "NIX_ATTRS_JSON_FILE" in os.environ: + with open(os.environ["NIX_ATTRS_JSON_FILE"], "r") as f: + NIX_ATTRS = json.load(f) +else: -+ NIX_ATTRS = { "cmakeFlags": os.environ.get("cmakeFlags", "").split() } ++ NIX_ATTRS = {"cmakeFlags": os.environ.get("cmakeFlags", "").split()} + def load_module_from_path(module_name, path): spec = importlib.util.spec_from_file_location(module_name, path) -@@ -159,6 +168,7 @@ class cmake_build_ext(build_ext): - '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir), - '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp), - '-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE), -+ *NIX_ATTRS["cmakeFlags"], - ] +@@ -213,6 +219,8 @@ class cmake_build_ext(build_ext): + if other_cmake_args: + cmake_args += other_cmake_args.split() - verbose = envs.VERBOSE --- -2.45.1 - ++ cmake_args += NIX_ATTRS["cmakeFlags"] ++ + subprocess.check_call( + ["cmake", ext.cmake_lists_dir, *build_tool, *cmake_args], + cwd=self.build_temp, diff --git a/pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch b/pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch index 59267a8f0548f..d929f44dd9779 100644 --- 
a/pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch +++ b/pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch @@ -1,12 +1,13 @@ diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py -index 81623def..2a6e2c92 100644 +index 617854c8548fccec3b26b990e52c1ffb442e5156..3920b719b4585553d3633be02e5d9cd46f4e72e3 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py -@@ -521,6 +521,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T: +@@ -1109,7 +1109,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T: + # cannot use `sys.executable __file__` here because the script # contains relative imports - returned = subprocess.run(_SUBPROCESS_COMMAND, - input=input_bytes, -+ env={'PYTHONPATH': ':'.join(sys.path)}, - capture_output=True) + returned = subprocess.run( +- _SUBPROCESS_COMMAND, input=input_bytes, capture_output=True ++ _SUBPROCESS_COMMAND, input=input_bytes, capture_output=True, env={'PYTHONPATH': ':'.join(sys.path)} + ) # check if the subprocess is successful diff --git a/pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch b/pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch index 4314aff33fb9b..708a94bcddddc 100644 --- a/pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch +++ b/pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch @@ -1,8 +1,8 @@ diff --git a/requirements/cpu.txt b/requirements/cpu.txt -index 2db6d87ee..37f816170 100644 +index d53ab3649308ad6cf7d00496d391fdaada583b5d..03d3eb105afb659e80742ad426116b2d25ddbc02 100644 --- a/requirements/cpu.txt +++ b/requirements/cpu.txt -@@ -21,9 +21,6 @@ torchvision; platform_machine != "ppc64le" and platform_machine != "s390x" +@@ -20,9 +20,6 @@ torchvision; platform_machine != "ppc64le" and platform_machine != "s390x" torchvision==0.23.0; platform_machine == "ppc64le" datasets # for benchmark scripts diff --git 
a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix index 8e5361fc60db6..6b354f7593fc4 100644 --- a/pkgs/development/python-modules/vllm/default.nix +++ b/pkgs/development/python-modules/vllm/default.nix @@ -72,6 +72,7 @@ py-libnuma, setproctitle, openai-harmony, + anthropic, # internal dependency - for overriding in overlays vllm-flash-attn ? null, @@ -100,8 +101,8 @@ let cutlass = fetchFromGitHub { owner = "NVIDIA"; repo = "cutlass"; - tag = "v4.0.0"; - hash = "sha256-HJY+Go1viPkSVZPEs/NyMtYJzas4mMLiIZF3kNX+WgA="; + tag = "v4.2.1"; + hash = "sha256-iP560D5Vwuj6wX1otJhwbvqe/X4mYVeKTpK533Wr5gY="; }; # FlashMLA's Blackwell (SM100) kernels were developed against CUTLASS v3.9.0 @@ -128,8 +129,8 @@ let src = fetchFromGitHub { owner = "vllm-project"; repo = "FlashMLA"; - rev = "5f65b85703c7ed75fda01e06495077caad207c3f"; - hash = "sha256-DO9EFNSoAgyfRRc095v1UjT+Zdzk4cFY0+n28FVEwI0="; + rev = "46d64a8ebef03fa50b4ae74937276a5c940e3f95"; + hash = "sha256-jtMzWB5hKz8mJGsdK6q4YpQbGp9IrQxbwmB3a64DIl0="; }; dontConfigure = true; @@ -145,6 +146,33 @@ let ''; }; + qutlass = stdenv.mkDerivation { + pname = "qutlass"; + # https://github.com/IST-DASLab/qutlass/blob/${src.rev}/setup.py + version = "0.1.0"; + + # grep for GIT_TAG in the following file + # https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/qutlass.cmake + src = fetchFromGitHub { + owner = "iST-DASLab"; + repo = "qutlass"; + rev = "830d2c4537c7396e14a02a46fbddd18b5d107c65"; + hash = "sha256-aG4qd0vlwP+8gudfvHwhtXCFmBOJKQQTvcwahpEqC84="; + }; + + dontConfigure = true; + + # qutlass normally relies on `git submodule update` to fetch cutlass + buildPhase = '' + rm -rf third_party/cutlass + ln -sf ${cutlass} third_party/cutlass + ''; + + installPhase = '' + cp -rva . 
$out + ''; + }; + vllm-flash-attn' = lib.defaultTo (stdenv.mkDerivation { pname = "vllm-flash-attn"; # https://github.com/vllm-project/flash-attention/blob/${src.rev}/vllm_flash_attn/__init__.py @@ -155,8 +183,8 @@ let src = fetchFromGitHub { owner = "vllm-project"; repo = "flash-attention"; - rev = "ee4d25bd84e0cbc7e0b9b9685085fd5db2dcb62a"; - hash = "sha256-2r0Habd/kBpvM4/aQFIYyj+uQAa3M9gjk3DcBZHFNfA="; + rev = "58e0626a692f09241182582659e3bf8f16472659"; + hash = "sha256-ewdZd7LuBKBV0y3AaGRWISJzjg6cu59D2OtgqoDjrbM="; }; patches = [ @@ -192,7 +220,7 @@ let cpuSupport = !cudaSupport && !rocmSupport; - # https://github.com/pytorch/pytorch/blob/v2.8.0/torch/utils/cpp_extension.py#L2411-L2414 + # https://github.com/pytorch/pytorch/blob/v2.9.1/torch/utils/cpp_extension.py#L2407-L2410 supportedTorchCudaCapabilities = let real = [ @@ -284,7 +312,7 @@ in buildPythonPackage rec { pname = "vllm"; - version = "0.11.0"; + version = "0.11.2"; pyproject = true; stdenv = torch.stdenv; @@ -293,24 +321,13 @@ buildPythonPackage rec { owner = "vllm-project"; repo = "vllm"; tag = "v${version}"; - hash = "sha256-47TPvvPQvVbh6Gm2yvi+xhWZ8tSma91rp9hp/SBrEY8="; + hash = "sha256-DoSlkFmR3KKEtfSfdRB++0CZeeXgxmM3zZjONlxbe8U="; }; patches = [ ./0002-setup.py-nix-support-respect-cmakeFlags.patch ./0003-propagate-pythonpath.patch ./0005-drop-intel-reqs.patch - # TODO: Remove the below patches when included in vLLM release - (fetchpatch { - url = "https://github.com/vllm-project/vllm/commit/9705fba7b727a3b9c275b012258608531e2223d1.patch"; - hash = "sha256-DxRGLiwkegMlMjqFmFc0igpaVv06/Y2WjL+ISoIOET4="; - }) - # patch above is previous commit needed to apply patch below - # oneDNN / CPU fix from https://github.com/vllm-project/vllm/pull/26401 - (fetchpatch { - url = "https://github.com/vllm-project/vllm/commit/d7be1f2a480bdc62a6a1ec0126a401e3d42985fe.patch"; - hash = "sha256-Zi1k5wiOPjsbWHFKpcLq9Ns43wIP37Mbvesi5K80zaQ="; - }) ]; postPatch = '' @@ -318,13 +335,16 @@ buildPythonPackage rec { rm 
vllm/third_party/pynvml.py substituteInPlace tests/utils.py \ --replace-fail "from vllm.third_party.pynvml import" "from pynvml import" - substituteInPlace vllm/utils/__init__.py \ + substituteInPlace vllm/utils/import_utils.py \ --replace-fail "import vllm.third_party.pynvml" "import pynvml" # pythonRelaxDeps does not cover build-system substituteInPlace pyproject.toml \ - --replace-fail "torch ==" "torch >=" \ - --replace-fail "setuptools>=77.0.3,<80.0.0" "setuptools" + --replace-fail "torch ==" "torch >=" + + p='setuptools>=[0-9.,<>]*' f='pyproject.toml' + grep --quiet "$p" "$f" || { echo "Error: pattern '$p' not found in $f" >&2; false; } + sed --in-place "0,/$p/s//setuptools/" "$f" # Ignore the python version check because it hard-codes minor versions and # lags behind `ray`'s python interpreter support @@ -440,6 +460,7 @@ buildPythonPackage rec { openai-harmony # vLLM needs Torch's compiler to be present in order to use torch.compile torch.stdenv.cc + anthropic ] ++ uvicorn.optional-dependencies.standard ++ aioprometheus.optional-dependencies.starlette @@ -459,6 +480,7 @@ buildPythonPackage rec { ++ lib.optionals cudaSupport [ (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}") (lib.cmakeFeature "FLASH_MLA_SRC_DIR" "${lib.getDev flashmla}") + (lib.cmakeFeature "QUTLASS_SRC_DIR" "${lib.getDev qutlass}") (lib.cmakeFeature "VLLM_FLASH_ATTN_SRC_DIR" "${lib.getDev vllm-flash-attn'}") (lib.cmakeFeature "TORCH_CUDA_ARCH_LIST" "${gpuTargetString}") (lib.cmakeFeature "CUTLASS_NVCC_ARCHS_ENABLED" "${cudaPackages.flags.cmakeCudaArchitecturesString}") @@ -488,7 +510,7 @@ buildPythonPackage rec { }; preConfigure = '' - # See: https://github.com/vllm-project/vllm/blob/v0.7.1/setup.py#L75-L109 + # See: https://github.com/vllm-project/vllm/blob/v0.11.2/setup.py#L105 # There's also NVCC_THREADS but Nix/Nixpkgs doesn't really have this concept. 
export MAX_JOBS="$NIX_BUILD_CORES" ''; diff --git a/pkgs/development/python-modules/vllm/update.py b/pkgs/development/python-modules/vllm/update.py index 4a428a1cc850a..dba65258b7cd5 100755 --- a/pkgs/development/python-modules/vllm/update.py +++ b/pkgs/development/python-modules/vllm/update.py @@ -2,7 +2,7 @@ #! nix-shell -i python3 -p python3 python3Packages.sh sd nix-prefetch-github common-updater-scripts """ -Updates the main vLLM package and three external dependencies. +Updates the main vLLM package and four external dependencies. """ import argparse @@ -44,6 +44,16 @@ 'update_pattern': r'(flashmla = stdenv\.mkDerivation.*?version = ")[^"]+(";)', }, }, + 'IST-DASLab/qutlass': { + 'upstream_file': 'cmake/external_projects/qutlass.cmake', + 'upstream_pattern': r'GIT_TAG ([a-f0-9]+)', + 'update_pattern': r'(qutlass = stdenv\.mkDerivation.*?rev = ")[^"]+(";.*?hash = ")[^"]+(";)', + 'version_config': { + 'source_file': 'setup.py', + 'version_pattern': r"version='([0-9]+\.[0-9]+\.[0-9]+)'", + 'update_pattern': r'(qutlass = stdenv\.mkDerivation.*?version = ")[^"]+(";)', + }, + }, 'vllm-project/flash-attention': { 'upstream_file': 'cmake/external_projects/vllm_flash_attn.cmake', 'upstream_pattern': r'GIT_TAG ([a-f0-9]+)',