diff --git a/pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch b/pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch index e7a4bcd457617..fdcd53e8beee2 100644 --- a/pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch +++ b/pkgs/development/python-modules/vllm/0002-setup.py-nix-support-respect-cmakeFlags.patch @@ -1,19 +1,10 @@ -From 10b7e8330bdba319a4162cceb8e5dd4280215b04 Mon Sep 17 00:00:00 2001 -From: SomeoneSerge -Date: Wed, 31 Jul 2024 12:06:15 +0000 -Subject: [PATCH 2/2] setup.py: nix-support (respect cmakeFlags) - ---- - setup.py | 10 ++++++++++ - 1 file changed, 10 insertions(+) - diff --git a/setup.py b/setup.py -index 01e006f9..14762146 100644 +index e9b36e2a2..bc9e2f1e3 100644 --- a/setup.py +++ b/setup.py -@@ -15,6 +15,15 @@ from setuptools import Extension, find_packages, setup - from setuptools.command.build_ext import build_ext - from torch.utils.cpp_extension import CUDA_HOME +@@ -20,6 +20,15 @@ from setuptools.command.build_ext import build_ext + from setuptools_scm import get_version + from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME +import os +import json @@ -27,14 +18,11 @@ index 01e006f9..14762146 100644 def load_module_from_path(module_name, path): spec = importlib.util.spec_from_file_location(module_name, path) -@@ -159,6 +168,7 @@ class cmake_build_ext(build_ext): - '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir), - '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp), - '-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE), +@@ -151,6 +160,7 @@ class cmake_build_ext(build_ext): + cmake_args = [ + "-DCMAKE_BUILD_TYPE={}".format(cfg), + "-DVLLM_TARGET_DEVICE={}".format(VLLM_TARGET_DEVICE), + *NIX_ATTRS["cmakeFlags"], ] verbose = envs.VERBOSE --- -2.45.1 - diff --git a/pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch b/pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch index 59267a8f0548f..b7ca770d45d6d 100644 --- a/pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch +++ b/pkgs/development/python-modules/vllm/0003-propagate-pythonpath.patch @@ -1,12 +1,13 @@ diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py -index 81623def..2a6e2c92 100644 +index a2de597c8..4c2410209 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py -@@ -521,6 +521,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T: +@@ -1121,7 +1121,7 @@ def _run_in_subprocess(fn: Callable[[], _T]) -> _T: + # cannot use `sys.executable __file__` here because the script # contains relative imports - returned = subprocess.run(_SUBPROCESS_COMMAND, - input=input_bytes, -+ env={'PYTHONPATH': ':'.join(sys.path)}, - capture_output=True) + returned = subprocess.run( +- _SUBPROCESS_COMMAND, input=input_bytes, capture_output=True ++ _SUBPROCESS_COMMAND, input=input_bytes, capture_output=True, env={'PYTHONPATH': ':'.join(sys.path)}, + ) # check if the subprocess is successful diff --git a/pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch b/pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch index 4314aff33fb9b..de5c9e8f0660e 100644 --- a/pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch +++ b/pkgs/development/python-modules/vllm/0005-drop-intel-reqs.patch @@ -1,8 +1,8 @@ diff --git a/requirements/cpu.txt b/requirements/cpu.txt -index 2db6d87ee..37f816170 100644 +index d11787df4..71575d707 100644 --- a/requirements/cpu.txt +++ b/requirements/cpu.txt -@@ -21,9 +21,6 @@ torchvision; platform_machine != "ppc64le" and platform_machine != "s390x" +@@ -20,9 +20,6 @@ torchvision; platform_machine != "ppc64le" and platform_machine != "s390x" torchvision==0.23.0; platform_machine == "ppc64le" datasets # for benchmark scripts diff --git a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix index b8b2e7f59d3b4..fb030ed2665be 100644 --- a/pkgs/development/python-modules/vllm/default.nix +++ b/pkgs/development/python-modules/vllm/default.nix @@ -34,6 +34,7 @@ uvicorn, pydantic, aioprometheus, + anthropic, nvidia-ml-py, openai, pyzmq, @@ -53,6 +54,7 @@ compressed-tensors, mistral-common, msgspec, + model-hosting-container-standards, numactl, tokenizers, oneDNN, @@ -98,10 +100,11 @@ let # see CMakeLists.txt, grepping for CUTLASS_REVISION # https://github.com/vllm-project/vllm/blob/v${version}/CMakeLists.txt cutlass = fetchFromGitHub { + name = "cutlass-source"; owner = "NVIDIA"; repo = "cutlass"; - tag = "v4.0.0"; - hash = "sha256-HJY+Go1viPkSVZPEs/NyMtYJzas4mMLiIZF3kNX+WgA="; + tag = "v4.2.1"; + hash = "sha256-iP560D5Vwuj6wX1otJhwbvqe/X4mYVeKTpK533Wr5gY="; }; # FlashMLA's Blackwell (SM100) kernels were developed against CUTLASS v3.9.0 @@ -126,10 +129,11 @@ let # grep for GIT_TAG in the following file # https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/flashmla.cmake src = fetchFromGitHub { + name = "FlashMLA-source"; owner = "vllm-project"; repo = "FlashMLA"; - rev = "5f65b85703c7ed75fda01e06495077caad207c3f"; - hash = "sha256-DO9EFNSoAgyfRRc095v1UjT+Zdzk4cFY0+n28FVEwI0="; + rev = "46d64a8ebef03fa50b4ae74937276a5c940e3f95"; + hash = "sha256-jtMzWB5hKz8mJGsdK6q4YpQbGp9IrQxbwmB3a64DIl0="; }; dontConfigure = true; @@ -145,6 +149,16 @@ let ''; }; + # grep for GIT_TAG in the following file + # https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/qutlass.cmake + qutlass = fetchFromGitHub { + name = "qutlass-source"; + owner = "IST-DASLab"; + repo = "qutlass"; + rev = "830d2c4537c7396e14a02a46fbddd18b5d107c65"; + hash = "sha256-aG4qd0vlwP+8gudfvHwhtXCFmBOJKQQTvcwahpEqC84="; + }; + vllm-flash-attn' = lib.defaultTo (stdenv.mkDerivation { pname = "vllm-flash-attn"; # https://github.com/vllm-project/flash-attention/blob/${src.rev}/vllm_flash_attn/__init__.py @@ -153,10 +167,11 @@ let # grep for GIT_TAG in the following file # https://github.com/vllm-project/vllm/blob/v${version}/cmake/external_projects/vllm_flash_attn.cmake src = fetchFromGitHub { + name = "flash-attention-source"; owner = "vllm-project"; repo = "flash-attention"; - rev = "ee4d25bd84e0cbc7e0b9b9685085fd5db2dcb62a"; - hash = "sha256-2r0Habd/kBpvM4/aQFIYyj+uQAa3M9gjk3DcBZHFNfA="; + rev = "58e0626a692f09241182582659e3bf8f16472659"; + hash = "sha256-ewdZd7LuBKBV0y3AaGRWISJzjg6cu59D2OtgqoDjrbM="; }; patches = [ @@ -284,7 +299,7 @@ in buildPythonPackage rec { pname = "vllm"; - version = "0.11.0"; + version = "0.11.2"; pyproject = true; stdenv = torch.stdenv; @@ -293,38 +308,31 @@ buildPythonPackage rec { owner = "vllm-project"; repo = "vllm"; tag = "v${version}"; - hash = "sha256-47TPvvPQvVbh6Gm2yvi+xhWZ8tSma91rp9hp/SBrEY8="; + hash = "sha256-DoSlkFmR3KKEtfSfdRB++0CZeeXgxmM3zZjONlxbe8U="; }; patches = [ ./0002-setup.py-nix-support-respect-cmakeFlags.patch ./0003-propagate-pythonpath.patch ./0005-drop-intel-reqs.patch - # TODO: Remove the below patches when included in vLLM release - (fetchpatch { - url = "https://github.com/vllm-project/vllm/commit/9705fba7b727a3b9c275b012258608531e2223d1.patch"; - hash = "sha256-DxRGLiwkegMlMjqFmFc0igpaVv06/Y2WjL+ISoIOET4="; - }) - # patch above is previous commit needed to apply patch below - # oneDNN / CPU fix from https://github.com/vllm-project/vllm/pull/26401 - (fetchpatch { - url = "https://github.com/vllm-project/vllm/commit/d7be1f2a480bdc62a6a1ec0126a401e3d42985fe.patch"; - hash = "sha256-Zi1k5wiOPjsbWHFKpcLq9Ns43wIP37Mbvesi5K80zaQ="; - }) ]; postPatch = '' # Remove vendored pynvml entirely rm vllm/third_party/pynvml.py substituteInPlace tests/utils.py \ - --replace-fail "from vllm.third_party.pynvml import" "from pynvml import" - substituteInPlace vllm/utils/__init__.py \ - --replace-fail "import vllm.third_party.pynvml" "import pynvml" + --replace-fail \ + "from vllm.third_party.pynvml import" \ + "from pynvml import" + substituteInPlace vllm/utils/import_utils.py \ + --replace-fail \ + "import vllm.third_party.pynvml as pynvml" \ + "import pynvml" # pythonRelaxDeps does not cover build-system substituteInPlace pyproject.toml \ --replace-fail "torch ==" "torch >=" \ - --replace-fail "setuptools>=77.0.3,<80.0.0" "setuptools" + --replace-fail "setuptools>=77.0.3,<81.0.0" "setuptools" # Ignore the python version check because it hard-codes minor versions and # lags behind `ray`'s python interpreter support @@ -393,6 +401,7 @@ buildPythonPackage rec { dependencies = [ aioprometheus + anthropic blake3 cachetools cbor2 @@ -424,6 +433,7 @@ buildPythonPackage rec { partial-json-parser compressed-tensors mistral-common + model-hosting-container-standards torch torchaudio torchvision @@ -460,6 +470,7 @@ buildPythonPackage rec { (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}") (lib.cmakeFeature "FLASH_MLA_SRC_DIR" "${lib.getDev flashmla}") (lib.cmakeFeature "VLLM_FLASH_ATTN_SRC_DIR" "${lib.getDev vllm-flash-attn'}") + (lib.cmakeFeature "QUTLASS_SRC_DIR" "${lib.getDev qutlass}") (lib.cmakeFeature "TORCH_CUDA_ARCH_LIST" "${gpuTargetString}") (lib.cmakeFeature "CUTLASS_NVCC_ARCHS_ENABLED" "${cudaPackages.flags.cmakeCudaArchitecturesString}") (lib.cmakeFeature "CUDA_TOOLKIT_ROOT_DIR" "${symlinkJoin {