diff --git a/.buildkite/test_areas/kernels.yaml b/.buildkite/test_areas/kernels.yaml
index 34e1e4832d9d..bc12d1a35341 100644
--- a/.buildkite/test_areas/kernels.yaml
+++ b/.buildkite/test_areas/kernels.yaml
@@ -105,7 +105,11 @@ steps:
   device: h100
   num_devices: 1
   source_file_dependencies:
+  - cmake/external_projects/deepgemm.cmake
   - tools/install_deepgemm.sh
+  - tools/build_deepgemm_C.py
+  - tools/setup_deepgemm_pythons.sh
+  - tools/check_wheel_deepgemm.py
   - vllm/utils/deep_gemm.py
   - vllm/model_executor/layers/fused_moe
   - vllm/model_executor/layers/quantization
@@ -115,6 +119,7 @@ steps:
   - tests/kernels/attention/test_deepgemm_attention.py
   - tests/quantization/test_cutlass_w4a16.py
   commands:
+  - python3 ../tools/check_wheel_deepgemm.py
   - pytest -v -s kernels/quantization/test_block_fp8.py
   - pytest -v -s kernels/moe/test_deepgemm.py
   - pytest -v -s kernels/moe/test_batched_deepgemm.py
diff --git a/cmake/external_projects/deepgemm.cmake b/cmake/external_projects/deepgemm.cmake
index 0d7ea43fb7d0..2d947a40906c 100644
--- a/cmake/external_projects/deepgemm.cmake
+++ b/cmake/external_projects/deepgemm.cmake
@@ -53,49 +53,72 @@ cuda_archs_loose_intersection(DEEPGEMM_ARCHS
 if(DEEPGEMM_ARCHS)
   message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")
 
-  find_package(CUDAToolkit REQUIRED)
-
-  #
-  # Build the _C pybind11 extension from DeepGEMM's C++ source.
-  # This is a CXX-only module — CUDA kernels are JIT-compiled at runtime.
-  #
-  Python_add_library(_deep_gemm_C MODULE WITH_SOABI
-    "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp")
-
-  # The pybind11 module name must be _C to match DeepGEMM's Python imports.
-  set_target_properties(_deep_gemm_C PROPERTIES OUTPUT_NAME "_C")
-
-  target_compile_definitions(_deep_gemm_C PRIVATE
-    "-DTORCH_EXTENSION_NAME=_C")
-
-  target_include_directories(_deep_gemm_C PRIVATE
-    "${deepgemm_SOURCE_DIR}/csrc"
-    "${deepgemm_SOURCE_DIR}/deep_gemm/include"
-    "${deepgemm_SOURCE_DIR}/third-party/cutlass/include"
-    "${deepgemm_SOURCE_DIR}/third-party/cutlass/tools/util/include"
-    "${deepgemm_SOURCE_DIR}/third-party/fmt/include")
-
-  target_compile_options(_deep_gemm_C PRIVATE
-    $<$<COMPILE_LANGUAGE:CXX>:-std=c++17>
-    $<$<COMPILE_LANGUAGE:CXX>:-O3>
-    $<$<COMPILE_LANGUAGE:CXX>:-Wno-psabi>
-    $<$<COMPILE_LANGUAGE:CXX>:-Wno-deprecated-declarations>)
-
-  # torch_python is required because DeepGEMM uses pybind11 type casters
-  # for at::Tensor (via PYBIND11_MODULE), unlike vLLM's own extensions which
-  # use torch::Library custom ops.
-  find_library(TORCH_PYTHON_LIBRARY torch_python
-    PATHS "${TORCH_INSTALL_PREFIX}/lib"
-    REQUIRED)
-
-  target_link_libraries(_deep_gemm_C PRIVATE
-    torch ${TORCH_LIBRARIES} "${TORCH_PYTHON_LIBRARY}"
-    CUDA::cudart CUDA::nvrtc)
-
-  # Install the shared library into the vendored package directory
-  install(TARGETS _deep_gemm_C
-    LIBRARY DESTINATION vllm/third_party/deep_gemm
-    COMPONENT _deep_gemm_C)
+  # Build _C once per interpreter in DEEPGEMM_PYTHON_INTERPRETERS (":"-
+  # separated paths) so the wheel imports cleanly on every supported Python.
+  # Unset → fall back to the build interpreter (editable / source builds).
+  # The compile is delegated to tools/build_deepgemm_C.py and always runs
+  # against the build interpreter's torch — target Pythons don't need torch.
+  # Note: empty-but-set env vars are still DEFINED in cmake; treat empty as
+  # unset so an empty interpreter list falls back to the build interpreter
+  # rather than silently skipping the per-Python build. Empty entries from
+  # stray leading/trailing/doubled ":" are dropped for the same reason.
+  set(_dg_pythons "")
+  if(NOT "$ENV{DEEPGEMM_PYTHON_INTERPRETERS}" STREQUAL "")
+    string(REPLACE ":" ";" _dg_pythons "$ENV{DEEPGEMM_PYTHON_INTERPRETERS}")
+    list(REMOVE_ITEM _dg_pythons "")
+  endif()
+  if("${_dg_pythons}" STREQUAL "")
+    set(_dg_pythons "${Python_EXECUTABLE}")
+  endif()
+  message(STATUS "DeepGEMM _C will be built for: ${_dg_pythons}")
+
+  # Header set fed to add_custom_command's DEPENDS so a header-only edit in
+  # DeepGEMM's own csrc/ and deep_gemm/include trees re-triggers the rebuild
+  # (vendored cutlass/fmt headers are not tracked). add_custom_command does
+  # no implicit header scanning, unlike add_library. CONFIGURE_DEPENDS
+  # re-runs the glob at build time so newly added headers are picked up.
+  file(GLOB_RECURSE _dg_headers CONFIGURE_DEPENDS
+    "${deepgemm_SOURCE_DIR}/csrc/*.h"
+    "${deepgemm_SOURCE_DIR}/csrc/*.hpp"
+    "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.h"
+    "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.hpp"
+    "${deepgemm_SOURCE_DIR}/deep_gemm/include/*.cuh")
+
+  set(_dg_markers)
+  set(_dg_seen_soabis)
+  foreach(_pybin IN LISTS _dg_pythons)
+    execute_process(
+      COMMAND "${_pybin}" -c
+        "import sysconfig; print(sysconfig.get_config_var('SOABI'))"
+      OUTPUT_VARIABLE _dg_soabi
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      COMMAND_ERROR_IS_FATAL ANY)
+    # Dedup so duplicate paths (or two paths resolving to the same CPython)
+    # don't register conflicting build rules.
+    if(_dg_soabi IN_LIST _dg_seen_soabis)
+      continue()
+    endif()
+    list(APPEND _dg_seen_soabis "${_dg_soabi}")
+    set(_dg_dir "${CMAKE_CURRENT_BINARY_DIR}/deepgemm_C_${_dg_soabi}")
+    set(_dg_marker "${_dg_dir}/.built")
+    add_custom_command(
+      OUTPUT "${_dg_marker}"
+      COMMAND "${Python_EXECUTABLE}"
+        "${CMAKE_SOURCE_DIR}/tools/build_deepgemm_C.py"
+        "${deepgemm_SOURCE_DIR}" "${_dg_dir}" "${_pybin}"
+      COMMAND "${CMAKE_COMMAND}" -E touch "${_dg_marker}"
+      DEPENDS "${CMAKE_SOURCE_DIR}/tools/build_deepgemm_C.py"
+              "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp"
+              ${_dg_headers}
+      COMMENT "Building DeepGEMM _C for ${_pybin}"
+      VERBATIM)
+    list(APPEND _dg_markers "${_dg_marker}")
+    install(DIRECTORY "${_dg_dir}/"
+      DESTINATION vllm/third_party/deep_gemm
+      COMPONENT _deep_gemm_C
+      FILES_MATCHING PATTERN "_C.cpython-*.so")
+  endforeach()
+  add_custom_target(_deep_gemm_C ALL DEPENDS ${_dg_markers})
 
   #
   # Vendor DeepGEMM Python package files
diff --git a/docker/Dockerfile b/docker/Dockerfile
index fd0622e2416a..fcd006d8dee2 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -301,6 +301,15 @@ RUN --mount=type=cache,target=/root/.cache/uv \
         python3 use_existing_torch.py --prefix; \
     fi
 
+# Provision a bare interpreter for each CPython covered by `requires-python`
+# so DeepGEMM `_C` is built once per Python and bundled side-by-side in the
+# wheel; cmake reads DEEPGEMM_PYTHON_INTERPRETERS in deepgemm.cmake's
+# foreach loop. The matrix is derived from pyproject.toml.
+COPY tools/setup_deepgemm_pythons.sh tools/build_deepgemm_C.py tools/
+ENV DEEPGEMM_VENV_PREFIX=/opt/dgenv
+RUN --mount=type=cache,target=/root/.cache/uv \
+    tools/setup_deepgemm_pythons.sh > /tmp/dg_pythons.txt
+
 # Build the vLLM wheel
 # if USE_SCCACHE is set, use sccache to speed up compilation
 # AWS credentials mounted at ~/.aws/credentials for sccache S3 auth (optional)
@@ -328,6 +337,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
         && export VLLM_PRECOMPILED_WHEEL_COMMIT="${VLLM_MERGE_BASE_COMMIT}" \
         && export VLLM_MAIN_CUDA_VERSION="${VLLM_MAIN_CUDA_VERSION}" \
         && export VLLM_DOCKER_BUILD_CONTEXT=1 \
+        && export DEEPGEMM_PYTHON_INTERPRETERS="$(cat /tmp/dg_pythons.txt)" \
         && sccache --show-stats \
         && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
         && sccache --show-stats; \
@@ -345,6 +355,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
         export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
         export VLLM_PRECOMPILED_WHEEL_COMMIT="${VLLM_MERGE_BASE_COMMIT}" && \
         export VLLM_DOCKER_BUILD_CONTEXT=1 && \
+        export DEEPGEMM_PYTHON_INTERPRETERS="$(cat /tmp/dg_pythons.txt)" && \
         python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
     fi
 
diff --git a/tools/build_deepgemm_C.py b/tools/build_deepgemm_C.py
new file mode 100644
index 000000000000..e1fc8b43b681
--- /dev/null
+++ b/tools/build_deepgemm_C.py
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Build DeepGEMM's `_C` pybind11 extension for a target Python.
+
+Driven from `cmake/external_projects/deepgemm.cmake`. The driver is the
+build interpreter (which has torch); the *target* Python is only used for
+its header path and SOABI. This avoids needing torch installed in N venvs
+to produce N matching `.so` files.
+
+Usage: python build_deepgemm_C.py <deepgemm_src> <out_dir> <target_python>
+"""
+
+import json
+import os
+import shlex
+import subprocess
+import sys
+from pathlib import Path
+
+import torch
+from torch.utils import cpp_extension
+
+if len(sys.argv) != 4:
+    sys.exit(f"usage: {sys.argv[0]} <deepgemm_src> <out_dir> <target_python>")
+
+src = Path(sys.argv[1]).resolve()
+out = Path(sys.argv[2]).resolve()
+target_py = sys.argv[3]
+out.mkdir(parents=True, exist_ok=True)
+
+info = json.loads(
+    subprocess.check_output(
+        [
+            target_py,
+            "-c",
+            "import sysconfig, json; "
+            "print(json.dumps({k: sysconfig.get_config_var(k) "
+            "for k in ('EXT_SUFFIX', 'INCLUDEPY')}))",
+        ]
+    ).decode()
+)
+
+# Fail with a clear message (rather than a compiler error on `-INone` or a
+# missing Python.h) when the target interpreter has no development headers.
+include_py = info["INCLUDEPY"]
+if not include_py or not (Path(include_py) / "Python.h").is_file():
+    sys.exit(
+        f"Python.h not found for {target_py} (INCLUDEPY={include_py!r}); "
+        "install that interpreter's development headers"
+    )
+
+cuda_home = cpp_extension.CUDA_HOME
+if cuda_home is None:
+    sys.exit("CUDA_HOME not found; cannot build DeepGEMM _C")
+# CCCL lives outside the standard CUDAToolkit search, mirroring DeepGEMM's
+# own setup.py.
+includes = [
+    include_py,
+    f"{cuda_home}/include",
+    f"{cuda_home}/include/cccl",
+    str(src / "csrc"),
+    str(src / "deep_gemm/include"),
+    str(src / "third-party/cutlass/include"),
+    str(src / "third-party/cutlass/tools/util/include"),
+    str(src / "third-party/fmt/include"),
+    *cpp_extension.include_paths(device_type="cuda"),
+]
+
+cmd = [
+    # CXX may carry a launcher or flags (e.g. "ccache g++"); split into argv.
+    *shlex.split(os.environ.get("CXX") or "g++"),
+    "-shared",
+    "-fPIC",
+    "-std=c++17",
+    "-O3",
+    "-g0",
+    "-Wno-psabi",
+    "-Wno-deprecated-declarations",
+    "-DTORCH_API_INCLUDE_EXTENSION_H",
+    "-DTORCH_EXTENSION_NAME=_C",
+    f"-D_GLIBCXX_USE_CXX11_ABI={int(torch.compiled_with_cxx11_abi())}",
+    *(f"-I{p}" for p in includes),
+    str(src / "csrc/python_api.cpp"),
+    *(f"-L{p}" for p in cpp_extension.library_paths(device_type="cuda")),
+    f"-L{cuda_home}/lib64",
+    "-ltorch",
+    "-ltorch_python",
+    "-ltorch_cpu",
+    "-ltorch_cuda",
+    "-lc10",
+    "-lc10_cuda",
+    "-lcudart",
+    "-lnvrtc",
+    "-o",
+    str(out / f"_C{info['EXT_SUFFIX']}"),
+]
+print("[build_deepgemm_C] " + " ".join(cmd), flush=True)
+subprocess.check_call(cmd)
diff --git a/tools/check_wheel_deepgemm.py b/tools/check_wheel_deepgemm.py
new file mode 100644
index 000000000000..6f8a03ffd3dc
--- /dev/null
+++ b/tools/check_wheel_deepgemm.py
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Assert the installed vLLM has a `_C.cpython-XY-*.so` for every CPython
+covered by `requires-python`. Fails closed if a Python's `.so` is missing
+from the wheel — i.e. the regression that surfaced in #41476/#41512.
+
+Run from a CI test job after vLLM is installed, e.g. the H100 deepgemm
+kernel tests in .buildkite/test_areas/kernels.yaml.
+"""
+
+import importlib.util
+import os
+import sys
+import tomllib
+from pathlib import Path
+
+import regex as re
+
+SO_RE = re.compile(r"^_C\.cpython-(\d)(\d+)-")
+
+
+def required_pythons() -> list[str]:
+    pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
+    with pyproject.open("rb") as f:
+        requires = tomllib.load(f)["project"]["requires-python"]
+    # Version specifiers may carry whitespace around the comma and operators.
+    m = re.match(r">=\s*3\.(\d+)\s*,\s*<\s*3\.(\d+)", requires)
+    if not m:
+        sys.exit(f"unexpected requires-python format: {requires!r}")
+    return [f"3.{v}" for v in range(int(m[1]), int(m[2]))]
+
+
+try:
+    dg_spec = importlib.util.find_spec("vllm.third_party.deep_gemm")
+except ModuleNotFoundError as e:
+    # find_spec imports the parent packages and raises if one is missing.
+    sys.exit(f"vllm.third_party.deep_gemm not importable ({e}); is vllm installed?")
+if dg_spec is None or dg_spec.origin is None:
+    sys.exit("vllm.third_party.deep_gemm not importable; is vllm installed?")
+pkg_dir = Path(dg_spec.origin).parent
+
+found = {f"{m[1]}.{m[2]}" for f in os.listdir(pkg_dir) if (m := SO_RE.match(f))}
+required = required_pythons()
+missing = [v for v in required if v not in found]
+print(f"deepgemm _C: found {sorted(found)}, required {required}, missing {missing}")
+sys.exit(1 if missing else 0)
diff --git a/tools/setup_deepgemm_pythons.sh b/tools/setup_deepgemm_pythons.sh
new file mode 100755
index 000000000000..215bfde44df4
--- /dev/null
+++ b/tools/setup_deepgemm_pythons.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Provision bare Python interpreters for the DeepGEMM `_C` per-Python build
+# and print a colon-separated list of their paths to stdout.
+#
+# Each target Python only needs a working interpreter — torch is not
+# installed since `tools/build_deepgemm_C.py` runs from the build interpreter.
+# A uv-managed (downloaded) Python is always used, never a system one, so
+# the development headers needed by the per-Python build are present.
+#
+# Usage:
+#   export DEEPGEMM_PYTHON_INTERPRETERS=$(tools/setup_deepgemm_pythons.sh)
+#   python setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
+#
+# With no args, expands to every CPython covered by `requires-python` in
+# pyproject.toml. Pass explicit versions (e.g. `3.10 3.11`) to override.
+#
+# Skip this script if you don't have uv: set DEEPGEMM_PYTHON_INTERPRETERS
+# directly to existing interpreter paths. Editable / single-Python builds
+# don't need the env var at all (cmake falls back to the build interpreter).
+#
+# Optional: DEEPGEMM_VENV_PREFIX (default: /tmp/dgenv).
+set -euo pipefail
+
+if [ "$#" -eq 0 ]; then
+  # Derive the matrix from `requires-python = ">=3.X,<3.Y"` in pyproject.toml.
+  pyproject="$(dirname "$0")/../pyproject.toml"
+  spec=$(grep -E '^requires-python' "$pyproject" \
+    | grep -oE '>=3\.[0-9]+,<3\.[0-9]+') || {
+    echo "error: no requires-python of the form '>=3.X,<3.Y' in $pyproject" >&2
+    exit 1
+  }
+  lo=${spec#>=3.}; lo=${lo%%,*}
+  hi=${spec##*<3.}
+  set -- $(seq "$lo" $((hi - 1)) | sed 's/^/3./')
+fi
+
+prefix="${DEEPGEMM_VENV_PREFIX:-/tmp/dgenv}"
+mkdir -p "$prefix"
+
+paths=""
+for V in "$@"; do
+  venv="$prefix/$V"
+  # Force a managed (uv-downloaded) Python so dev headers are bundled.
+  # System Pythons on the build base may lack headers (manylinux's
+  # /opt/python/cpXY-cpXY are off PATH; an apt-installed python3.X often
+  # has no -dev), and the per-Python build needs Python.h.
+  [ -x "$venv/bin/python" ] || \
+    uv venv --python "$V" "$venv" --python-preference only-managed --seed \
+      >/dev/null
+  paths="$paths:$venv/bin/python"
+done
+echo "${paths#:}"