diff --git a/contrib/Dockerfile.manylinux b/contrib/Dockerfile.manylinux index 14f1234c66..2469bba126 100644 --- a/contrib/Dockerfile.manylinux +++ b/contrib/Dockerfile.manylinux @@ -354,11 +354,14 @@ RUN echo "/usr/local/nixl/lib/$ARCH-linux-gnu" > /etc/ld.so.conf.d/nixl.conf && # Create the wheel # No need to specifically add path to libcuda.so here, meson finds the stubs and links them ARG WHL_PYTHON_VERSIONS="3.10,3.11,3.12,3.13,3.14" +ARG WHL_TORCH_VERSIONS="2.11,2.12" ARG WHL_PLATFORM="manylinux_2_28_$ARCH" RUN IFS=',' read -ra PYTHON_VERSIONS <<< "$WHL_PYTHON_VERSIONS" && \ - export UV_INDEX="https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .)" && \ - export UV_INDEX_STRATEGY=unsafe-best-match && \ - if [ "$BUILD_NIXL_EP" = "true" ]; then EP_BUILD_FLAG="--build-nixl-ep"; else EP_BUILD_FLAG=""; fi && \ + if [ "$BUILD_NIXL_EP" = "true" ]; then \ + EP_BUILD_FLAGS="--build-nixl-ep --torch-versions $WHL_TORCH_VERSIONS"; \ + else \ + EP_BUILD_FLAGS=""; \ + fi && \ rm -rf dist && mkdir -p dist && \ for PYTHON_VERSION in "${PYTHON_VERSIONS[@]}"; do \ export PATH=$VIRTUAL_ENV/bin:$PATH && \ @@ -368,7 +371,7 @@ RUN IFS=',' read -ra PYTHON_VERSIONS <<< "$WHL_PYTHON_VERSIONS" && \ --ucx-plugins-dir /usr/lib64/ucx \ --nixl-plugins-dir $NIXL_PLUGIN_DIR \ --output-dir dist \ - $EP_BUILD_FLAG ; \ + $EP_BUILD_FLAGS ; \ done # Copy the meta package wheel to the dist directory, which will be used to push to PyPI. diff --git a/contrib/build-wheel.sh b/contrib/build-wheel.sh index 3a4b4a58d3..6b3491119b 100755 --- a/contrib/build-wheel.sh +++ b/contrib/build-wheel.sh @@ -15,7 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Parse arguments PYTHON_VERSION="3.12" ARCH=$(uname -m) WHL_PLATFORM="manylinux_2_39_$ARCH" @@ -23,8 +22,8 @@ UCX_PLUGINS_DIR="/usr/lib64/ucx" NIXL_PLUGINS_DIR="/usr/local/nixl/lib/$ARCH-linux-gnu/plugins" OUTPUT_DIR="dist" BUILD_NIXL_EP="false" +TORCH_VERSIONS="" -# Parse arguments while [[ $# -gt 0 ]]; do case $1 in --python-version) @@ -60,6 +59,7 @@ while [[ $# -gt 0 ]]; do echo " --ucx-plugins-dir: Directory to find UCX plugins in (default: $UCX_PLUGINS_DIR)" echo " --nixl-plugins-dir: Directory to find NIXL plugins in (default: $NIXL_PLUGINS_DIR)" echo " --build-nixl-ep: Build wheel with nixl_ep package included (requires CUDA sm90-compatible environment)" + echo " --torch-versions: Comma-separated list of torch versions to build the wheel for (default: $TORCH_VERSIONS)" echo " --help: Show this help message" echo "" echo "Must be executed from the root of the NIXL repository." @@ -69,6 +69,11 @@ while [[ $# -gt 0 ]]; do BUILD_NIXL_EP="true" shift ;; + --torch-versions) + TORCH_VERSIONS=$2 + shift + shift + ;; *) echo "Unknown argument: $1" exit 1 @@ -76,10 +81,14 @@ while [[ $# -gt 0 ]]; do esac done +if [ "$BUILD_NIXL_EP" = "true" ] && [ -z "$TORCH_VERSIONS" ]; then + echo "ERROR: --build-nixl-ep requires --torch-versions (e.g. --torch-versions 2.11,2.12)" >&2 + exit 1 +fi + set -e set -x -# Build the wheel TMP_DIR=$(mktemp -d) CUDA_MAJOR=$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | cut -d'.' 
-f1) @@ -88,21 +97,218 @@ if [ "$CUDA_MAJOR" -ne 12 ] && [ "$CUDA_MAJOR" -ne 13 ]; then echo "Invalid CUDA_MAJOR: '$CUDA_MAJOR'" exit 1 fi +AUDITWHEEL_EXCLUDES="--exclude libcuda* --exclude libcufile* --exclude libssl* --exclude libcrypto* --exclude libefa* --exclude libhwloc* --exclude libfabric* --exclude libtorch* --exclude libc10* --exclude libdoca*" + PKG_NAME="nixl-cu${CUDA_MAJOR}" +CU_TAG="cu$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | tr -d .)" ./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml -if [ "$BUILD_NIXL_EP" = "true" ]; then - uv build --wheel --out-dir $TMP_DIR --python $PYTHON_VERSION \ - -Csetup-args=-Dbuild_nixl_ep=true \ - -Csetup-args=-Dbuild_examples=true + +TORCH_STABLE_INDEX="https://download.pytorch.org/whl/${CU_TAG}" +TORCH_NIGHTLY_INDEX="https://download.pytorch.org/whl/nightly/${CU_TAG}" + +# Build deps for the per-iteration venv; torch is installed separately. +BUILD_DEPS=( + "meson" + "meson-python" + "pybind11" + "patchelf" + "pyyaml" + "types-PyYAML" + "setuptools>=80.9.0" +) + +# Slugify a dotted version (e.g. "2.13" -> "213", "3.10" -> "310") so it can +# be used unambiguously as a path component. +slug() { echo "${1//./}"; } + +# Path for a per-iteration build venv. One venv per (python, torch) tuple +# so torch's transitive footprint (nvidia-*, triton, sympy, …) never bleeds +# across torch versions. Lives in /workspace, not /tmp, so it inherits the +# image's UV_CACHE_DIR layout and is visible to debugging. +venv_path() { + local VER=${1:-} + if [ -n "$VER" ]; then + echo "/workspace/venv-torch$(slug "$VER")-py$(slug "$PYTHON_VERSION")" + else + echo "/workspace/venv-py$(slug "$PYTHON_VERSION")" + fi +} + +# Echo "stable", "nightly", or "unavailable" depending on whether +# torch==${VER}.* resolves from the stable cu index, the nightly cu +# index (with --pre), or neither. 
+torch_classify() {
+    # $1: torch "X.Y" series to probe. Resolution is only simulated
+    # (--dry-run) inside a throwaway venv; nothing is installed.
+    local VER=$1
+    local CLASS="unavailable"
+    local PROBE
+    PROBE="/workspace/venv-probe-py$(slug "$PYTHON_VERSION")"
+    rm -rf "$PROBE"
+    if uv venv "$PROBE" --python "$PYTHON_VERSION" >/dev/null 2>&1; then
+        if uv pip install --dry-run \
+            --python "$PROBE/bin/python" \
+            --index-url "$TORCH_STABLE_INDEX" \
+            "torch==${VER}.*" >/dev/null 2>&1; then
+            CLASS="stable"
+        # Probe the nightly channel with exactly the index and specifier
+        # that install_torch uses. Letting PyPI or the stable index take
+        # part here could report "nightly" for a version the nightly cu
+        # index cannot serve, and the real install would then fail.
+        elif uv pip install --dry-run --pre \
+            --python "$PROBE/bin/python" \
+            --index-url "$TORCH_NIGHTLY_INDEX" \
+            "$(torch_nightly_spec "$VER")" >/dev/null 2>&1; then
+            CLASS="nightly"
+        fi
+    fi
+    rm -rf "$PROBE"
+    echo "$CLASS"
+}
+
+# Echo the requirement specifier that selects a nightly build of torch
+# series $1 ("X.Y"): X.Y.0.dev0 or newer, but below X.(Y+1). Only the
+# first two dot-separated components of $1 are used, so "X.Y.Z" yields
+# the same specifier as "X.Y".
+torch_nightly_spec() {
+    local VER=$1
+    local MAJOR="${VER%%.*}"
+    local REST="${VER#*.}"
+    local MINOR="${REST%%.*}"
+    echo "torch>=${MAJOR}.${MINOR}.0.dev0,<${MAJOR}.$((MINOR + 1))"
+}
+
+# Install torch from the cu index, isolated from PyPI: with PyPI as a
+# fallback its plain `torch==X.Y.0` beats cu nightly's `X.Y.0.dev*+cuXX`
+# (PEP 440: final > pre-release).
+#   $1: path of the venv to install into
+#   $2: torch "X.Y" series
+#   $3: "nightly" to use the nightly cu index; any other value uses the
+#       stable cu index
+install_torch() {
+    local VENV_PATH=$1
+    local VER=$2
+    local CHANNEL=$3
+
+    if [ "$CHANNEL" = "nightly" ]; then
+        uv pip install \
+            --python "$VENV_PATH/bin/python" \
+            --index-url "$TORCH_NIGHTLY_INDEX" \
+            --pre \
+            "$(torch_nightly_spec "$VER")"
+    else
+        uv pip install \
+            --python "$VENV_PATH/bin/python" \
+            --index-url "$TORCH_STABLE_INDEX" \
+            "torch==${VER}.*"
+    fi
+}
+
+# Build the wheel for the current PYTHON_VERSION (and optional torch VER).
+# Each iteration uses a fresh venv so torch's dependencies
+# (nvidia-* wheels, triton, sympy, …) do not leak across iterations.
+build_wheel() {
+    # $1: directory that `uv build` writes the wheel into.
+    # $2: optional torch "X.Y" series to install into the build venv.
+    local OUT_DIR=$1
+    local VER=${2:-}
+
+    local VENV_PATH
+    VENV_PATH=$(venv_path "$VER")
+    local CHANNEL="stable"
+    if [ -n "$VER" ]; then
+        CHANNEL=$(torch_classify "$VER")
+        # Fail with a clear message instead of letting install_torch fall
+        # through to its stable branch for a version no index can serve.
+        if [ "$CHANNEL" = "unavailable" ]; then
+            echo "ERROR: torch ${VER} is not available for Python ${PYTHON_VERSION} + ${CU_TAG}" >&2
+            return 1
+        fi
+    fi
+
+    echo "=== Provisioning ${VENV_PATH} (python ${PYTHON_VERSION}${VER:+, torch ${VER} [${CHANNEL}]}) ==="
+    rm -rf "$VENV_PATH"
+    uv venv "$VENV_PATH" --python "$PYTHON_VERSION"
+    uv pip install --python "$VENV_PATH/bin/python" "${BUILD_DEPS[@]}"
+    if [ -n "$VER" ]; then
+        install_torch "$VENV_PATH" "$VER" "$CHANNEL"
+    fi
+
+    # Activate so meson's `find_installation('python3')` resolves to this
+    # venv's interpreter (which has the right torch).
+    # shellcheck disable=SC1091
+    source "$VENV_PATH/bin/activate"
+
+    # --no-build-isolation makes the build use the packages installed in
+    # the venv above (BUILD_DEPS and, when requested, torch).
+    local BUILD_ARGS=(
+        --wheel
+        --no-build-isolation
+        --out-dir "$OUT_DIR"
+        --python "$VENV_PATH/bin/python"
+    )
+    if [ "$BUILD_NIXL_EP" = "true" ]; then
+        BUILD_ARGS+=(
+            -Csetup-args=-Dbuild_nixl_ep=true
+            -Csetup-args=-Dbuild_examples=true
+        )
+    fi
+    uv build "${BUILD_ARGS[@]}"
+
+    deactivate
+    # torch + nvidia-* in each venv is several GB; tear down so the docker
+    # layer does not get too large across the (python, torch) matrix.
+    rm -rf "$VENV_PATH"
+}
+
+# Run auditwheel on the nixl wheel in $1, write the repaired wheel to $2,
+# then add the UCX / NIXL plugins to the repaired wheel.
+repair_wheel() {
+    local IN_DIR=$1
+    local OUT_DIR=$2
+    # Split the exclude list with `read -a` rather than an unquoted
+    # expansion: an unquoted $AUDITWHEEL_EXCLUDES would also glob-expand
+    # the patterns (libcuda*, libssl*, ...) against the current directory,
+    # and auditwheel must receive them literally.
+    local -a EXCLUDES
+    IFS=' ' read -ra EXCLUDES <<< "$AUDITWHEEL_EXCLUDES"
+    mkdir -p "$OUT_DIR"
+    auditwheel repair "${EXCLUDES[@]}" "$IN_DIR"/nixl*.whl --plat "$WHL_PLATFORM" --wheel-dir "$OUT_DIR"
+    ./contrib/wheel_add_ucx_plugins.py --ucx-plugins-dir "$UCX_PLUGINS_DIR" --nixl-plugins-dir "$NIXL_PLUGINS_DIR" "$OUT_DIR"/*.whl
+}
+
+# Echo the path of the single .whl in $1, or exit if the count is not 1.
+# When called through $(...), the `exit 1` ends only that subshell; the
+# caller sees a non-zero status and empty output.
+get_wheel_path() {
+    local dir=$1 wheels
+    # nullglob so that "no match" yields an empty array instead of the
+    # literal pattern, which would otherwise be counted as one wheel.
+    shopt -s nullglob
+    wheels=("$dir"/*.whl)
+    shopt -u nullglob
+    if [ ${#wheels[@]} -ne 1 ]; then
+        echo "expected 1 wheel in $dir, got ${#wheels[@]}: ${wheels[*]}" >&2
+        exit 1
+    fi
+    echo "${wheels[0]}"
+}
+
+if [ "$BUILD_NIXL_EP" = "true" ] && [ -n "$TORCH_VERSIONS" ]; then
+    # Multi-torch: build the full wheel with the first torch, then merge
+    # the per-torch .so from the others into it.
+    IFS=',' read -ra TORCH_REQUESTED <<< "$TORCH_VERSIONS"
+
+    # Filter to torch versions actually resolvable for this (Python, CUDA) combo.
+    # Whitespace is stripped and empty entries are dropped first, so an
+    # input such as "2.11, 2.12" or one with a trailing comma is not
+    # misreported as an unavailable version.
+    TORCH_ARRAY=()
+    SKIPPED=()
+    for TORCH in "${TORCH_REQUESTED[@]}"; do
+        TORCH="${TORCH//[[:space:]]/}"
+        [ -n "$TORCH" ] || continue
+        if [ "$(torch_classify "$TORCH")" = "unavailable" ]; then
+            SKIPPED+=("$TORCH")
+        else
+            TORCH_ARRAY+=("$TORCH")
+        fi
+    done
+
+    if [ ${#SKIPPED[@]} -gt 0 ]; then
+        echo "=== Skipping torch versions (no wheel on index for Python ${PYTHON_VERSION} + ${CU_TAG}): ${SKIPPED[*]} ==="
+    fi
+    if [ ${#TORCH_ARRAY[@]} -eq 0 ]; then
+        # Errors go to stderr, like the argument-validation error earlier
+        # in this script.
+        echo "ERROR: none of the requested torch versions (${TORCH_REQUESTED[*]}) are available for Python ${PYTHON_VERSION} + ${CU_TAG}" >&2
+        exit 1
+    fi
+    echo "=== Building for torch versions: ${TORCH_ARRAY[*]} ==="
+
+    # The first available torch version produces the base wheel that the
+    # remaining versions are merged into.
+    FIRST_TORCH="${TORCH_ARRAY[0]}"
+    echo "=== Building wheel with torch ${FIRST_TORCH} ==="
+    build_wheel "$TMP_DIR" "$FIRST_TORCH"
+    repair_wheel "$TMP_DIR" "$TMP_DIR/dist"
+    BASE_WHL=$(get_wheel_path "$TMP_DIR/dist")
+
+    for ((i=1; i<${#TORCH_ARRAY[@]}; i++)); do
+        TORCH="${TORCH_ARRAY[$i]}"
+        echo "=== Building nixl_ep .so for torch ${TORCH} ==="
+
+        EP_TMP=$(mktemp -d)
+        build_wheel "$EP_TMP" "$TORCH"
+        repair_wheel "$EP_TMP" "$EP_TMP/dist"
+
+        # Merge only the torch-versioned .so. Both wheels were built
+        # against the same outer C++ build, so its DT_NEEDED entries
+        # (libucp-<hash>.so etc.) match what auditwheel already bundled
+        # into $BASE_WHL.
+ TORCH_MM=$(echo "$TORCH" | tr -d '.') + EP_WHL=$(get_wheel_path "$EP_TMP/dist") + ./contrib/wheel_merge.py \ + --base-wheel "$BASE_WHL" \ + --source-wheel "$EP_WHL" \ + --pattern "nixl_ep_cpp_torch${TORCH_MM}.*" \ + --target-dir "nixl_ep_cu${CUDA_MAJOR}" + + rm -rf "$EP_TMP" + done + + cp "$BASE_WHL" "$OUTPUT_DIR" else - uv build --wheel --out-dir $TMP_DIR --python $PYTHON_VERSION + build_wheel "$TMP_DIR" + repair_wheel "$TMP_DIR" "$TMP_DIR/dist" + cp "$(get_wheel_path "$TMP_DIR/dist")" "$OUTPUT_DIR" fi -# Bundle libraries -mkdir $TMP_DIR/dist -auditwheel repair --exclude 'libcuda*' --exclude 'libcufile*' --exclude 'libssl*' --exclude 'libcrypto*' --exclude 'libefa*' --exclude 'libhwloc*' --exclude 'libfabric*' --exclude 'libtorch*' --exclude 'libc10*' --exclude 'libdoca*' $TMP_DIR/nixl*.whl --plat $WHL_PLATFORM --wheel-dir $TMP_DIR/dist -./contrib/wheel_add_ucx_plugins.py --ucx-plugins-dir $UCX_PLUGINS_DIR --nixl-plugins-dir $NIXL_PLUGINS_DIR $TMP_DIR/dist/*.whl -cp $TMP_DIR/dist/*.whl $OUTPUT_DIR - # Clean up rm -rf "$TMP_DIR" diff --git a/contrib/wheel_merge.py b/contrib/wheel_merge.py new file mode 100755 index 0000000000..f858f8899e --- /dev/null +++ b/contrib/wheel_merge.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Merge file(s) from one wheel into another, regenerating RECORD."""
+
+from __future__ import annotations
+
+import argparse
+import base64
+import csv
+import fnmatch
+import hashlib
+import io
+import os
+import sys
+import zipfile
+
+
+def _sha256_b64(data: bytes) -> str:
+    """Return the SHA-256 digest of `data` as URL-safe base64 text.
+
+    Trailing `=` padding is stripped from the encoded digest.
+    """
+    digest = hashlib.sha256(data).digest()
+    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
+
+
+def _record_bytes(entries: list[tuple[str, bytes]], record_path: str) -> bytes:
+    """Build the contents of a RECORD file for `entries`.
+
+    Each `(name, data)` entry becomes a CSV row of the name, the
+    `sha256=<digest>` of the data, and the data length in bytes. An entry
+    named `record_path` is skipped; a final row for `record_path` itself
+    is appended with empty hash and size fields. The CSV text is returned
+    encoded as UTF-8.
+    """
+    rows = [
+        [name, f"sha256={_sha256_b64(data)}", str(len(data))]
+        for name, data in entries
+        if name != record_path
+    ]
+    rows.append([record_path, "", ""])
+    buf = io.StringIO()
+    csv.writer(buf).writerows(rows)
+    return buf.getvalue().encode("utf-8")
+
+
+def merge(
+    base_wheel: str,
+    source_wheel: str,
+    pattern: str,
+    target_dir: str,
+) -> list[str]:
+    """Merge files matching `pattern` from `source_wheel` into `base_wheel`.
+
+    `pattern` is a fnmatch glob applied to the basename of each entry in
+    the source wheel; matches are placed under `target_dir/` inside the
+    base wheel. `base_wheel` is rewritten atomically with a regenerated
+    RECORD. Returns the list of merged entry names (relative to the
+    wheel root).
+    """
+    # Drop trailing slashes so the f"{target_dir}/{name}" join below does
+    # not produce a double slash.
+    target_dir = target_dir.rstrip("/")
+
+    # Pull matching files out of the source wheel, rewriting their path so
+    # they land under target_dir/.
+ merged: dict[str, tuple[zipfile.ZipInfo, bytes]] = {} + with zipfile.ZipFile(source_wheel, "r") as zsrc: + for info in zsrc.infolist(): + name = os.path.basename(info.filename) + if not fnmatch.fnmatch(name, pattern): + continue + new_name = f"{target_dir}/{name}" + new_info = zipfile.ZipInfo(filename=new_name) + new_info.compress_type = info.compress_type + new_info.external_attr = info.external_attr + new_info.date_time = info.date_time + merged[new_name] = (new_info, zsrc.read(info)) + + if not merged: + raise SystemExit(f"no files matched {pattern!r} in {source_wheel}") + + # Read base wheel; pull out RECORD path so we can regenerate it. + by_name: dict[str, tuple[zipfile.ZipInfo, bytes]] = {} + record_path: str | None = None + with zipfile.ZipFile(base_wheel, "r") as zin: + for info in zin.infolist(): + if info.filename.endswith(".dist-info/RECORD"): + record_path = info.filename + continue + by_name[info.filename] = (info, zin.read(info)) + if record_path is None: + raise SystemExit(f"no .dist-info/RECORD found in {base_wheel}") + + # Apply merge (source overrides base if a name collides). + by_name.update(merged) + + # Sort for stable output; nice for diffing two wheels. 
+ ordered = sorted(by_name) + record = _record_bytes([(n, by_name[n][1]) for n in ordered], record_path) + + record_info = zipfile.ZipInfo(filename=record_path) + record_info.compress_type = zipfile.ZIP_DEFLATED + + tmp_path = f"{base_wheel}.tmp" + with zipfile.ZipFile( + tmp_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9 + ) as zout: + for name in ordered: + info, data = by_name[name] + zout.writestr(info, data) + zout.writestr(record_info, record) + os.replace(tmp_path, base_wheel) + + return sorted(merged) + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--base-wheel", + required=True, + help="wheel to merge into (rewritten in place)", + ) + parser.add_argument( + "--source-wheel", + required=True, + help="wheel to extract files from", + ) + parser.add_argument( + "--pattern", + required=True, + help="basename glob of files to merge (e.g. 'nixl_ep_cpp_torch212.*')", + ) + parser.add_argument( + "--target-dir", + required=True, + help="directory inside the base wheel for the merged files " + "(e.g. 'nixl_ep_cu13')", + ) + args = parser.parse_args() + + merged = merge( + base_wheel=args.base_wheel, + source_wheel=args.source_wheel, + pattern=args.pattern, + target_dir=args.target_dir, + ) + print(f"merged {len(merged)} file(s) into {args.base_wheel}:") + for name in merged: + print(f" {name}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/examples/device/ep/meson.build b/examples/device/ep/meson.build index a9ba19fb7a..848886e40b 100644 --- a/examples/device/ep/meson.build +++ b/examples/device/ep/meson.build @@ -78,9 +78,19 @@ nixl_ep_inc_dirs = [ torch_inc_dirs, ] +# Torch-versioned .so name so multiple torch ABIs coexist in a single +# wheel. 
Defined up here because TORCH_EXTENSION_NAME (consumed by +# pybind11's PYBIND11_MODULE macro to generate the PyInit_ symbol) +# must match the .so filename Python loads, otherwise import fails with +# "dynamic module does not define module export function". +torch_ver = run_command(py, '-c', + 'import torch; print("".join(torch.__version__.split(".")[:2]))', + check: true).stdout().strip() +nixl_ep_ext_name = 'nixl_ep_cpp_torch' + torch_ver + nixl_ep_cpp_args = [ '-DHAVE_CUDA', - '-DTORCH_EXTENSION_NAME=nixl_ep_cpp', + '-DTORCH_EXTENSION_NAME=' + nixl_ep_ext_name, '-Wno-deprecated-declarations', '-Wno-unused-variable', '-Wno-sign-compare', @@ -90,7 +100,7 @@ nixl_ep_cpp_args = [ nixl_ep_cuda_args = [ '-DHAVE_CUDA', - '-DTORCH_EXTENSION_NAME=nixl_ep_cpp', + '-DTORCH_EXTENSION_NAME=' + nixl_ep_ext_name, '--expt-relaxed-constexpr', # Allow calling constexpr __host__ functions from __device__ functions '-arch=sm_90', # Only compile for sm90 (overrides global -gencode flags) '--ptxas-options=--register-usage-level=10', # Allow more register usage (matches setup.py) @@ -135,9 +145,12 @@ nixl_ep_install_rpath = join_paths(get_option('prefix'), get_option('libdir')) nixl_ep_install_rpath += ':' + join_paths(get_option('prefix'), get_option('libdir'), 'plugins') nixl_ep_install_rpath += ':' + torch_lib_dir -nixl_ep_ext = py.extension_module('nixl_ep_cpp', +# CUDA-versioned install dir so cu12 and cu13 wheels don't collide. 
+nixl_ep_install_dir = 'nixl_ep_' + cuda_wheel_dir.split('_')[-1] # nixl_ep_cu12 or nixl_ep_cu13 + +nixl_ep_ext = py.extension_module(nixl_ep_ext_name, nixl_ep_sources, - subdir: 'nixl_ep', + subdir: nixl_ep_install_dir, dependencies: [ nixl_dep, pybind_dep, @@ -165,11 +178,12 @@ custom_target('nixl_ep_py_copy', input: nixl_ep_py_files, command: [ 'bash', '-c', - 'cp -r @0@/nixl_ep @1@/ && cp @2@ @1@/nixl_ep/ && touch @3@'.format( + 'mkdir -p @1@/@4@ && cp @0@/nixl_ep/*.py @1@/@4@/ && cp @2@ @1@/@4@/ && touch @3@'.format( meson.current_source_dir(), meson.current_build_dir(), nixl_ep_ext.full_path(), - join_paths(meson.current_build_dir(), 'nixl_ep_py.stamp') + join_paths(meson.current_build_dir(), 'nixl_ep_py.stamp'), + nixl_ep_install_dir, ) ], depends: nixl_ep_ext, @@ -180,6 +194,6 @@ py.install_sources( 'nixl_ep/__init__.py', 'nixl_ep/buffer.py', 'nixl_ep/utils.py', - subdir: 'nixl_ep', + subdir: nixl_ep_install_dir, pure: false, ) diff --git a/examples/device/ep/nixl_ep/__init__.py b/examples/device/ep/nixl_ep/__init__.py index 488ad2fc29..00e941598c 100644 --- a/examples/device/ep/nixl_ep/__init__.py +++ b/examples/device/ep/nixl_ep/__init__.py @@ -18,11 +18,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import importlib +import sys + import torch -from . import nixl_ep_cpp as _nixl_ep_cpp -from .buffer import Buffer -from .utils import EventOverlap +_torch_mm = "".join(torch.__version__.split(".")[:2]) +_nixl_ep_cpp = importlib.import_module(f".nixl_ep_cpp_torch{_torch_mm}", __package__) +# Alias the torch-versioned extension as `nixl_ep_cpp` so the static +# `from .nixl_ep_cpp import ...` imports in buffer.py / utils.py resolve. +sys.modules[f"{__package__}.nixl_ep_cpp"] = _nixl_ep_cpp + +# The submodules below import names from `nixl_ep_cpp`, so the dynamic +# import above must run first; that's why these aren't at the top. 
+from .buffer import Buffer # noqa: E402 +from .utils import EventOverlap # noqa: E402 topk_idx_t = getattr(_nixl_ep_cpp, "topk_idx_t", torch.int64) Config = _nixl_ep_cpp.Config diff --git a/pyproject.toml b/pyproject.toml index cd08c35c80..a77d768313 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,11 @@ dependencies = ["torch", "numpy"] [tool.mypy] mypy_path = ["src/bindings/python/nixl-meta"] ignore_missing_imports = true +# Two `nixl_ep/__init__.py` files coexist legitimately: the meta-dispatcher +# under `src/bindings/python/nixl-meta/nixl_ep/` and the actual EP source +# under `examples/device/ep/nixl_ep/`. Without explicit_package_bases mypy +# refuses to resolve them and errors with "Duplicate module named nixl_ep". +explicit_package_bases = true [tool.isort] profile = "black" diff --git a/src/bindings/python/nixl-meta/meson.build b/src/bindings/python/nixl-meta/meson.build index 6cb893f7f7..579bbdf1a9 100644 --- a/src/bindings/python/nixl-meta/meson.build +++ b/src/bindings/python/nixl-meta/meson.build @@ -34,16 +34,27 @@ source_root = meson.project_source_root() root_license_path = join_paths(source_root, 'LICENSE') license_path = fs.copyfile(root_license_path) -nixl_sources = files('nixl/__init__.py') - subdir('nixl') +subdir('nixl_ep') uv = find_program('uv', required: false) if uv.found() wheel_name = 'nixl-@0@-py3-none-any.whl'.format(meson.project_version()) + # Inputs intentionally point at the build-dir COPIES (return values of + # fs.copyfile() captured in the subdirs), not the source files. This way + # ninja runs the copies before `uv build` and incremental rebuilds pick + # up edits to the source __init__.py files. 
meta_wheel = custom_target( 'build_nixl_meta', - input: [pyproject_toml, readme_md, license_path] + nixl_sources, + input: [ + pyproject_toml, + readme_md, + license_path, + nixl_init_copy, + nixl_api_copy, + nixl_logging_copy, + nixl_ep_init_copy, + ], output: [wheel_name], command: [uv, 'build', '--wheel', '--out-dir', build_dir, build_dir], install: false, diff --git a/src/bindings/python/nixl-meta/nixl/meson.build b/src/bindings/python/nixl-meta/nixl/meson.build index d71f55189b..509fcecc10 100644 --- a/src/bindings/python/nixl-meta/nixl/meson.build +++ b/src/bindings/python/nixl-meta/nixl/meson.build @@ -14,6 +14,6 @@ # limitations under the License. fs = import('fs') -fs.copyfile('__init__.py') -fs.copyfile('_api.py') -fs.copyfile('logging.py') +nixl_init_copy = fs.copyfile('__init__.py') +nixl_api_copy = fs.copyfile('_api.py') +nixl_logging_copy = fs.copyfile('logging.py') diff --git a/src/bindings/python/nixl-meta/nixl_ep/__init__.py b/src/bindings/python/nixl-meta/nixl_ep/__init__.py new file mode 100644 index 0000000000..9d5017e95b --- /dev/null +++ b/src/bindings/python/nixl-meta/nixl_ep/__init__.py @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""nixl_ep meta-dispatcher: selects the correct CUDA and torch ABI backend."""
+
+import importlib
+import sys
+from typing import TYPE_CHECKING
+
+
+def _get_torch_cuda_major() -> int | None:
+    """Return the CUDA major version that torch was built for, or None."""
+    # Imported lazily, inside the function, rather than at module top.
+    from torch.version import cuda as _torch_cuda_ver
+
+    return int(_torch_cuda_ver.split(".")[0]) if _torch_cuda_ver else None
+
+
+def _load_ep_module() -> str:
+    """Import the nixl_ep backend package and return its module name.
+
+    When torch reports a CUDA version, only `nixl_ep_cu<major>` is tried,
+    and ImportError is raised if that package is missing. Otherwise
+    `nixl_ep_cu13` and then `nixl_ep_cu12` are tried in order, and
+    ImportError is raised if neither can be imported.
+    """
+    cuda_major = _get_torch_cuda_major()
+    if cuda_major is not None:
+        pip_name = f"nixl-cu{cuda_major}"
+        mod_name = f"nixl_ep_cu{cuda_major}"
+        try:
+            return importlib.import_module(mod_name).__name__
+        except ModuleNotFoundError as e:
+            # Only translate the error when the backend package itself is
+            # the missing module; a different missing module is re-raised
+            # unchanged.
+            if e.name != mod_name:
+                raise
+            raise ImportError(
+                f"torch reports CUDA {cuda_major} but {pip_name} is not installed"
+            ) from e
+    # CPU-only torch — use whatever backend is installed
+    for mod_name in ("nixl_ep_cu13", "nixl_ep_cu12"):
+        try:
+            return importlib.import_module(mod_name).__name__
+        except ModuleNotFoundError as e:
+            # Same rule as above: skip only when this backend is absent.
+            if e.name != mod_name:
+                raise
+            continue
+    raise ImportError("No nixl_ep CUDA backend found")
+
+
+# The backend was imported by _load_ep_module(), so it is in sys.modules.
+_pkg = sys.modules[_load_ep_module()]
+
+submodules = ["buffer", "utils"]
+for sub_name in submodules:
+    # Import submodule from actual wheel
+    module = importlib.import_module(f"{_pkg.__name__}.{sub_name}")
+    # Make it accessible as nixl_ep.buffer, nixl_ep.utils
+    sys.modules[f"nixl_ep.{sub_name}"] = module
+    # Also add the submodule itself to the nixl_ep namespace
+    setattr(sys.modules[__name__], sub_name, module)
+
+    # Expose all public symbols from the submodule under the nixl_ep namespace
+    # ("public" here means every name in dir(module) without a leading
+    # underscore).
+    for attr in dir(module):
+        if not attr.startswith("_"):
+            setattr(sys.modules[__name__], attr, getattr(module, attr))
+
+# Expose public symbols from the backend __init__ (Config, topk_idx_t, etc.)
+for attr in dir(_pkg): + if not attr.startswith("_"): + setattr(sys.modules[__name__], attr, getattr(_pkg, attr)) + +if TYPE_CHECKING: + from nixl_ep.buffer import Buffer # noqa: F401 + from nixl_ep.utils import EventOverlap # noqa: F401 diff --git a/src/bindings/python/nixl-meta/nixl_ep/meson.build b/src/bindings/python/nixl-meta/nixl_ep/meson.build new file mode 100644 index 0000000000..320b68a24f --- /dev/null +++ b/src/bindings/python/nixl-meta/nixl_ep/meson.build @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +fs = import('fs') +nixl_ep_init_copy = fs.copyfile('__init__.py') diff --git a/src/bindings/python/nixl-meta/pyproject.toml.in b/src/bindings/python/nixl-meta/pyproject.toml.in index b9a957510f..23a95a4994 100644 --- a/src/bindings/python/nixl-meta/pyproject.toml.in +++ b/src/bindings/python/nixl-meta/pyproject.toml.in @@ -36,4 +36,4 @@ cu12 = ["nixl-cu12==@VERSION@"] cu13 = ["nixl-cu13==@VERSION@"] [tool.setuptools] -packages = ["nixl"] +packages = ["nixl", "nixl_ep"]