From 13825b5e34d5554fdcc9c77b3832eda31a149301 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 09:14:25 -0500 Subject: [PATCH 01/43] ensure 'torch' CUDA wheels are installed in CI --- .gitignore | 2 + ci/download-torch-wheels.sh | 40 +++++ ci/test_wheel_pylibwholegraph.sh | 4 +- .../all_cuda-129_arch-aarch64.yaml | 2 + .../all_cuda-129_arch-x86_64.yaml | 2 + .../all_cuda-131_arch-aarch64.yaml | 2 + .../all_cuda-131_arch-x86_64.yaml | 2 + conda/recipes/cugraph-pyg/recipe.yaml | 2 +- dependencies.yaml | 146 +++++++++++------- python/cugraph-pyg/pyproject.toml | 2 +- python/pylibwholegraph/pyproject.toml | 1 + 11 files changed, 147 insertions(+), 58 deletions(-) create mode 100755 ci/download-torch-wheels.sh diff --git a/.gitignore b/.gitignore index 1ccc2780..8d4f88e2 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,8 @@ wheels/ wheelhouse/ _skbuild/ cufile.log +*.tar.gz +*.whl ## Patching *.diff diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh new file mode 100755 index 00000000..7313a006 --- /dev/null +++ b/ci/download-torch-wheels.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# [description] +# +# Downloads a CUDA variant of 'torch' from the correct index, based on CUDA major version. +# +# This exists to avoid using 'pip --extra-index-url', which has these undesirable properties: +# +# - allows for CPU-only 'torch' to be downloaded from pypi.org +# - allows for other non-torch packages like 'numpy' to be downloaded from the PyTorch indices +# - increases solve complexity for 'pip' +# + +set -e -u -o pipefail + +TORCH_WHEEL_DIR="${1}" + +# Ensure CUDA-enabled 'torch' packages are always used. +# +# Downloading + passing the downloaded file as a requirement forces the use of this +# package and ensures 'pip' considers all of its requirements. 
+# +# Not appending this to PIP_CONSTRAINT, because we don't want the torch '--extra-index-url' +# to leak outside of this script into other 'pip {download,install}'' calls. +rapids-dependency-file-generator \ + --output requirements \ + --file-key "torch_only" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES};require_gpu_pytorch=true" \ +| tee ./torch-constraints.txt + +rapids-pip-retry download \ + --isolated \ + --prefer-binary \ + --no-deps \ + -d "${TORCH_WHEEL_DIR}" \ + --constraint "${PIP_CONSTRAINT}" \ + --constraint ./torch-constraints.txt \ + 'torch' diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 252c6cfa..ac065e68 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -2,9 +2,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -set -e # abort the script on error -set -o pipefail # piped commands propagate their error -set -E # ERR traps are inherited by subcommands +set -euo pipefail # Delete system libnccl.so to ensure the wheel is used. 
# (but only do this in CI, to avoid breaking local dev environments) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index efb98a8c..36a21955 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -27,6 +27,7 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 +- ogb - packaging - pandas - pre-commit @@ -42,6 +43,7 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy +- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index cc7de24e..1085bba4 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -27,6 +27,7 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 +- ogb - packaging - pandas - pre-commit @@ -42,6 +43,7 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy +- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index 65502230..0ebe16a4 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -27,6 +27,7 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 +- ogb - packaging - pandas - pre-commit @@ -42,6 +43,7 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy +- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index f8bf95a2..58f1fe21 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -27,6 +27,7 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 +- ogb - 
packaging - pandas - pre-commit @@ -42,6 +43,7 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy +- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/recipes/cugraph-pyg/recipe.yaml b/conda/recipes/cugraph-pyg/recipe.yaml index 3243a6c8..ea70f7b9 100644 --- a/conda/recipes/cugraph-pyg/recipe.yaml +++ b/conda/recipes/cugraph-pyg/recipe.yaml @@ -40,7 +40,7 @@ requirements: # This is intentionally spelled 'pytorch' (not 'pytorch-gpu' and not using build string selectors) # because we want it to be possible to at least install `cugraph-pyg` in an environment without a GPU, # to support use cases like building container images. - - pytorch >=2.3 + - pytorch >=2.6 - pytorch_geometric >=2.5,<2.8 tests: diff --git a/dependencies.yaml b/dependencies.yaml index 1f10f263..1848ed1b 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -31,6 +31,7 @@ files: - rapids_build_skbuild - test_cpp - test_python_common + - test_python_cugraph_pyg - test_python_pylibwholegraph checks: output: none @@ -60,10 +61,9 @@ files: - depends_on_cudf - depends_on_pytorch - depends_on_cuml - - depends_on_ogb - - depends_on_sentence_transformers - py_version - test_python_common + - test_pythong_cugraph_pyg - depends_on_pylibwholegraph - depends_on_cugraph_pyg test_pylibwholegraph: @@ -76,6 +76,10 @@ files: - test_python_common - depends_on_pylibwholegraph - test_python_pylibwholegraph + torch_only: + output: none + includes: + - depends_on_pytorch py_build_libwholegraph: output: pyproject pyproject_dir: python/libwholegraph @@ -165,9 +169,8 @@ files: - depends_on_pytorch - depends_on_cuml - depends_on_cugraph - - depends_on_ogb - - depends_on_sentence_transformers - test_python_common + - test_python_cugraph_pyg cugraph_pyg_dev: matrix: cuda: ["12.9", "13.1"] @@ -298,7 +301,7 @@ dependencies: - output_types: [conda, pyproject, requirements] packages: - *numpy - - packaging + - &packaging packaging - pandas rapids_build_skbuild: common: 
@@ -331,104 +334,141 @@ dependencies: - pytest-benchmark - pytest-cov - pytest-xdist + specific: + - output_types: [conda] + matrices: + - matrix: + no_pytorch: "true" + packages: + - matrix: + packages: + - torchdata + - pydantic + test_python_cugraph_pyg: + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: + no_pytorch: "true" + packages: + - matrix: + packages: + - ogb + - sentence-transformers test_python_pylibwholegraph: common: - output_types: [conda, pyproject, requirements] packages: + - *packaging - pytest-forked - scipy depends_on_pytorch: - common: - - output_types: [conda] - packages: - - torchdata - - pydantic specific: - - output_types: [requirements] + # conda: choose between GPU and CPU-only pytorch + - output_types: conda matrices: - matrix: no_pytorch: "true" packages: - matrix: - cuda: "12.*" + require_gpu_pytorch: "true" packages: - - --extra-index-url=https://download.pytorch.org/whl/cu126 - - matrix: - cuda: "13.*" - packages: - - --extra-index-url=https://download.pytorch.org/whl/cu130 + - pytorch-gpu>=2.6 + # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: - - output_types: [requirements, pyproject] + # conda: optionally pins to older versions based on 'dependencies' key + - output_types: conda matrices: - matrix: no_pytorch: "true" packages: - matrix: cuda: "12.*" + dependencies: "oldest" packages: - - torch>=2.3 + - pytorch==2.6 - matrix: cuda: "13.*" + dependencies: "oldest" packages: - - &pytorch_pip torch>=2.9.0 + - pytorch==2.9 + # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: - - *pytorch_pip - - output_types: [conda] + - output_types: pyproject matrices: - # Prevent fallback to CPU-only pytorch when we want a CUDA variant. 
+ # avoid pulling in 'torch' in places like DLFW builds that prefer to install it other ways - matrix: - require_gpu: "true" + no_pytorch: "true" packages: - - pytorch-gpu - # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: - depends_on_nccl: - common: - - output_types: conda - packages: - - nccl>=2.19 - specific: - - output_types: [pyproject, requirements] + - &pytorch_pip torch>=2.6 + # wheels: handle GPU vs. CPU and version pinning together + # + # The 'pytorch.org' indices referenced in --extra-index-url below host CPU-only variants too, + # so requirements like '>=' are not safe. + # + # Using '==' and a version with the CUDA specifier like '+cu130' is the most reliable way to ensure + # the packages we want are pulled (at the expense of needing to maintain this list). + # + # 'torch' tightly pins wheels to a single {major}.{minor} CTK version. + # + # This list only contains entries exactly matching CUDA {major}.{minor} that we test in RAPIDS CI, + # to ensure a loud error alerts us to the need to update this list (or CI scripts) when new + # CTKs are added to the support matrix. 
+ - output_types: requirements matrices: + # avoid pulling in 'torch' in places like DLFW builds that prefer to install it other ways - matrix: - cuda: "12.*" - cuda_suffixed: "true" + no_pytorch: "true" packages: - - nvidia-nccl-cu12>=2.19 + # matrices below ensure CUDA 'torch' packages are used - matrix: + cuda: "12.9" + dependencies: "oldest" + require_gpu_pytorch: "true" packages: - depends_on_ogb: - common: - - output_types: [conda] - packages: - - ogb - specific: - - output_types: [requirements, pyproject] - matrices: + - &torch_cu129_index --extra-index-url=https://download.pytorch.org/whl/cu129 + - torch==2.8.0+cu129 - matrix: - no_pytorch: "true" + cuda: "12.9" + require_gpu_pytorch: "true" packages: + - *torch_cu129_index + - torch==2.10.0+cu129 - matrix: + cuda: "13.0" + dependencies: "oldest" + require_gpu_pytorch: "true" packages: - - ogb - # for MovieLens example - depends_on_sentence_transformers: + - &torch_index_cu13 --extra-index-url=https://download.pytorch.org/whl/cu130 + - torch==2.8.0+cu130 + - matrix: + cuda: "13.0" + require_gpu_pytorch: "true" + packages: + - *torch_index_cu13 + - torch==2.10.0+cu130 + - matrix: + packages: + - *pytorch_pip + depends_on_nccl: common: - - output_types: [conda] + - output_types: conda packages: - - sentence-transformers + - nccl>=2.19 specific: - - output_types: [requirements, pyproject] + - output_types: [pyproject, requirements] matrices: - matrix: - no_pytorch: "true" + cuda: "12.*" + cuda_suffixed: "true" packages: + - nvidia-nccl-cu12>=2.19 - matrix: packages: - - sentence-transformers depends_on_pyg: common: - output_types: [conda] diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 1013f7af..39ef7d2d 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,7 +55,7 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", - "torch>=2.9.0", + "torch>=2.6", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools.dynamic] diff --git a/python/pylibwholegraph/pyproject.toml b/python/pylibwholegraph/pyproject.toml index dd4dda49..4682c260 100644 --- a/python/pylibwholegraph/pyproject.toml +++ b/python/pylibwholegraph/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ [project.optional-dependencies] test = [ + "packaging", "pytest", "pytest-benchmark", "pytest-cov", From 698f1152e62105cee0aca0b53738343f7d21700b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 09:22:23 -0500 Subject: [PATCH 02/43] help git understand the diff --- dependencies.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 1848ed1b..8a2d6f52 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -456,11 +456,11 @@ dependencies: - *pytorch_pip depends_on_nccl: common: - - output_types: conda + - output_types: [conda] packages: - nccl>=2.19 specific: - - output_types: [pyproject, requirements] + - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" From 1c457b8f7970466922423c8ac549740eb55ae6e6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 10:24:08 -0500 Subject: [PATCH 03/43] use rapids-generate-pip-constraints, fix typo --- ci/test_wheel_cugraph-pyg.sh | 18 +++++++++--------- ci/test_wheel_pylibwholegraph.sh | 18 ++++++++++-------- dependencies.yaml | 2 +- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index c05e18af..15a41602 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -15,13 +15,12 @@ LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_ PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") 
CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" -if [[ "${CUDA_MAJOR}" == "12" ]]; then - PYTORCH_INDEX="https://download.pytorch.org/whl/cu126" -else - PYTORCH_INDEX="https://download.pytorch.org/whl/cu130" -fi +# ensure a CUDA variant of 'torch' is used +TORCH_WHEEL_DIR="$(mktemp -d)" +./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" # notes: # @@ -30,12 +29,13 @@ fi # its dependencies are available from pypi.org # rapids-pip-retry install \ - -v \ - --extra-index-url "${PYTORCH_INDEX}" \ + --prefer-binary \ + --constraint "${PIP_CONSTRAINT}" \ --extra-index-url 'https://pypi.nvidia.com' \ "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ - "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" + "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \ + "${TORCH_WHEEL_DIR}"/torch-*.whl # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index ac065e68..33a857db 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -16,23 +16,25 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# determine pytorch source -if [[ "${CUDA_MAJOR}" == "12" ]]; then - 
PYTORCH_INDEX="https://download.pytorch.org/whl/cu126" -else - PYTORCH_INDEX="https://download.pytorch.org/whl/cu130" -fi RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints test_pylibwholegraph "${PIP_CONSTRAINT}" + +# ensure a CUDA variant of 'torch' is used +TORCH_WHEEL_DIR="$(mktemp -d)" +./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" + # echo to expand wildcard before adding `[extra]` requires for pip rapids-logger "Installing Packages" rapids-pip-retry install \ - --extra-index-url ${PYTORCH_INDEX} \ + --prefer-binary \ + --constraint "${PIP_CONSTRAINT}" \ "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph*.whl)[test]" \ "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ - 'torch>=2.3' + "${TORCH_WHEEL_DIR}"/torch-*.whl rapids-logger "pytest pylibwholegraph" cd python/pylibwholegraph/pylibwholegraph/tests diff --git a/dependencies.yaml b/dependencies.yaml index 8a2d6f52..568f4a4e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -63,7 +63,7 @@ files: - depends_on_cuml - py_version - test_python_common - - test_pythong_cugraph_pyg + - test_python_cugraph_pyg - depends_on_pylibwholegraph - depends_on_cugraph_pyg test_pylibwholegraph: From 066d5c4787889ca6707c7eb107886f92431d05d0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 11:41:49 -0500 Subject: [PATCH 04/43] handle the fallback case better, other fixes --- ci/download-torch-wheels.sh | 12 ++++++++++++ ci/test_python.sh | 4 ++-- ci/test_wheel_cugraph-pyg.sh | 27 +++++++++++++++++++-------- ci/test_wheel_pylibwholegraph.sh | 23 +++++++++++++++++------ ci/validate_wheel.sh | 22 ++++++++++++++++++++++ dependencies.yaml | 11 +++++++---- python/cugraph-pyg/pyproject.toml | 1 - 7 files changed, 79 insertions(+), 21 deletions(-) diff --git 
a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh index 7313a006..d2ecde66 100755 --- a/ci/download-torch-wheels.sh +++ b/ci/download-torch-wheels.sh @@ -17,6 +17,18 @@ set -e -u -o pipefail TORCH_WHEEL_DIR="${1}" +# skip download attempt on CUDA versions where we know there isn't a 'torch' CUDA wheel. +CUDA_MAJOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f1) +CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) +if \ + { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -lt 9 ]; } \ + || { [ "${CUDA_MAJOR}" -eq 13 ] && [ "${CUDA_MINOR}" -gt 0 ]; } \ + || [ "${CUDA_MAJOR}" -gt 13 ]; +then + rapids-logger "Skipping 'torch' wheel download. (requires CUDA 12.9+ or 13.0, found ${RAPIDS_CUDA_VERSION})" + exit 0 +fi + # Ensure CUDA-enabled 'torch' packages are always used. # # Downloading + passing the downloaded file as a requirement forces the use of this diff --git a/ci/test_python.sh b/ci/test_python.sh index 09aeb27e..6b661236 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -37,7 +37,7 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-dependency-file-generator \ --output conda \ --file-key test_cugraph_pyg \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu_pytorch=true" \ --prepend-channel "${CPP_CHANNEL}" \ --prepend-channel "${PYTHON_CHANNEL}" \ --prepend-channel "${PYTHON_NOARCH_CHANNEL}" \ @@ -76,7 +76,7 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-dependency-file-generator \ --output conda \ --file-key test_pylibwholegraph \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu_pytorch=true" \ --prepend-channel "${CPP_CHANNEL}" \ --prepend-channel "${PYTHON_CHANNEL}" \ | tee env.yaml diff --git a/ci/test_wheel_cugraph-pyg.sh 
b/ci/test_wheel_cugraph-pyg.sh index 15a41602..60c03dc0 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -18,10 +18,27 @@ CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_ # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" -# ensure a CUDA variant of 'torch' is used +PIP_INSTALL_ARGS=( + --prefer-binary + --constraint "${PIP_CONSTRAINT}" + --extra-index-url 'https://pypi.nvidia.com' + "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl + "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" + "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" +) + +# ensure a CUDA variant of 'torch' is used (if one is available) TORCH_WHEEL_DIR="$(mktemp -d)" ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" +# 'cugraph-pyg' is still expected to be importable +# and testable in an environment where 'torch' isn't installed. +if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then + rapids-echo-stderr "No 'torch' wheels downloaded." 
+else + PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) +fi + # notes: # # * echo to expand wildcard before adding `[extra]` requires for pip @@ -29,13 +46,7 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # its dependencies are available from pypi.org # rapids-pip-retry install \ - --prefer-binary \ - --constraint "${PIP_CONSTRAINT}" \ - --extra-index-url 'https://pypi.nvidia.com' \ - "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ - "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ - "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \ - "${TORCH_WHEEL_DIR}"/torch-*.whl + "${PIP_INSTALL_ARGS[@]}" # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 33a857db..e7b06090 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -23,18 +23,29 @@ mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_pylibwholegraph "${PIP_CONSTRAINT}" -# ensure a CUDA variant of 'torch' is used +PIP_INSTALL_ARGS=( + --prefer-binary + --constraint "${PIP_CONSTRAINT}" + "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph*.whl)[test]" + "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl +) + +# ensure a CUDA variant of 'torch' is used (if one is available) TORCH_WHEEL_DIR="$(mktemp -d)" ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" +# 'cugraph-pyg' is still expected to be importable +# and testable in an environment where 'torch' isn't installed. +if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then + rapids-echo-stderr "No 'torch' wheels downloaded." 
+else + PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) +fi + # echo to expand wildcard before adding `[extra]` requires for pip rapids-logger "Installing Packages" rapids-pip-retry install \ - --prefer-binary \ - --constraint "${PIP_CONSTRAINT}" \ - "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph*.whl)[test]" \ - "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ - "${TORCH_WHEEL_DIR}"/torch-*.whl + "${PIP_INSTALL_ARGS[@]}" rapids-logger "pytest pylibwholegraph" cd python/pylibwholegraph/pylibwholegraph/tests diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 42d0a8bf..944e8f76 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -9,6 +9,10 @@ wheel_dir_relative_path=$2 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" +python -m pip install \ + --prefer-binary \ + 'pkginfo>=1.12.1.2' + cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" @@ -43,3 +47,21 @@ rapids-logger "validate packages with 'twine'" twine check \ --strict \ "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validating that the wheel doesn't depend on 'torch' (even in an extra)" +WHEEL_FILE="$(${wheel_dir_relative_path}/*.whl)" + +# NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' +unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ +| grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ +| tee matches.txt + +if wc -l < ./matches.txt; then + echo -n "Wheel '${WHEEL_FILE}' appears to depend on 'torch'. Remove that dependency. " + echo -n "We prefer to not declare a 'torch' dependency and allow it to be managed separately, " + echo "to ensure tight control over the variants installed (including for DLFW builds)." 
+ exit 1 +else + echo "No dependency on 'torch' found" + exit 0 +fi diff --git a/dependencies.yaml b/dependencies.yaml index 568f4a4e..03ed13d3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -166,7 +166,6 @@ files: table: project.optional-dependencies key: test includes: - - depends_on_pytorch - depends_on_cuml - depends_on_cugraph - test_python_common @@ -451,9 +450,13 @@ dependencies: packages: - *torch_index_cu13 - torch==2.10.0+cu130 - - matrix: - packages: - - *pytorch_pip + # + # (empty) + # + # Intentionally no fallback entry, to ensure a loud error alerts us to the need + # to update this list and/or CI scripts when new CUDA versions, Python versions, + # etc. are added to the RAPIDS support matrix. + # depends_on_nccl: common: - output_types: [conda] diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 39ef7d2d..10c06e77 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,7 +55,6 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", - "torch>=2.6", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[tool.setuptools.dynamic] From 6f73e44263f2f80712dd82851579fcef32a17842 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 13:16:14 -0500 Subject: [PATCH 05/43] echo wheel name --- ci/validate_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 944e8f76..ef5d085c 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -49,7 +49,7 @@ twine check \ "$(echo ${wheel_dir_relative_path}/*.whl)" rapids-logger "validating that the wheel doesn't depend on 'torch' (even in an extra)" -WHEEL_FILE="$(${wheel_dir_relative_path}/*.whl)" +WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" # NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ From 271eb7ef1b08cf56a4eb6ad9c70ea8a70ac5e409 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 13:29:27 -0500 Subject: [PATCH 06/43] more pin fiddling --- conda/recipes/cugraph-pyg/recipe.yaml | 2 +- dependencies.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/recipes/cugraph-pyg/recipe.yaml b/conda/recipes/cugraph-pyg/recipe.yaml index ea70f7b9..3243a6c8 100644 --- a/conda/recipes/cugraph-pyg/recipe.yaml +++ b/conda/recipes/cugraph-pyg/recipe.yaml @@ -40,7 +40,7 @@ requirements: # This is intentionally spelled 'pytorch' (not 'pytorch-gpu' and not using build string selectors) # because we want it to be possible to at least install `cugraph-pyg` in an environment without a GPU, # to support use cases like building container images. 
- - pytorch >=2.6 + - pytorch >=2.3 - pytorch_geometric >=2.5,<2.8 tests: diff --git a/dependencies.yaml b/dependencies.yaml index 03ed13d3..bb414d39 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -372,7 +372,7 @@ dependencies: - matrix: require_gpu_pytorch: "true" packages: - - pytorch-gpu>=2.6 + - pytorch-gpu>=2.3 # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: @@ -386,7 +386,7 @@ dependencies: cuda: "12.*" dependencies: "oldest" packages: - - pytorch==2.6 + - pytorch==2.4.1 - matrix: cuda: "13.*" dependencies: "oldest" @@ -403,7 +403,7 @@ dependencies: packages: - matrix: packages: - - &pytorch_pip torch>=2.6 + - &pytorch_pip torch>=2.5 # wheels: handle GPU vs. CPU and version pinning together # # The 'pytorch.org' indices referenced in --extra-index-url below host CPU-only variants too, From 5a4064e7bf27a2548b32012375996f976d23e4e9 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 15:44:49 -0500 Subject: [PATCH 07/43] fix validation script --- ci/validate_wheel.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index ef5d085c..88ba85aa 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -9,10 +9,6 @@ wheel_dir_relative_path=$2 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -python -m pip install \ - --prefer-binary \ - 'pkginfo>=1.12.1.2' - cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" @@ -52,11 +48,12 @@ rapids-logger "validating that the wheel doesn't depend on 'torch' (even in an e WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" # NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' +# Use '|| true' so grep not finding any matches (exit 1) does not kill the script under set -e unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ | grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ -| tee matches.txt +| tee matches.txt || true 
-if wc -l < ./matches.txt; then +if [[ -s ./matches.txt ]]; then echo -n "Wheel '${WHEEL_FILE}' appears to depend on 'torch'. Remove that dependency. " echo -n "We prefer to not declare a 'torch' dependency and allow it to be managed separately, " echo "to ensure tight control over the variants installed (including for DLFW builds)." From 0d7215ec76e8fedc46ad5aa0a7128c56aa19db55 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:19:40 -0500 Subject: [PATCH 08/43] just wheels changes --- .../all_cuda-129_arch-aarch64.yaml | 2 - .../all_cuda-129_arch-x86_64.yaml | 2 - .../all_cuda-131_arch-aarch64.yaml | 2 - .../all_cuda-131_arch-x86_64.yaml | 2 - dependencies.yaml | 116 ++++++++---------- python/cugraph-pyg/pyproject.toml | 1 + python/pylibwholegraph/pyproject.toml | 1 - 7 files changed, 53 insertions(+), 73 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 36a21955..efb98a8c 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -27,7 +27,6 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 -- ogb - packaging - pandas - pre-commit @@ -43,7 +42,6 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy -- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 1085bba4..cc7de24e 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -27,7 +27,6 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 -- ogb - packaging - pandas - pre-commit @@ -43,7 +42,6 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy -- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml 
b/conda/environments/all_cuda-131_arch-aarch64.yaml index 0ebe16a4..65502230 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -27,7 +27,6 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 -- ogb - packaging - pandas - pre-commit @@ -43,7 +42,6 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy -- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 58f1fe21..f8bf95a2 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -27,7 +27,6 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 -- ogb - packaging - pandas - pre-commit @@ -43,7 +42,6 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy -- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/dependencies.yaml b/dependencies.yaml index bb414d39..407be53c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -31,7 +31,6 @@ files: - rapids_build_skbuild - test_cpp - test_python_common - - test_python_cugraph_pyg - test_python_pylibwholegraph checks: output: none @@ -61,9 +60,10 @@ files: - depends_on_cudf - depends_on_pytorch - depends_on_cuml + - depends_on_ogb + - depends_on_sentence_transformers - py_version - test_python_common - - test_python_cugraph_pyg - depends_on_pylibwholegraph - depends_on_cugraph_pyg test_pylibwholegraph: @@ -76,10 +76,6 @@ files: - test_python_common - depends_on_pylibwholegraph - test_python_pylibwholegraph - torch_only: - output: none - includes: - - depends_on_pytorch py_build_libwholegraph: output: pyproject pyproject_dir: python/libwholegraph @@ -166,10 +162,12 @@ files: table: project.optional-dependencies key: test includes: + - depends_on_pytorch - depends_on_cuml - depends_on_cugraph + - depends_on_ogb + - 
depends_on_sentence_transformers - test_python_common - - test_python_cugraph_pyg cugraph_pyg_dev: matrix: cuda: ["12.9", "13.1"] @@ -300,7 +298,7 @@ dependencies: - output_types: [conda, pyproject, requirements] packages: - *numpy - - &packaging packaging + - packaging - pandas rapids_build_skbuild: common: @@ -333,68 +331,19 @@ dependencies: - pytest-benchmark - pytest-cov - pytest-xdist - specific: - - output_types: [conda] - matrices: - - matrix: - no_pytorch: "true" - packages: - - matrix: - packages: - - torchdata - - pydantic - test_python_cugraph_pyg: - specific: - - output_types: [conda, requirements, pyproject] - matrices: - - matrix: - no_pytorch: "true" - packages: - - matrix: - packages: - - ogb - - sentence-transformers test_python_pylibwholegraph: common: - output_types: [conda, pyproject, requirements] packages: - - *packaging - pytest-forked - scipy depends_on_pytorch: + common: + - output_types: [conda] + packages: + - torchdata + - pydantic specific: - # conda: choose between GPU and CPU-only pytorch - - output_types: conda - matrices: - - matrix: - no_pytorch: "true" - packages: - - matrix: - require_gpu_pytorch: "true" - packages: - - pytorch-gpu>=2.3 - # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - - matrix: - packages: - # conda: optionally pins to older versions based on 'dependencies' key - - output_types: conda - matrices: - - matrix: - no_pytorch: "true" - packages: - - matrix: - cuda: "12.*" - dependencies: "oldest" - packages: - - pytorch==2.4.1 - - matrix: - cuda: "13.*" - dependencies: "oldest" - packages: - - pytorch==2.9 - # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - - matrix: - packages: - output_types: pyproject matrices: # avoid pulling in 'torch' in places like DLFW builds that prefer to install it other ways @@ -457,13 +406,23 @@ dependencies: # to update this list and/or CI scripts when new CUDA versions, Python versions, # etc. 
are added to the RAPIDS support matrix. # + - output_types: [conda] + matrices: + # Prevent fallback to CPU-only pytorch when we want a CUDA variant. + - matrix: + require_gpu: "true" + packages: + - pytorch-gpu + # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. + - matrix: + packages: depends_on_nccl: common: - - output_types: [conda] + - output_types: conda packages: - nccl>=2.19 specific: - - output_types: [requirements, pyproject] + - output_types: [pyproject, requirements] matrices: - matrix: cuda: "12.*" @@ -472,6 +431,35 @@ dependencies: - nvidia-nccl-cu12>=2.19 - matrix: packages: + depends_on_ogb: + common: + - output_types: [conda] + packages: + - ogb + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + no_pytorch: "true" + packages: + - matrix: + packages: + - ogb + # for MovieLens example + depends_on_sentence_transformers: + common: + - output_types: [conda] + packages: + - sentence-transformers + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + no_pytorch: "true" + packages: + - matrix: + packages: + - sentence-transformers depends_on_pyg: common: - output_types: [conda] diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 10c06e77..52c8db00 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,6 +55,7 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", + "torch>=2.5", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[tool.setuptools.dynamic] diff --git a/python/pylibwholegraph/pyproject.toml b/python/pylibwholegraph/pyproject.toml index 4682c260..dd4dda49 100644 --- a/python/pylibwholegraph/pyproject.toml +++ b/python/pylibwholegraph/pyproject.toml @@ -33,7 +33,6 @@ dependencies = [ [project.optional-dependencies] test = [ - "packaging", "pytest", "pytest-benchmark", "pytest-cov", From fc30204c43ec8c299d5c806d3e77f4a72c0a3a68 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:21:28 -0500 Subject: [PATCH 09/43] even fewer changes --- ci/test_python.sh | 4 ++-- dependencies.yaml | 2 +- python/cugraph-pyg/pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 6b661236..09aeb27e 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -37,7 +37,7 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-dependency-file-generator \ --output conda \ --file-key test_cugraph_pyg \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu_pytorch=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu=true" \ --prepend-channel "${CPP_CHANNEL}" \ --prepend-channel "${PYTHON_CHANNEL}" \ --prepend-channel "${PYTHON_NOARCH_CHANNEL}" \ @@ -76,7 +76,7 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-dependency-file-generator \ --output conda \ --file-key test_pylibwholegraph \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu_pytorch=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu=true" \ --prepend-channel "${CPP_CHANNEL}" \ --prepend-channel "${PYTHON_CHANNEL}" \ | tee env.yaml diff --git a/dependencies.yaml b/dependencies.yaml index 407be53c..2516bd0c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -352,7 +352,7 @@ dependencies: packages: - matrix: packages: - - &pytorch_pip torch>=2.5 + - &pytorch_pip torch>=2.9.0 # 
wheels: handle GPU vs. CPU and version pinning together # # The 'pytorch.org' indices referenced in --extra-index-url below host CPU-only variants too, diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 52c8db00..1013f7af 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,7 +55,7 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", - "torch>=2.5", + "torch>=2.9.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools.dynamic] From 97e2c0284c816eea3004895ce2b2261f97fb232c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:34:30 -0500 Subject: [PATCH 10/43] revert gitignore changes --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 8d4f88e2..1ccc2780 100644 --- a/.gitignore +++ b/.gitignore @@ -40,8 +40,6 @@ wheels/ wheelhouse/ _skbuild/ cufile.log -*.tar.gz -*.whl ## Patching *.diff From 355d5aa4e4eb8c31edf58785372f7cce1fda0991 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:36:39 -0500 Subject: [PATCH 11/43] add 'torch_only' --- dependencies.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dependencies.yaml b/dependencies.yaml index 2516bd0c..52bd1260 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -180,6 +180,10 @@ files: - depends_on_pyg - depends_on_pytorch - test_python_common + torch_only: + output: none + includes: + - depends_on_pytorch channels: - rapidsai-nightly - rapidsai From 4aad5b4f4cc24903301d21f30b2633342083e64f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:42:00 -0500 Subject: [PATCH 12/43] testing --- ci/test_wheel_cugraph-pyg.sh | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 60c03dc0..11d7e99e 100755 --- 
a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,9 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name cugraph_pyg --stable +CUGRAPH_PYGH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact --pkg_name cugraph-pyg cugraph-gnn 425 python wheel +) +LIBWHOLEGRAPH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel +) +PYLIBWHOLEGRAPH_WHEELHOUSE=$( + rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable +) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -48,6 +58,10 @@ fi rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" +python -c "import cugraph_pyg" +echo "--- DONE ---" +exit 0 + # 
RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" From 426c5ff2757e5e4c550df172963d1ee950e48983 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 11:25:20 -0500 Subject: [PATCH 13/43] more updates --- ci/download-torch-wheels.sh | 2 +- ci/test_wheel_cugraph-pyg.sh | 26 +++++++++++++------------- dependencies.yaml | 9 ++------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh index d2ecde66..21e84051 100755 --- a/ci/download-torch-wheels.sh +++ b/ci/download-torch-wheels.sh @@ -22,7 +22,7 @@ CUDA_MAJOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f1) CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) if \ { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -lt 9 ]; } \ - || { [ "${CUDA_MAJOR}" -eq 13 ] && [ "${CUDA_MINOR}" -gt 0 ]; }; \ + || { [ "${CUDA_MAJOR}" -eq 13 ] && [ "${CUDA_MINOR}" -gt 0 ]; } \ || [ "${CUDA_MAJOR}" -gt 13 ]; then rapids-logger "Skipping 'torch' wheel download. 
(requires CUDA 12.9+ or 13.0, found ${RAPIDS_CUDA_VERSION})" diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 11d7e99e..6ef94920 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name cugraph_pyg --stable -CUGRAPH_PYGH_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact --pkg_name cugraph-pyg cugraph-gnn 425 python wheel -) -LIBWHOLEGRAPH_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel -) -PYLIBWHOLEGRAPH_WHEELHOUSE=$( - rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable -) +LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 +# CUGRAPH_PYG_WHEELHOUSE=$( +# 
RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$( +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +# ) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" diff --git a/dependencies.yaml b/dependencies.yaml index 52bd1260..bdcd0d6c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -403,13 +403,8 @@ dependencies: packages: - *torch_index_cu13 - torch==2.10.0+cu130 - # - # (empty) - # - # Intentionally no fallback entry, to ensure a loud error alerts us to the need - # to update this list and/or CI scripts when new CUDA versions, Python versions, - # etc. are added to the RAPIDS support matrix. - # + - matrix: + packages: - output_types: [conda] matrices: # Prevent fallback to CPU-only pytorch when we want a CUDA variant. 
From 7ac88d3d54108aa28171eb9f755fa8e0301a539f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 13:57:51 -0500 Subject: [PATCH 14/43] make 'torch' optional everywhere --- .pre-commit-config.yaml | 3 +- ci/run_cugraph_pyg_pytests.sh | 8 +--- ci/test_wheel_cugraph-pyg.sh | 46 ++++++++----------- ci/test_wheel_pylibwholegraph.sh | 7 +-- dependencies.yaml | 1 - pyproject.toml | 18 +++++++- .../cugraph_pyg/tensor/dist_matrix.py | 2 +- .../cugraph-pyg/cugraph_pyg/tensor/utils.py | 4 +- python/cugraph-pyg/pyproject.toml | 1 - .../pylibwholegraph/tests/conftest.py | 10 ++++ .../test_wholememory_binding.py | 4 +- .../pylibwholegraph/test_wholememory_io.py | 6 +-- .../ops/test_graph_add_csr_self_loop.py | 7 +-- .../ops/test_graph_append_unique.py | 6 ++- .../ops/test_wholegraph_gather_scatter.py | 7 +-- ...h_unweighted_sample_without_replacement.py | 9 +++- ...aph_weighted_sample_without_replacement.py | 7 ++- .../ops/test_wholememory_cython_binding.py | 9 ++-- .../pylibwholegraph/torch/comm.py | 14 +++--- .../pylibwholegraph/torch/data_loader.py | 9 ++-- .../pylibwholegraph/torch/dlpack_utils.py | 7 +-- .../pylibwholegraph/torch/embedding.py | 23 ++++++---- .../pylibwholegraph/torch/gnn_model.py | 13 ++++-- .../pylibwholegraph/torch/graph_ops.py | 14 +++--- .../pylibwholegraph/torch/graph_structure.py | 12 +++-- .../pylibwholegraph/torch/initialize.py | 7 +-- .../pylibwholegraph/torch/tensor.py | 13 +++--- .../pylibwholegraph/torch/utils.py | 7 +-- .../pylibwholegraph/torch/wholegraph_env.py | 8 ++-- .../pylibwholegraph/torch/wholegraph_ops.py | 10 ++-- .../pylibwholegraph/torch/wholememory_ops.py | 12 +++-- .../pylibwholegraph/utils/imports.py | 46 +++++++++++++++++++ 32 files changed, 216 insertions(+), 134 deletions(-) create mode 100644 python/pylibwholegraph/pylibwholegraph/tests/conftest.py create mode 100644 python/pylibwholegraph/pylibwholegraph/utils/imports.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d995d56c..e16dc623 
100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,8 +19,9 @@ repos: rev: v0.14.3 hooks: - id: ruff-check - args: [--fix] + args: [--config, "pyproject.toml"] - id: ruff-format + args: [--config, "pyproject.toml"] - repo: https://github.com/asottile/yesqa rev: v1.3.0 hooks: diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh index 4431a013..2635d755 100755 --- a/ci/run_cugraph_pyg_pytests.sh +++ b/ci/run_cugraph_pyg_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -9,12 +9,6 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_ pytest --cache-clear --benchmark-disable "$@" . -# Used to skip certain examples in CI due to memory limitations -export CI=true - -# Enable legacy behavior of torch.load for examples relying on ogb -export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 - # Test examples (disabled due to lack of memory) #for e in "$(pwd)"/examples/*.py; do # echo "running example $e" diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 6ef94920..64ad708d 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# 
CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 -# CUGRAPH_PYG_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# LIBWHOLEGRAPH_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$( -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 +CUGRAPH_PYG_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +) +LIBWHOLEGRAPH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +) +PYLIBWHOLEGRAPH_WHEELHOUSE=$( + rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -73,20 +73,10 @@ popd export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 rapids-logger "pytest cugraph-pyg (single GPU)" -pushd python/cugraph-pyg/cugraph_pyg -python -m pytest \ - --cache-clear \ - --benchmark-disable \ - tests - -# Test examples (disabled due to lack of memory) -#for 
e in "$(pwd)"/examples/*.py; do -# rapids-logger "running example $e" -# (yes || true) | python -m torch.distributed.run --nnodes 1 --nproc_per_node 1 $e --dataset_root "${RAPIDS_DATASET_ROOT_DIR}/ogb_datasets" -#done - -# rapids-logger "running bitcoin example" -# (yes || true) | python -m torch.distributed.run --nnodes 1 --nproc_per_node 1 "$(pwd)"/examples/fraud/bitcoin_mnmg.py --dataset_root "${RAPIDS_DATASET_ROOT_DIR}" --embedding_dir "${RAPIDS_DATASET_ROOT_DIR}/bitcoin_embeddings" -# python "$(pwd)"/examples/fraud/bitcoin_rf.py --dataset_root "${RAPIDS_DATASET_ROOT_DIR}" --embedding_dir "${RAPIDS_DATASET_ROOT_DIR}/bitcoin_embeddings" +./ci/run_cugraph_pyg_pytests.sh + +rapids-logger "testing that cugraph-pyg is importable without 'torch'" +pip uninstall --yes 'torch' +python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" popd diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index e7b06090..e97cda35 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -48,9 +48,4 @@ rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" rapids-logger "pytest pylibwholegraph" -cd python/pylibwholegraph/pylibwholegraph/tests -python -m pytest \ - --cache-clear \ - --forked \ - --import-mode=append \ - . +ci/run_pylibwholegraph_pytests.sh diff --git a/dependencies.yaml b/dependencies.yaml index bdcd0d6c..1ca11bc0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -162,7 +162,6 @@ files: table: project.optional-dependencies key: test includes: - - depends_on_pytorch - depends_on_cuml - depends_on_cugraph - depends_on_ogb diff --git a/pyproject.toml b/pyproject.toml index fbe24671..b038729a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 [tool.ruff] @@ -11,4 +11,20 @@ exclude = [ ignore = [ # whitespace before : "E203", + # (flake8-tidy-imports) banned-api + "TID251" ] + +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"torch".msg = "Use the 'torch' fixture instead of 'import torch' in tests (see conftest.py)." + +[tool.ruff.lint.per-file-ignores] +# allow importing 'torch' directly in cugraph-pyg examples +"python/cugraph-pyg/cugraph_pyg/examples/*" = [ + "TID251" ] + +# allow importing 'torch' directly in pylibwholegraph examples +"python/pylibwholegraph/examples/*" = [ + "TID251" ] diff --git a/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py b/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py index 2c811245..ec331f3d 100644 --- a/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py +++ b/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py @@ -106,7 +106,7 @@ def __setitem__( self._col[idx] = val[0] self._row[idx] = val[1] - def __getitem__(self, idx: torch.Tensor) -> torch.Tensor: + def __getitem__(self, idx: "torch.Tensor") -> torch.Tensor: if self._format != "coo": raise ValueError("Getting is currently only supported for COO format") if idx.dim() != 1: diff --git a/python/cugraph-pyg/cugraph_pyg/tensor/utils.py b/python/cugraph-pyg/cugraph_pyg/tensor/utils.py index d8780000..fb994bc9 100644 --- a/python/cugraph-pyg/cugraph_pyg/tensor/utils.py +++ b/python/cugraph-pyg/cugraph_pyg/tensor/utils.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from typing import Union, List @@ -96,7 +96,7 @@ def create_wg_dist_tensor( def create_wg_dist_tensor_from_files( file_list: List[str], shape: list, - dtype: torch.dtype, + dtype: "torch.dtype", location: str = "cpu", partition_book: Union[List[int], None] = None, backend: str = "nccl", diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 1013f7af..10c06e77 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,7 +55,6 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", - "torch>=2.9.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools.dynamic] diff --git a/python/pylibwholegraph/pylibwholegraph/tests/conftest.py b/python/pylibwholegraph/pylibwholegraph/tests/conftest.py new file mode 100644 index 00000000..1d80ddf8 --- /dev/null +++ b/python/pylibwholegraph/pylibwholegraph/tests/conftest.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import pytest + + +@pytest.fixture +def torch(): + """Pass this to any test case that needs 'torch' to be installed""" + return pytest.importorskip("torch") diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py index 7e11b731..c80afd9c 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest @@ -6,7 +6,6 @@ from pylibwholegraph.utils.multiprocess import multiprocess_run from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm from pylibwholegraph.torch.dlpack_utils import torch_import_from_dlpack -import torch # Run with: @@ -14,6 +13,7 @@ def single_test_case(wm_comm, mt, ml, malloc_size, granularity): + torch = pytest.importorskip("torch") world_rank = wm_comm.get_rank() print("Rank=%d testing mt=%s, ml=%s" % (world_rank, mt, ml)) h = wmb.malloc(malloc_size, wm_comm, mt, ml, granularity) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py index c9419c75..29380240 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest @@ -7,13 +7,11 @@ from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm from pylibwholegraph.torch.dlpack_utils import torch_import_from_dlpack from pylibwholegraph.test_utils.test_comm import random_partition -import torch import numpy as np import os import random from functools import partial - gpu_count = None @@ -49,6 +47,7 @@ def load_routine_func( round_robin_size=0, entry_partition=None, ): + torch = pytest.importorskip("torch") wm_comm, _ = init_torch_env_and_create_wm_comm( world_rank, world_size, world_rank, world_size ) @@ -181,6 +180,7 @@ def test_wholememory_load( storage_offset, round_robin_size, partition_method, + torch, ): if embedding_stride < storage_offset + embedding_dim: pytest.skip( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py index 821cf457..85cd0417 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest -import torch from pylibwholegraph.test_utils.test_comm import gen_csr_graph import pylibwholegraph.torch.graph_ops as wg_ops def host_add_csr_self_loop(csr_row_ptr_tensor, csr_col_ptr_tensor): + torch = pytest.importorskip("torch") row_num = csr_row_ptr_tensor.shape[0] - 1 edge_num = csr_col_ptr_tensor.shape[0] output_csr_row_ptr_tensor = torch.empty( @@ -28,6 +28,7 @@ def host_add_csr_self_loop(csr_row_ptr_tensor, csr_col_ptr_tensor): def routine_func(**kwargs): + torch = pytest.importorskip("torch") target_node_count = kwargs["target_node_count"] neighbor_node_count = kwargs["neighbor_node_count"] edge_num = kwargs["edge_num"] @@ -58,7 +59,7 @@ def routine_func(**kwargs): @pytest.mark.parametrize("target_node_count", [101, 113]) @pytest.mark.parametrize("neighbor_node_count", [157, 1987]) @pytest.mark.parametrize("edge_num", [1001, 2305]) -def test_add_csr_self_loop(target_node_count, neighbor_node_count, edge_num): +def test_add_csr_self_loop(target_node_count, neighbor_node_count, edge_num, torch): gpu_count = torch.cuda.device_count() assert gpu_count > 0 routine_func( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py index e325ef51..5856d943 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py @@ -1,12 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest -import torch import pylibwholegraph.torch.graph_ops as wg_ops def host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): + torch = pytest.importorskip("torch") output_neighbor_raw_to_unique = torch.empty( (neighbor_node_tensor.size(0)), dtype=torch.int32 ) @@ -19,6 +19,7 @@ def host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): def routine_func(**kwargs): + torch = pytest.importorskip("torch") target_node_count = kwargs["target_node_count"] neighbor_node_count = kwargs["neighbor_node_count"] target_node_dtype = kwargs["target_node_dtype"] @@ -80,6 +81,7 @@ def test_append_unique( neighbor_node_count, target_node_dtype, need_neighbor_raw_to_unique, + torch, ): gpu_count = torch.cuda.device_count() assert gpu_count > 0 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py index a3b8849b..cd1af84f 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py @@ -1,20 +1,20 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +import pytest import pylibwholegraph.binding.wholememory_binding as wmb from pylibwholegraph.utils.multiprocess import multiprocess_run from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm from pylibwholegraph.torch.dlpack_utils import torch_import_from_dlpack from pylibwholegraph.test_utils.test_comm import random_partition -import torch import pylibwholegraph.torch.wholememory_ops as wm_ops - # PYTHONPATH=../:$PYTHONPATH python3 -m pytest \ # ../tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py -s def gen_int_embedding(indice_tensor, embedding_dim, output_type): + torch = pytest.importorskip("torch") if embedding_dim == 0: embedding_dim = 1 # unsqueeze 2D for input (2D is required for scatter op) indice_count = indice_tensor.shape[0] @@ -41,6 +41,7 @@ def scatter_gather_test_cast( use_python_binding=True, entry_partition=None, ): + torch = pytest.importorskip("torch") world_rank = wm_comm.get_rank() world_size = wm_comm.get_size() print( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index c436e9d1..6719f5ea 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest import pylibwholegraph.binding.wholememory_binding as wmb from pylibwholegraph.utils.multiprocess import multiprocess_run from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm -import torch from functools import partial from pylibwholegraph.test_utils.test_comm import ( gen_csr_graph, @@ -19,8 +18,11 @@ import pylibwholegraph.torch.wholegraph_ops as wg_ops import random +torch = pytest.importorskip("torch") + def unweighte_sample_without_replacement_base(random_values, M, N): + torch = pytest.importorskip("torch") a = torch.empty((M,), dtype=torch.int32) Q = torch.arange(N, dtype=torch.int32) for i in range(M): @@ -39,6 +41,7 @@ def host_unweighted_sample_without_replacement_func( max_sample_count, random_seed, ): + torch = pytest.importorskip("torch") output_dest_tensor = torch.empty((total_sample_count,), dtype=col_id_dtype) output_center_localid_tensor = torch.empty((total_sample_count,), dtype=torch.int32) output_edge_gid_tensor = torch.empty((total_sample_count,), dtype=torch.int64) @@ -211,6 +214,7 @@ def host_unweighted_sample_without_replacement( def routine_func(world_rank: int, world_size: int, **kwargs): + torch = pytest.importorskip("torch") wm_comm, _ = init_torch_env_and_create_wm_comm( world_rank, world_size, world_rank, world_size ) @@ -368,6 +372,7 @@ def test_wholegraph_unweighted_sample( wholememory_type, need_center_local_output, need_edge_output, + torch, ): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py index 10ef139e..1f47f7fc 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py +++ 
b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import pytest from pylibwholegraph.utils.multiprocess import multiprocess_run from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm import pylibwholegraph.binding.wholememory_binding as wmb -import torch import random from functools import partial from pylibwholegraph.test_utils.test_comm import ( @@ -32,6 +31,7 @@ def host_weighted_sample_without_replacement_func( max_sample_count, random_seed, ): + torch = pytest.importorskip("torch") output_dest_tensor = torch.empty((total_sample_count,), dtype=col_id_dtype) output_center_localid_tensor = torch.empty((total_sample_count,), dtype=torch.int32) output_edge_gid_tensor = torch.empty((total_sample_count,), dtype=torch.int64) @@ -116,6 +116,7 @@ def host_weighted_sample_without_replacement( col_id_dtype, random_seed, ): + torch = pytest.importorskip("torch") center_nodes_count = center_nodes.size(0) output_sample_offset_tensor = host_get_sample_offset_tensor( host_csr_row_ptr, center_nodes, max_sample_count @@ -166,6 +167,7 @@ def host_weighted_sample_without_replacement( def routine_func(world_rank: int, world_size: int, **kwargs): + torch = pytest.importorskip("torch") wm_comm, _ = init_torch_env_and_create_wm_comm( world_rank, world_size, world_rank, world_size ) @@ -372,6 +374,7 @@ def test_wholegraph_weighted_sample( wholememory_type, need_center_local_output, need_edge_output, + torch, ): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholememory_cython_binding.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholememory_cython_binding.py index 
a4726383..0e3310c2 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholememory_cython_binding.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholememory_cython_binding.py @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import pytest import pylibwholegraph.binding.wholememory_binding as wmb -import torch from pylibwholegraph.torch.wholegraph_env import ( get_stream, get_wholegraph_env_fns, @@ -14,7 +13,7 @@ import time -def test_smoke(): +def test_smoke(torch): torch.cuda.set_device(0) output_len = 128 embed_dim = 10 @@ -55,7 +54,7 @@ def test_smoke(): assert wmb.py_get_wholememory_tensor_count() == 0 -def test_loop_memory(): +def test_loop_memory(torch): torch.cuda.set_device(0) embedding_dim = 1 output_len = 1 @@ -107,7 +106,7 @@ def test_loop_memory(): @pytest.mark.parametrize("output_len", list(range(1, 100, 17))) @pytest.mark.parametrize("embed_dim", list(range(1, 128, 23))) -def test_random_alloc(output_len, embed_dim): +def test_random_alloc(output_len, embed_dim, torch): torch.cuda.set_device(0) input_tensor = torch.rand((embed_dim,), device="cuda") indice_tensor = torch.arange(output_len, device="cuda") diff --git a/python/pylibwholegraph/pylibwholegraph/torch/comm.py b/python/pylibwholegraph/pylibwholegraph/torch/comm.py index 634473f7..85be715a 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/comm.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/comm.py @@ -1,10 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -import torch -import torch.distributed as dist -import torch.utils.dlpack import pylibwholegraph.binding.wholememory_binding as wmb +from pylibwholegraph.utils.imports import import_optional from .utils import ( str_to_wmb_wholememory_distributed_backend_type, wholememory_distributed_backend_type_to_str, @@ -12,6 +10,8 @@ str_to_wmb_wholememory_location, ) +torch = import_optional("torch") + global_communicators = {} local_node_communicator = None local_device_communicator = None @@ -140,13 +140,13 @@ def create_group_communicator(group_size: int = -1, comm_stride: int = 1): :param comm_stride: Stride of each rank in each group :return: WholeMemoryCommunicator """ - world_size = dist.get_world_size() + world_size = torch.distributed.get_world_size() if group_size == -1: group_size = world_size strided_group_size = group_size * comm_stride assert world_size % strided_group_size == 0 strided_group_count = world_size // strided_group_size - world_rank = dist.get_rank() + world_rank = torch.distributed.get_rank() strided_group_idx = world_rank // strided_group_size idx_in_strided_group = world_rank % strided_group_size inner_group_idx = idx_in_strided_group % comm_stride @@ -161,7 +161,7 @@ def create_group_communicator(group_size: int = -1, comm_stride: int = 1): tmp_wm_uid = wmb.PyWholeMemoryUniqueID() uid_th = torch.utils.dlpack.from_dlpack(tmp_wm_uid.__dlpack__()) uid_th_cuda = uid_th.cuda() - dist.broadcast(uid_th_cuda, group_root_rank) + torch.distributed.broadcast(uid_th_cuda, group_root_rank) uid_th.copy_(uid_th_cuda.cpu()) if strided_group_idx == strided_group and inner_group_idx == inner_group: wm_uid_th = torch.utils.dlpack.from_dlpack(wm_uid.__dlpack__()) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py index 35dd8e60..b87801f9 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py +++ 
b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py @@ -1,12 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import numpy as np -import torch -from torch.utils.data import Dataset +from pylibwholegraph.utils.imports import import_optional +torch = import_optional("torch") -class NodeClassificationDataset(Dataset): + +class NodeClassificationDataset(torch.utils.data.Dataset): def __init__(self, raw_dataset): self.dataset = raw_dataset diff --git a/python/pylibwholegraph/pylibwholegraph/torch/dlpack_utils.py b/python/pylibwholegraph/pylibwholegraph/torch/dlpack_utils.py index 25f36bf3..74e1ae82 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/dlpack_utils.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/dlpack_utils.py @@ -1,8 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -import torch -import torch.utils.dlpack +from pylibwholegraph.utils.imports import import_optional + +torch = import_optional("torch") def torch_import_from_dlpack(dp): diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index aad0a552..93bba90a 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb -import torch +from pylibwholegraph.utils.imports import import_optional from .utils import torch_dtype_to_wholememory_dtype, get_file_size from .utils import str_to_wmb_wholememory_location, str_to_wmb_wholememory_memory_type from .utils import ( @@ -20,6 +20,9 @@ from .wholegraph_env import wrap_torch_tensor, get_wholegraph_env_fns, get_stream +torch = import_optional("torch") + + class WholeMemoryOptimizer(object): """ Sparse Optimizer for WholeMemoryEmbedding. @@ -211,8 +214,8 @@ class EmbeddingLookupFn(torch.autograd.Function): @staticmethod def forward( ctx, - indice: torch.Tensor, - dummy_input: torch.Tensor, + indice: "torch.Tensor", + dummy_input: "torch.Tensor", wm_embedding, is_training: bool = False, force_dtype: Union[torch.dtype, None] = None, @@ -226,7 +229,7 @@ def forward( return output_tensor @staticmethod - def backward(ctx, grad_outputs: torch.Tensor): + def backward(ctx, grad_outputs: "torch.Tensor"): indice, output_tensor, dummy_input = ctx.saved_tensors wm_embedding = ctx.wm_embedding wm_embedding.add_gradients(indice, grad_outputs) @@ -273,7 +276,7 @@ def need_grad(self): def gather( self, - indice: torch.Tensor, + indice: "torch.Tensor", *, is_training: bool = False, force_dtype: Union[torch.dtype, None] = None, @@ -304,7 +307,7 @@ def gather( ) return output_tensor - def add_gradients(self, indice: torch.Tensor, grad_outputs: torch.Tensor): + def add_gradients(self, indice: "torch.Tensor", grad_outputs: "torch.Tensor"): self.sparse_indices.append(indice) self.sparse_grads.append(grad_outputs) @@ -373,7 +376,7 @@ def create_embedding( comm: WholeMemoryCommunicator, memory_type: str, memory_location: str, - dtype: torch.dtype, + dtype: "torch.dtype", sizes: List[int], *, cache_policy: Union[WholeMemoryCachePolicy, None] = None, @@ -462,7 +465,7 @@ def create_embedding_from_filelist( memory_type: str, memory_location: str, filelist: 
Union[List[str], str], - dtype: torch.dtype, + dtype: "torch.dtype", last_dim_size: int, *, cache_policy: Union[WholeMemoryCachePolicy, None] = None, @@ -547,7 +550,7 @@ def __init__(self, wm_embedding: WholeMemoryEmbedding): self.embedding_gather_fn = EmbeddingLookupFn.apply def forward( - self, indice: torch.Tensor, force_dtype: Union[torch.dtype, None] = None + self, indice: "torch.Tensor", force_dtype: Union[torch.dtype, None] = None ): return self.embedding_gather_fn( indice, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index b779862c..888e07af 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -1,11 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -import torch +from pylibwholegraph.utils.imports import import_optional from .graph_structure import GraphStructure from .embedding import WholeMemoryEmbedding, WholeMemoryEmbeddingModule from .common_options import parse_max_neighbors -import torch.nn.functional as F + +torch = import_optional("torch") framework_name = None @@ -185,8 +186,10 @@ def forward(self, ids): sub_graph, ) if i != self.num_layer - 1: - x_feat = F.relu(x_feat) - x_feat = F.dropout(x_feat, self.dropout, training=self.training) + x_feat = torch.nn.functional.relu(x_feat) + x_feat = torch.nn.functional.dropout( + x_feat, self.dropout, training=self.training + ) out_feat = x_feat return out_feat diff --git a/python/pylibwholegraph/pylibwholegraph/torch/graph_ops.py b/python/pylibwholegraph/pylibwholegraph/torch/graph_ops.py index ae352444..c2bec6fe 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/graph_ops.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/graph_ops.py @@ -1,7 +1,6 @@ -# SPDX-FileCopyrightText: Copyright 
(c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 - -import torch +from pylibwholegraph.utils.imports import import_optional import pylibwholegraph.binding.wholememory_binding as wmb from .wholegraph_env import ( get_stream, @@ -10,10 +9,12 @@ wrap_torch_tensor, ) +torch = import_optional("torch") + def append_unique( - target_node_tensor: torch.Tensor, - neighbor_node_tensor: torch.Tensor, + target_node_tensor: "torch.Tensor", + neighbor_node_tensor: "torch.Tensor", need_neighbor_raw_to_unique: bool = False, ): """ @@ -60,7 +61,8 @@ def append_unique( def add_csr_self_loop( - csr_row_ptr_tensor: torch.Tensor, csr_col_ptr_tensor: torch.Tensor + csr_row_ptr_tensor: "torch.Tensor", + csr_col_ptr_tensor: "torch.Tensor", ): """ Add self loop to sampled CSR graph diff --git a/python/pylibwholegraph/pylibwholegraph/torch/graph_structure.py b/python/pylibwholegraph/pylibwholegraph/torch/graph_structure.py index 700b94c9..bb6d75b3 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/graph_structure.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/graph_structure.py @@ -1,12 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -import torch +from pylibwholegraph.utils.imports import import_optional from typing import Union, List from .tensor import WholeMemoryTensor from . import graph_ops from . 
import wholegraph_ops +torch = import_optional("torch") + class GraphStructure(object): r"""Graph structure storage @@ -67,7 +69,7 @@ def set_edge_attribute(self, attr_name: str, attr_tensor: WholeMemoryTensor): def unweighted_sample_without_replacement_one_hop( self, - center_nodes_tensor: torch.Tensor, + center_nodes_tensor: "torch.Tensor", max_sample_count: int, *, random_seed: Union[int, None] = None, @@ -98,7 +100,7 @@ def unweighted_sample_without_replacement_one_hop( def weighted_sample_without_replacement_one_hop( self, weight_name: str, - center_nodes_tensor: torch.Tensor, + center_nodes_tensor: "torch.Tensor", max_sample_count: int, *, random_seed: Union[int, None] = None, @@ -133,7 +135,7 @@ def weighted_sample_without_replacement_one_hop( def multilayer_sample_without_replacement( self, - node_ids: torch.Tensor, + node_ids: "torch.Tensor", max_neighbors: List[int], weight_name: Union[str, None] = None, ): diff --git a/python/pylibwholegraph/pylibwholegraph/torch/initialize.py b/python/pylibwholegraph/pylibwholegraph/torch/initialize.py index 3f83ee64..6523779b 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/initialize.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/initialize.py @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import os -import torch -import torch.utils.dlpack +from pylibwholegraph.utils.imports import import_optional import pylibwholegraph.binding.wholememory_binding as wmb from .comm import ( set_world_info, @@ -13,6 +12,8 @@ ) from .utils import str_to_wmb_wholememory_log_level +torch = import_optional("torch") + def init( world_rank: int, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/tensor.py b/python/pylibwholegraph/pylibwholegraph/torch/tensor.py index c9950b3e..e2de562b 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/tensor.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/tensor.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb -import torch +from pylibwholegraph.utils.imports import import_optional from .utils import ( torch_dtype_to_wholememory_dtype, wholememory_dtype_to_torch_dtype, @@ -15,6 +15,7 @@ from .dlpack_utils import torch_import_from_dlpack from .wholegraph_env import wrap_torch_tensor, get_wholegraph_env_fns, get_stream +torch = import_optional("torch") WholeMemoryMemoryType = wmb.WholeMemoryMemoryType WholeMemoryMemoryLocation = wmb.WholeMemoryMemoryLocation @@ -49,7 +50,7 @@ def get_comm(self): ) def gather( - self, indice: torch.Tensor, *, force_dtype: Union[torch.dtype, None] = None + self, indice: "torch.Tensor", *, force_dtype: Union[torch.dtype, None] = None ): assert indice.dim() == 1 embedding_dim = self.shape[1] if self.dim() == 2 else 1 @@ -71,7 +72,7 @@ def gather( ) return output_tensor.view(-1) if self.dim() == 1 else output_tensor - def scatter(self, input_tensor: torch.Tensor, indice: torch.Tensor): + def scatter(self, input_tensor: "torch.Tensor", indice: "torch.Tensor"): assert indice.dim() == 1 assert input_tensor.dim() == self.dim() assert indice.shape[0] == 
input_tensor.shape[0] @@ -201,7 +202,7 @@ def create_wholememory_tensor( memory_type: str, memory_location: str, sizes: List[int], - dtype: torch.dtype, + dtype: "torch.dtype", strides: List[int], tensor_entry_partition: Union[List[int], None] = None, ): @@ -250,7 +251,7 @@ def create_wholememory_tensor_from_filelist( memory_type: str, memory_location: str, filelist: Union[List[str], str], - dtype: torch.dtype, + dtype: "torch.dtype", last_dim_size: int = 0, last_dim_strides: int = -1, tensor_entry_partition: Union[List[int], None] = None, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/utils.py b/python/pylibwholegraph/pylibwholegraph/torch/utils.py index a1b296da..4f27061a 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/utils.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/utils.py @@ -1,15 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb -import torch +from pylibwholegraph.utils.imports import import_optional import os +torch = import_optional("torch") WholeMemoryDataType = wmb.WholeMemoryDataType -def torch_dtype_to_wholememory_dtype(torch_dtype: torch.dtype): +def torch_dtype_to_wholememory_dtype(torch_dtype: "torch.dtype"): """ Convert torch.dtype to WholeMemoryDataType :param torch_dtype: torch.dtype diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index f59418fe..8829b9d7 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -1,14 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import os.path import importlib -import torch import pylibwholegraph import pylibwholegraph.binding.wholememory_binding as wmb +from pylibwholegraph.utils.imports import import_optional from typing import Union from .utils import wholememory_dtype_to_torch_dtype, torch_dtype_to_wholememory_dtype +torch = import_optional("torch") + default_wholegraph_env_context = None torch_cpp_ext_loaded = False torch_cpp_ext_lib = None @@ -46,7 +48,7 @@ def get_c_context(self): else: return id(self) - def set_tensor(self, t: torch.Tensor): + def set_tensor(self, t: "torch.Tensor"): self.tensor = t def get_handle(self): diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_ops.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_ops.py index c6808010..70b61ac4 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_ops.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_ops.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -import torch import pylibwholegraph.binding.wholememory_binding as wmb +from pylibwholegraph.utils.imports import import_optional from .wholegraph_env import ( get_stream, TorchMemoryContext, @@ -12,11 +12,13 @@ from typing import Union import random +torch = import_optional("torch") + def unweighted_sample_without_replacement( wm_csr_row_ptr_tensor: wmb.PyWholeMemoryTensor, wm_csr_col_ptr_tensor: wmb.PyWholeMemoryTensor, - center_nodes_tensor: torch.Tensor, + center_nodes_tensor: "torch.Tensor", max_sample_count: int, random_seed: Union[int, None] = None, need_center_local_output: bool = False, @@ -85,7 +87,7 @@ def weighted_sample_without_replacement( wm_csr_row_ptr_tensor: wmb.PyWholeMemoryTensor, wm_csr_col_ptr_tensor: wmb.PyWholeMemoryTensor, wm_csr_weight_ptr_tensor: wmb.PyWholeMemoryTensor, - center_nodes_tensor: torch.Tensor, + center_nodes_tensor: "torch.Tensor", max_sample_count: int, random_seed: Union[int, None] = None, need_center_local_output: bool = False, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholememory_ops.py b/python/pylibwholegraph/pylibwholegraph/torch/wholememory_ops.py index 9cb518c5..dfcf7041 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholememory_ops.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholememory_ops.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -import torch import pylibwholegraph.binding.wholememory_binding as wmb +from pylibwholegraph.utils.imports import import_optional from .wholegraph_env import ( get_stream, get_wholegraph_env_fns, @@ -10,10 +10,12 @@ ) from .utils import wholememory_dtype_to_torch_dtype +torch = import_optional("torch") + def wholememory_gather_forward_functor( wholememory_tensor: wmb.PyWholeMemoryTensor, - indices_tensor: torch.Tensor, + indices_tensor: "torch.Tensor", requires_grad=False, torch_output_dtype=None, ): @@ -48,8 +50,8 @@ def wholememory_gather_forward_functor( def wholememory_scatter_functor( - input_tensor: torch.Tensor, - indices_tensor: torch.Tensor, + input_tensor: "torch.Tensor", + indices_tensor: "torch.Tensor", wholememory_tensor: wmb.PyWholeMemoryTensor, ): """ diff --git a/python/pylibwholegraph/pylibwholegraph/utils/imports.py b/python/pylibwholegraph/pylibwholegraph/utils/imports.py new file mode 100644 index 00000000..67be22fe --- /dev/null +++ b/python/pylibwholegraph/pylibwholegraph/utils/imports.py @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +from importlib import import_module + + +class MissingModule: + """ + Raises RuntimeError when any attribute is accessed on instances of this + class. + + Instances of this class are returned by import_optional() when a module + cannot be found, which allows for code to import optional dependencies, and + have only the code paths that use the module affected. + """ + + def __init__(self, mod_name): + self.name = mod_name + + def __getattr__(self, attr): + raise RuntimeError(f"This feature requires the '{self.name}' package/module") + + +def import_optional(mod, default_mod_class=MissingModule): + """ + import the "optional" module 'mod' and return the module object or object. + If the import raises ModuleNotFoundError, returns an instance of + default_mod_class. 
+ + This method was written to support importing "optional" dependencies so + code can be written to run even if the dependency is not installed. + + Example + ------- + >> from pylibwholegraph.utils.imports import import_optional + >> torch = import_optional("torch") # torch is not installed + >> torch.set_num_threads(1) + Traceback (most recent call last): + File "", line 1, in + ... + RuntimeError: This feature requires the 'torch' package/module + """ + try: + return import_module(mod) + except ModuleNotFoundError: + return default_mod_class(mod_name=mod) From 104b8bfe46011e52410319c19621126554e87068 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 14:21:23 -0500 Subject: [PATCH 15/43] more changes --- ci/test_wheel_cugraph-pyg.sh | 17 ++++++++--------- ci/test_wheel_pylibwholegraph.sh | 13 +++++++++++-- pyproject.toml | 6 ++++-- .../cugraph-pyg/cugraph_pyg/tests/conftest.py | 17 +++++++++++------ .../tests/tensor/test_dist_matrix_mg.py | 8 +++++--- .../pylibwholegraph/test_utils/test_comm.py | 8 ++++++-- .../pylibwholegraph/tests/conftest.py | 2 +- .../pylibwholegraph/test_wholememory_io.py | 2 ++ .../ops/test_graph_append_unique.py | 2 ++ ...graph_weighted_sample_without_replacement.py | 4 ++-- .../pylibwholegraph/torch/distributed_launch.py | 9 +++++---- .../pylibwholegraph/torch/wholegraph_env.py | 2 -- 12 files changed, 57 insertions(+), 33 deletions(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 64ad708d..4aed143f 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -43,8 +43,10 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. +torch_installed=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
+ torch_installed=false else PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) fi @@ -58,10 +60,6 @@ fi rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" -python -c "import cugraph_pyg" -echo "--- DONE ---" -exit 0 - # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" @@ -72,11 +70,12 @@ popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 -rapids-logger "pytest cugraph-pyg (single GPU)" -./ci/run_cugraph_pyg_pytests.sh +if [[ "${torch_installed}" == "true" ]]; then + rapids-logger "pytest cugraph-pyg (single GPU, with 'torch')" + ./ci/run_cugraph_pyg_pytests.sh +fi -rapids-logger "testing that cugraph-pyg is importable without 'torch'" +rapids-logger "pytest cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" - -popd +./ci/run_cugraph_pyg_pytests.sh diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index e97cda35..6125f20d 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -36,8 +36,10 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. +torch_installed=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
+ torch_installed=false else PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) fi @@ -47,5 +49,12 @@ rapids-logger "Installing Packages" rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" -rapids-logger "pytest pylibwholegraph" -ci/run_pylibwholegraph_pytests.sh +if [[ "${torch_installed}" == "true" ]]; then + rapids-logger "pytest pylibwholegraph (with 'torch')" + ./ci/run_pylibwholegraph_pytests.sh +fi + +rapids-logger "pytest pylibwholegraph (no 'torch')" +pip uninstall --yes 'torch' +python -c "import pylibwholegraph; print(pylibwholegraph.__version__)" +./ci/run_pylibwholegraph_pytests.sh diff --git a/pyproject.toml b/pyproject.toml index b038729a..97a06025 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,9 @@ exclude = [ [tool.ruff.lint] ignore = [ # whitespace before : - "E203", + "E203" +] +select = [ # (flake8-tidy-imports) banned-api "TID251" ] @@ -25,6 +27,6 @@ ignore = [ ] # allow importing 'torch' directly in pylibwholegraph examples -"python/pylibwholegraph/examples/" = [ +"python/pylibwholegraph/examples/*" = [ "TID251" ] diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index f480aeb8..594dc3b7 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest import os -import torch from pylibcugraph.comms import ( @@ -28,8 +27,14 @@ gpubenchmark = pytest_benchmark.plugin.benchmark +@pytest.fixture(scope="function") +def torch(): + """Pass this to any test case that needs 'torch' to be installed""" + return pytest.importorskip("torch") + + @pytest.fixture(scope="module") -def single_pytorch_worker(): +def single_pytorch_worker(torch): os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = "12355" os.environ["LOCAL_RANK"] = "0" @@ -44,14 +49,14 @@ def single_pytorch_worker(): @pytest.fixture -def basic_pyg_graph_1(): +def basic_pyg_graph_1(torch): edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]]) size = (4, 4) return edge_index, size @pytest.fixture -def basic_pyg_graph_2(): +def basic_pyg_graph_2(torch): edge_index = torch.tensor( [ [0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9], @@ -63,7 +68,7 @@ def basic_pyg_graph_2(): @pytest.fixture -def sample_pyg_hetero_data(): +def sample_pyg_hetero_data(torch): torch.manual_seed(12345) raw_data_dict = { "v0": torch.randn(6, 3), diff --git a/python/cugraph-pyg/cugraph_pyg/tests/tensor/test_dist_matrix_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/tensor/test_dist_matrix_mg.py index 0ef4ca00..ae2d050e 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/tensor/test_dist_matrix_mg.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/tensor/test_dist_matrix_mg.py @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import os import pytest -import torch from cugraph_pyg.tensor import DistMatrix from pylibwholegraph.torch.initialize import init as wm_init @@ -13,6 +12,7 @@ def run_test_dist_matrix_creation(rank, world_size, device): """Test basic DistMatrix creation from tensors""" + torch = pytest.importorskip("torch") torch.cuda.set_device(rank) os.environ["MASTER_ADDR"] = "localhost" @@ -55,6 +55,7 @@ def run_test_dist_matrix_creation(rank, world_size, device): def run_test_dist_matrix_empty_creation(rank, world_size, device): """Test DistMatrix creation with empty initialization""" + torch = pytest.importorskip("torch") torch.cuda.set_device(rank) os.environ["MASTER_ADDR"] = "localhost" @@ -102,6 +103,7 @@ def run_test_dist_matrix_empty_creation(rank, world_size, device): def run_test_dist_matrix_invalid_cases(rank, world_size, device): """Test DistMatrix creation with invalid cases""" + torch = pytest.importorskip("torch") torch.cuda.set_device(rank) os.environ["MASTER_ADDR"] = "localhost" @@ -138,7 +140,7 @@ def run_test_dist_matrix_invalid_cases(rank, world_size, device): @pytest.mark.parametrize("device", ["cpu", "cuda"]) -def test_dist_matrix(device): +def test_dist_matrix(device, torch): """Run all DistMatrix tests""" world_size = torch.cuda.device_count() diff --git a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py index 3197b53f..5860fbd5 100644 --- a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py +++ b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -import torch import numpy as np +import pytest import pylibwholegraph.binding.wholememory_binding as wmb from pylibwholegraph.torch.dlpack_utils import torch_import_from_dlpack from packaging import version @@ -17,6 +17,7 @@ def gen_csr_format_from_dense_matrix( csr_col_dtype, weight_dtype, ): + torch = pytest.importorskip("torch") row_num = matrix_tensor.shape[0] col_num = matrix_tensor.shape[1] assert row_num == graph_node_count @@ -49,6 +50,7 @@ def gen_csr_graph( csr_col_dtype=torch.int32, weight_dtype=torch.float32, ): + torch = pytest.importorskip("torch") if neighbor_node_count is None: neighbor_node_count = graph_node_count all_count = graph_node_count * neighbor_node_count @@ -95,6 +97,7 @@ def host_sample_all_neighbors( col_id_dtype, total_sample_count, ): + torch = pytest.importorskip("torch") output_dest_tensor = torch.empty((total_sample_count,), dtype=col_id_dtype) output_center_localid_tensor = torch.empty((total_sample_count,), dtype=torch.int32) output_edge_gid_tensor = torch.empty((total_sample_count,), dtype=torch.int64) @@ -133,6 +136,7 @@ def copy_host_1D_tensor_to_wholememory( def host_get_sample_offset_tensor(host_csr_row_ptr, center_nodes, max_sample_count): + torch = pytest.importorskip("torch") center_nodes_count = center_nodes.size(0) output_sample_offset_tensor = torch.empty( (center_nodes_count + 1,), dtype=torch.int32 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/conftest.py b/python/pylibwholegraph/pylibwholegraph/tests/conftest.py index 1d80ddf8..4032ca07 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/conftest.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/conftest.py @@ -4,7 +4,7 @@ import pytest -@pytest.fixture +@pytest.fixture(scope="function") def torch(): """Pass this to any test case that needs 'torch' to be installed""" return pytest.importorskip("torch") diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py 
b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py index 29380240..bf093dca 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py @@ -294,6 +294,7 @@ def store_routine_func( storage_offset, entry_partition, ): + torch = pytest.importorskip("torch") (wm_comm, _) = init_torch_env_and_create_wm_comm( world_rank, world_size, world_rank, world_size ) @@ -347,6 +348,7 @@ def test_wholememory_store( embedding_stride, storage_offset, partition_method, + torch, ): if embedding_stride < storage_offset + embedding_dim: pytest.skip( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py index 5856d943..77c2dfdb 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py @@ -4,6 +4,8 @@ import pytest import pylibwholegraph.torch.graph_ops as wg_ops +torch = pytest.importorskip("torch") + def host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): torch = pytest.importorskip("torch") diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py index 1f47f7fc..a597f5d7 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py @@ -355,7 +355,12 @@ def routine_func(world_rank: int, world_size: int, **kwargs): 
@pytest.mark.parametrize("graph_edge_count", [1043]) @pytest.mark.parametrize("max_sample_count", [11]) @pytest.mark.parametrize("center_node_count", [13]) -@pytest.mark.parametrize("center_node_dtype", [torch.int32, torch.int64]) +@pytest.mark.parametrize("center_node_dtype", ["int32", "int64"]) @pytest.mark.parametrize("col_id_dtype", [0, 1]) @pytest.mark.parametrize("csr_weight_dtype", [2, 3]) @pytest.mark.parametrize("wholememory_location", ([0, 1])) @@ -393,7 +393,7 @@ def test_wholegraph_weighted_sample( graph_edge_count=graph_edge_count, max_sample_count=max_sample_count, center_node_count=center_node_count, - center_node_dtype=center_node_dtype, + center_node_dtype=getattr(torch, center_node_dtype), col_id_dtype=col_id_dtype, csr_weight_dtype=csr_weight_dtype, wholememory_location=wholememory_location, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/distributed_launch.py b/python/pylibwholegraph/pylibwholegraph/torch/distributed_launch.py index e7990546..50dcaae0 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/distributed_launch.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/distributed_launch.py @@ -1,9 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +from pylibwholegraph.utils.imports import import_optional import os from argparse import ArgumentParser +torch = import_optional("torch") + class DistributedConfig(object): def __init__(self): @@ -281,10 +284,8 @@ def distributed_launch_spawn(args, main_func): ) ) - import torch.multiprocessing as mp - if distributed_config.local_size > 1: - mp.spawn( + torch.multiprocessing.spawn( main_spawn_routine, nprocs=distributed_config.local_size, args=(main_func, distributed_config), diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index 8829b9d7..72f0d8fd 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -173,8 +173,6 @@ def get_cpp_extension_src_path(): def compile_cpp_extension(): - import torch.utils.cpp_extension - global torch_cpp_ext_loaded global torch_cpp_ext_lib cpp_extension_path = os.path.join(get_cpp_extension_src_path(), "torch_cpp_ext") From 4b479f7a5d5b39b6eb8c11a2dee16754670ec92c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 15:45:07 -0500 Subject: [PATCH 16/43] more torch fixes... unconditional references in argument defaults --- ci/test_python.sh | 12 +++++++ ci/test_wheel_cugraph-pyg.sh | 36 +++++++++---------- .../pylibwholegraph/test_utils/test_comm.py | 9 ++--- .../ops/test_graph_add_csr_self_loop.py | 3 +- ...h_unweighted_sample_without_replacement.py | 7 +++- ...aph_weighted_sample_without_replacement.py | 7 +++- 6 files changed, 49 insertions(+), 25 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 09aeb27e..faff188e 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -55,6 +55,12 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-logger "Check GPU usage" nvidia-smi + # 'torch' is an optional dependency of 'cugraph_pyg'... 
confirm that it's available + # here, to reduce the risk of accidentally skipping most tests because it accidentally + # wasn't installed. + rapids-logger "Confirming that PyTorch is installed" + python -c "import torch; assert torch.cuda.is_available() is True" + rapids-logger "pytest cugraph_pyg (single GPU)" ./ci/run_cugraph_pyg_pytests.sh \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-pyg.xml" \ @@ -88,6 +94,12 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then conda activate test_pylibwholegraph set -u + # 'torch' is an optional dependency of 'pylibwholegraph'... confirm that it's available + # here, to reduce the risk of accidentally skipping most tests because it accidentally + # wasn't installed. + rapids-logger "Confirming that PyTorch is installed" + python -c "import torch; assert torch.cuda.is_available() is True" + rapids-print-env rapids-logger "Check GPU usage" diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 4aed143f..f85ec63d 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 -CUGRAPH_PYG_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -) -LIBWHOLEGRAPH_WHEELHOUSE=$( - 
RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -) -PYLIBWHOLEGRAPH_WHEELHOUSE=$( - rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -) +LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 +# CUGRAPH_PYG_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$( +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +# ) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -61,11 +61,11 @@ rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" # RAPIDS_DATASET_ROOT_DIR is used by test scripts -export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh --test -popd +# export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" +# mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +# pushd "${RAPIDS_DATASET_ROOT_DIR}" +# ./get_test_data.sh --test +# popd # Enable legacy behavior of torch.load for examples relying on ogb export 
TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 diff --git a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py index 5860fbd5..bbfd2163 100644 --- a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py +++ b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py @@ -45,10 +45,11 @@ def gen_csr_format_from_dense_matrix( def gen_csr_graph( graph_node_count, graph_edge_count, - neighbor_node_count=None, - csr_row_dtype=torch.int64, - csr_col_dtype=torch.int32, - weight_dtype=torch.float32, + *, + neighbor_node_count, + csr_row_dtype, + csr_col_dtype, + weight_dtype, ): torch = pytest.importorskip("torch") if neighbor_node_count is None: diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py index 85cd0417..94e9b2c9 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py @@ -37,8 +37,9 @@ def routine_func(**kwargs): target_node_count, edge_num, neighbor_node_count, - csr_row_dtype=torch.int32, + csr_row_dtype=torch.int64, csr_col_dtype=torch.int32, + weight_dtype=torch.float32, ) csr_row_ptr_tensor_cuda = csr_row_ptr_tensor.cuda() csr_col_ptr_tensor_cuda = csr_col_ptr_tensor.cuda() diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index 6719f5ea..b4439d42 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++ 
b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -380,7 +380,12 @@ def test_wholegraph_unweighted_sample( if col_id_dtype == wmb.WholeMemoryDataType.DtInt64: csr_col_dtype = torch.int64 host_csr_row_ptr, host_csr_col_ptr, _ = gen_csr_graph( - graph_node_count, graph_edge_count, csr_col_dtype=csr_col_dtype + graph_node_count, + graph_edge_count, + neighbor_node_count=None, + csr_row_dtype=torch.int64, + csr_col_dtype=csr_col_dtype, + weight_dtype=torch.float32, ) routine_func_partial = partial( routine_func, diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py index a597f5d7..7e473f60 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py @@ -382,7 +382,12 @@ def test_wholegraph_weighted_sample( if col_id_dtype == 1: csr_col_dtype = torch.int64 host_csr_row_ptr, host_csr_col_ptr, host_csr_weight_ptr = gen_csr_graph( - graph_node_count, graph_edge_count, csr_col_dtype=csr_col_dtype + graph_node_count, + graph_edge_count, + neighbor_node_count=None, + csr_row_dtype=torch.int64, + csr_col_dtype=csr_col_dtype, + weight_dtype=torch.float32, ) routine_func_partial = partial( routine_func, From 7bbf218fe375fa26f1844a6d4a9188b437d6b06c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 16:38:56 -0500 Subject: [PATCH 17/43] handle more unconditional 'torch' references (this time in type hints) --- ci/run_cugraph_pyg_pytests.sh | 2 +- ci/run_pylibwholegraph_pytests.sh | 4 ++-- ci/test_wheel_cugraph-pyg.sh | 14 +++++++------ dependencies.yaml | 9 --------
.../cugraph_pyg/tensor/dist_matrix.py | 20 +++++++++---------- .../cugraph-pyg/cugraph_pyg/tests/conftest.py | 2 +- .../pylibwholegraph/torch/embedding.py | 6 +++--- .../pylibwholegraph/torch/tensor.py | 2 +- .../pylibwholegraph/torch/wholegraph_env.py | 2 +- 9 files changed, 27 insertions(+), 34 deletions(-) diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh index 2635d755..cb281a29 100755 --- a/ci/run_cugraph_pyg_pytests.sh +++ b/ci/run_cugraph_pyg_pytests.sh @@ -7,7 +7,7 @@ set -euo pipefail # Support invoking run_cugraph_pyg_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_pyg -pytest --cache-clear --benchmark-disable "$@" . +pytest --cache-clear --benchmark-disable "$@" # Test examples (disabled due to lack of memory) #for e in "$(pwd)"/examples/*.py; do diff --git a/ci/run_pylibwholegraph_pytests.sh b/ci/run_pylibwholegraph_pytests.sh index d9c858e1..8bc88e3c 100755 --- a/ci/run_pylibwholegraph_pytests.sh +++ b/ci/run_pylibwholegraph_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,4 +7,4 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibwholegraph/pylibwholegraph/ -pytest --cache-clear --forked --import-mode=append "$@" tests +pytest --cache-clear --forked --import-mode=append "$@" diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index f85ec63d..6de536cd 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -14,7 +14,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 +# CUGRAPH_GNN_COMMIT=104b8bfe46011e52410319c19621126554e87068 # CUGRAPH_PYG_WHEELHOUSE=$( # RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" # ) @@ -61,11 +61,11 @@ rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" # RAPIDS_DATASET_ROOT_DIR is used by test scripts -# export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -# mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -# pushd "${RAPIDS_DATASET_ROOT_DIR}" -# ./get_test_data.sh --test -# popd +export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" +mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +pushd "${RAPIDS_DATASET_ROOT_DIR}" +./get_test_data.sh --test +popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 @@ -77,5 +77,7 @@ fi rapids-logger "pytest cugraph-pyg 
(no 'torch')" pip uninstall --yes 'torch' + +PYTHONPATH=/opt/work/python/cugraph-pyg/ \ python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" ./ci/run_cugraph_pyg_pytests.sh diff --git a/dependencies.yaml b/dependencies.yaml index 0c3023c0..abc150fd 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -385,15 +385,6 @@ dependencies: # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: - - output_types: pyproject - matrices: - # avoid pulling in 'torch' in places like DLFW builds that prefer to install it other ways - - matrix: - no_pytorch: "true" - packages: - - matrix: - packages: - - &pytorch_pip torch>=2.9.0 # wheels: handle GPU vs. CPU and version pinning together # # The 'pytorch.org' indices referenced in --extra-index-url below host CPU-only variants too, diff --git a/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py b/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py index ec331f3d..c9560ff4 100644 --- a/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py +++ b/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py @@ -18,14 +18,14 @@ def __init__( self, src: Optional[ Union[ - Tuple[torch.Tensor, torch.Tensor], + Tuple["torch.Tensor", "torch.Tensor"], Tuple[DistTensor, DistTensor], str, List[str], ] ] = None, shape: Optional[Union[list, tuple]] = None, - dtype: Optional[torch.dtype] = None, + dtype: Optional["torch.dtype"] = None, device: Optional[Literal["cpu", "cuda"]] = "cpu", backend: Optional[Literal["nccl", "vmm"]] = "nccl", format: Optional[Literal["csc", "coo"]] = "coo", @@ -82,8 +82,8 @@ def __init__( def __setitem__( self, - idx: Union[torch.Tensor, slice], - val: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], + idx: Union["torch.Tensor", slice], + val: Union["torch.Tensor", tuple["torch.Tensor", "torch.Tensor"]], ): if isinstance(idx, slice): size = self._col.shape[0] @@ -106,7 +106,7 @@ def __setitem__( self._col[idx] = val[0] self._row[idx] = val[1] - def 
__getitem__(self, idx: "torch.Tensor") -> torch.Tensor: + def __getitem__(self, idx: "torch.Tensor") -> "torch.Tensor": if self._format != "coo": raise ValueError("Getting is currently only supported for COO format") if idx.dim() != 1: @@ -114,11 +114,11 @@ def __getitem__(self, idx: "torch.Tensor") -> torch.Tensor: return torch.stack([self._col[idx], self._row[idx]]) - def get_local_tensor(self) -> Tuple[torch.Tensor, torch.Tensor]: + def get_local_tensor(self) -> Tuple["torch.Tensor", "torch.Tensor"]: return (self._col.get_local_tensor(), self._row.get_local_tensor()) @property - def local_col(self) -> torch.Tensor: + def local_col(self) -> "torch.Tensor": world_size = torch.distributed.get_world_size() rank = torch.distributed.get_rank() @@ -134,7 +134,7 @@ def local_col(self) -> torch.Tensor: return self._col[ix] @property - def local_row(self) -> torch.Tensor: + def local_row(self) -> "torch.Tensor": world_size = torch.distributed.get_world_size() rank = torch.distributed.get_rank() @@ -150,7 +150,7 @@ def local_row(self) -> torch.Tensor: return self._row[ix] @property - def local_coo(self) -> torch.Tensor: + def local_coo(self) -> "torch.Tensor": return torch.stack([self.local_col, self.local_row]) @property @@ -158,5 +158,5 @@ def shape(self) -> Tuple[int, int]: return (self._col.shape[0], self._row.shape[0]) @property - def dtype(self) -> torch.dtype: + def dtype(self) -> "torch.dtype": return self._col.dtype diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index 594dc3b7..81864fbd 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -27,7 +27,7 @@ gpubenchmark = pytest_benchmark.plugin.benchmark -@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def torch(): """Pass this to any test case that needs 'torch' to be installed""" return pytest.importorskip("torch") diff --git 
a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 93bba90a..9dfdf600 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -218,7 +218,7 @@ def forward( dummy_input: "torch.Tensor", wm_embedding, is_training: bool = False, - force_dtype: Union[torch.dtype, None] = None, + force_dtype: Union["torch.dtype", None] = None, ): output_tensor = wm_embedding.gather( indice, is_training=is_training, force_dtype=force_dtype @@ -279,7 +279,7 @@ def gather( indice: "torch.Tensor", *, is_training: bool = False, - force_dtype: Union[torch.dtype, None] = None, + force_dtype: Union["torch.dtype", None] = None, ): assert indice.dim() == 1 embedding_dim = self.get_embedding_tensor().shape[1] @@ -550,7 +550,7 @@ def __init__(self, wm_embedding: WholeMemoryEmbedding): self.embedding_gather_fn = EmbeddingLookupFn.apply def forward( - self, indice: "torch.Tensor", force_dtype: Union[torch.dtype, None] = None + self, indice: "torch.Tensor", force_dtype: Union["torch.dtype", None] = None ): return self.embedding_gather_fn( indice, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/tensor.py b/python/pylibwholegraph/pylibwholegraph/torch/tensor.py index e2de562b..73710ec8 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/tensor.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/tensor.py @@ -50,7 +50,7 @@ def get_comm(self): ) def gather( - self, indice: "torch.Tensor", *, force_dtype: Union[torch.dtype, None] = None + self, indice: "torch.Tensor", *, force_dtype: Union["torch.dtype", None] = None ): assert indice.dim() == 1 embedding_dim = self.shape[1] if self.dim() == 2 else 1 diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index 72f0d8fd..f4981d06 100644 --- 
a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -156,7 +156,7 @@ def get_wholegraph_env_fns(use_default=True) -> int: return wholegraph_env_context.get_env_fns() -def wrap_torch_tensor(t: Union[torch.Tensor, None]) -> wmb.WrappedLocalTensor: +def wrap_torch_tensor(t: Union["torch.Tensor", None]) -> wmb.WrappedLocalTensor: py_desc = wmb.PyWholeMemoryTensorDescription() wm_t = wmb.WrappedLocalTensor() if t is None: From 4cacebfc1058481f413b80ac4cf7e8ad84010be9 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 16:41:23 -0500 Subject: [PATCH 18/43] revert --- ci/test_wheel_cugraph-pyg.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 6de536cd..b227b348 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -78,6 +78,5 @@ fi rapids-logger "pytest cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' -PYTHONPATH=/opt/work/python/cugraph-pyg/ \ python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" ./ci/run_cugraph_pyg_pytests.sh From 36843b6c7b669e17242c80bb7587cee49dfd7487 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 11:43:15 -0500 Subject: [PATCH 19/43] check in debugging code temporarily --- ci/test_wheel_cugraph-pyg.sh | 34 +++++++++---------- .../pylibwholegraph/torch/embedding.py | 2 ++ test.sh | 12 +++++++ 3 files changed, 31 insertions(+), 17 deletions(-) create mode 100644 test.sh diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index b227b348..df6f0032 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step 
-LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# CUGRAPH_GNN_COMMIT=104b8bfe46011e52410319c19621126554e87068 -# CUGRAPH_PYG_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# LIBWHOLEGRAPH_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$( -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +CUGRAPH_GNN_COMMIT=4cacebfc1058481f413b80ac4cf7e8ad84010be9 +CUGRAPH_PYG_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +) +LIBWHOLEGRAPH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +) +PYLIBWHOLEGRAPH_WHEELHOUSE=$( + rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable 
"${CUGRAPH_GNN_COMMIT}" +) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -62,10 +62,10 @@ rapids-pip-retry install \ # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh --test -popd +# mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +# pushd "${RAPIDS_DATASET_ROOT_DIR}" +# ./get_test_data.sh --test +# popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 9dfdf600..37c6de3c 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -539,6 +539,8 @@ def destroy_embedding(wm_embedding: WholeMemoryEmbedding): wm_embedding.wmb_embedding = None +# TODO: all of these class overrides +# AttributeError: module 'torch' has no attribute 'autograd' class WholeMemoryEmbeddingModule(torch.nn.Module): """ torch.nn.Module wrapper of WholeMemoryEmbedding diff --git a/test.sh b/test.sh new file mode 100644 index 00000000..81eeb90a --- /dev/null +++ b/test.sh @@ -0,0 +1,12 @@ + + +docker run \ + --rm \ + --gpus all \ + --env GH_TOKEN=$(gh auth token) \ + -v $(pwd):/opt/work \ + -w /opt/work \ + -it rapidsai/citestwheel:26.04-cuda12.9.1-ubuntu22.04-py3.11 \ + bash + +ci/test_wheel_cugraph-pyg.sh From 11ed00e9a3e2b7bca947e3c1f31781cc431f2ae0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 13:27:52 -0500 Subject: [PATCH 20/43] classes that inherit from 'torch' also need to handle the dependency being optional --- .pre-commit-config.yaml | 2 +- ci/test_wheel_cugraph-pyg.sh | 46 +++--- ci/test_wheel_pylibwholegraph.sh | 11 +- 
.../pylibwholegraph/torch/data_loader.py | 25 ++- .../pylibwholegraph/torch/embedding.py | 126 +++++++++------ .../pylibwholegraph/torch/gnn_model.py | 152 ++++++++++-------- 6 files changed, 215 insertions(+), 147 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e16dc623..f2eecb04 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: rev: v0.14.3 hooks: - id: ruff-check - args: [--config, "pyproject.toml"] + args: [--fix, --config, "pyproject.toml"] - id: ruff-format args: [--config, "pyproject.toml"] - repo: https://github.com/asottile/yesqa diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index df6f0032..077223b1 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -CUGRAPH_GNN_COMMIT=4cacebfc1058481f413b80ac4cf7e8ad84010be9 -CUGRAPH_PYG_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -) -LIBWHOLEGRAPH_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -) -PYLIBWHOLEGRAPH_WHEELHOUSE=$( - rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable 
"${CUGRAPH_GNN_COMMIT}" -) +LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# CUGRAPH_GNN_COMMIT=4cacebfc1058481f413b80ac4cf7e8ad84010be9 +# CUGRAPH_PYG_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$( +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +# ) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -43,10 +43,10 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. -torch_installed=true +torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
- torch_installed=false + torch_downloaded=false else PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) fi @@ -62,21 +62,25 @@ rapids-pip-retry install \ # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -# mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -# pushd "${RAPIDS_DATASET_ROOT_DIR}" -# ./get_test_data.sh --test -# popd +mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +pushd "${RAPIDS_DATASET_ROOT_DIR}" +./get_test_data.sh --test +popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 -if [[ "${torch_installed}" == "true" ]]; then +if [[ "${torch_downloaded}" == "true" ]]; then + # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually + # installed here and that we've installed a package with CUDA support. + rapids-logger "Confirming that PyTorch is installed" + python -c "import torch; assert torch.cuda.is_available()" + rapids-logger "pytest cugraph-pyg (single GPU, with 'torch')" ./ci/run_cugraph_pyg_pytests.sh fi rapids-logger "pytest cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' - python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" ./ci/run_cugraph_pyg_pytests.sh diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 6125f20d..ff243373 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -36,10 +36,10 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. -torch_installed=true +torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
- torch_installed=false + torch_downloaded=false else PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) fi @@ -49,7 +49,12 @@ rapids-logger "Installing Packages" rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" -if [[ "${torch_installed}" == "true" ]]; then +if [[ "${torch_downloaded}" == "true" ]]; then + # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually + # installed here and that we've installed a package with CUDA support. + rapids-logger "Confirming that PyTorch is installed" + python -c "import torch; assert torch.cuda.is_available()" + rapids-logger "pytest pylibwholegraph (with 'torch')" ./ci/run_pylibwholegraph_pytests.sh fi diff --git a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py index b87801f9..ac54cde6 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py @@ -2,20 +2,29 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np -from pylibwholegraph.utils.imports import import_optional +from pylibwholegraph.utils.imports import import_optional, MissingModule torch = import_optional("torch") -class NodeClassificationDataset(torch.utils.data.Dataset): - def __init__(self, raw_dataset): - self.dataset = raw_dataset +if not isinstance(torch, MissingModule): - def __getitem__(self, index): - return self.dataset[index] + class NodeClassificationDataset(torch.utils.data.Dataset): + def __init__(self, raw_dataset): + self.dataset = raw_dataset - def __len__(self): - return len(self.dataset) + def __getitem__(self, index): + return self.dataset[index] + + def __len__(self): + return len(self.dataset) +else: + + class NodeClassificationDataset: + def __init__(self, raw_dataset): + raise ModuleNotFoundError( + "NodeClassificationDataset requires 'torch' to be installed." 
+ ) def create_node_classification_datasets(data_and_label: dict): diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 37c6de3c..70b8e563 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb -from pylibwholegraph.utils.imports import import_optional +from pylibwholegraph.utils.imports import MissingModule, import_optional from .utils import torch_dtype_to_wholememory_dtype, get_file_size from .utils import str_to_wmb_wholememory_location, str_to_wmb_wholememory_memory_type from .utils import ( @@ -210,31 +210,60 @@ def create_builtin_cache_policy( ) -class EmbeddingLookupFn(torch.autograd.Function): - @staticmethod - def forward( - ctx, - indice: "torch.Tensor", - dummy_input: "torch.Tensor", - wm_embedding, - is_training: bool = False, - force_dtype: Union["torch.dtype", None] = None, - ): - output_tensor = wm_embedding.gather( - indice, is_training=is_training, force_dtype=force_dtype - ) - if is_training and wm_embedding.need_grad(): - ctx.save_for_backward(indice, output_tensor, dummy_input) - ctx.wm_embedding = wm_embedding - return output_tensor +if not isinstance(torch, MissingModule): + + class EmbeddingLookupFn(torch.autograd.Function): + @staticmethod + def forward( + ctx, + indice: "torch.Tensor", + dummy_input: "torch.Tensor", + wm_embedding, + is_training: bool = False, + force_dtype: Union["torch.dtype", None] = None, + ): + output_tensor = wm_embedding.gather( + indice, is_training=is_training, force_dtype=force_dtype + ) + if is_training and wm_embedding.need_grad(): + ctx.save_for_backward(indice, output_tensor, dummy_input) + ctx.wm_embedding = wm_embedding + return output_tensor + + @staticmethod + def backward(ctx, grad_outputs: "torch.Tensor"): + indice, output_tensor, 
dummy_input = ctx.saved_tensors + wm_embedding = ctx.wm_embedding + wm_embedding.add_gradients(indice, grad_outputs) + ctx.wm_embedding = None + return None, torch.zeros_like(dummy_input), None, None, None + +else: + + class EmbeddingLookupFn: + def __init__(self, *args, **kwargs): + raise ModuleNotFoundError( + "EmbeddingLookupFn requires 'torch' to be installed." + ) - @staticmethod - def backward(ctx, grad_outputs: "torch.Tensor"): - indice, output_tensor, dummy_input = ctx.saved_tensors - wm_embedding = ctx.wm_embedding - wm_embedding.add_gradients(indice, grad_outputs) - ctx.wm_embedding = None - return None, torch.zeros_like(dummy_input), None, None, None + @staticmethod + def forward( + ctx, + indice: "torch.Tensor", + dummy_input: "torch.Tensor", + wm_embedding, + is_training: bool = False, + force_dtype: Union["torch.dtype", None] = None, + ): + raise ModuleNotFoundError( + "EmbeddingLookupFn requires 'torch' to be installed." + ) + + @staticmethod + def backward(ctx, grad_outputs: "torch.Tensor"): + raise ModuleNotFoundError( + "EmbeddingLookupFn requires 'torch' to be installed." 
+ ) class WholeMemoryEmbedding(object): @@ -539,28 +568,35 @@ def destroy_embedding(wm_embedding: WholeMemoryEmbedding): wm_embedding.wmb_embedding = None -# TODO: all of these class overrides -# AttributeError: module 'torch' has no attribute 'autograd' -class WholeMemoryEmbeddingModule(torch.nn.Module): - """ - torch.nn.Module wrapper of WholeMemoryEmbedding - """ +if not isinstance(torch, MissingModule): - def __init__(self, wm_embedding: WholeMemoryEmbedding): - super().__init__() - self.wm_embedding = wm_embedding - self.embedding_gather_fn = EmbeddingLookupFn.apply + class WholeMemoryEmbeddingModule(torch.nn.Module): + """ + torch.nn.Module wrapper of WholeMemoryEmbedding + """ - def forward( - self, indice: "torch.Tensor", force_dtype: Union["torch.dtype", None] = None - ): - return self.embedding_gather_fn( - indice, - self.wm_embedding.dummy_input, - self.wm_embedding, - self.training, - force_dtype, - ) + def __init__(self, wm_embedding: WholeMemoryEmbedding): + super().__init__() + self.wm_embedding = wm_embedding + self.embedding_gather_fn = EmbeddingLookupFn.apply + + def forward( + self, indice: "torch.Tensor", force_dtype: Union["torch.dtype", None] = None + ): + return self.embedding_gather_fn( + indice, + self.wm_embedding.dummy_input, + self.wm_embedding, + self.training, + force_dtype, + ) +else: + + class WholeMemoryEmbeddingModule: + def __init__(self, wm_embedding: WholeMemoryEmbedding): + raise ModuleNotFoundError( + "WholeMemoryEmbeddingModule requires 'torch' to be installed." + ) def create_wholememory_optimizer( diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index 888e07af..89af8118 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from pylibwholegraph.utils.imports import import_optional +from pylibwholegraph.utils.imports import import_optional, MissingModule from .graph_structure import GraphStructure from .embedding import WholeMemoryEmbedding, WholeMemoryEmbeddingModule from .common_options import parse_max_neighbors @@ -120,76 +120,90 @@ def layer_forward(layer, x_feat, x_target_feat, sub_graph): return x_feat -class HomoGNNModel(torch.nn.Module): - def __init__( - self, - graph_structure: GraphStructure, - node_embedding: WholeMemoryEmbedding, - args, - ): - super().__init__() - hidden_feat_dim = args.hiddensize - self.graph_structure = graph_structure - self.node_embedding = node_embedding - self.num_layer = args.layernum - self.hidden_feat_dim = args.hiddensize - num_head = args.heads if (args.model == "gat") else 1 - assert hidden_feat_dim % num_head == 0 - in_feat_dim = self.node_embedding.shape[1] - self.gnn_layers = create_gnn_layers( - in_feat_dim, - hidden_feat_dim, - args.classnum, - args.layernum, - num_head, - args.model, - ) - self.mean_output = True if args.model == "gat" else False - self.add_self_loop = True if args.model == "gat" else False - self.gather_fn = WholeMemoryEmbeddingModule(self.node_embedding) - self.dropout = args.dropout - self.max_neighbors = parse_max_neighbors(args.layernum, args.neighbors) - self.max_inference_neighbors = parse_max_neighbors( - args.layernum, args.inferencesample - ) +if not isinstance(torch, MissingModule): + + class HomoGNNModel(torch.nn.Module): + def __init__( + self, + graph_structure: GraphStructure, + node_embedding: WholeMemoryEmbedding, + args, + ): + super().__init__() + hidden_feat_dim = args.hiddensize + self.graph_structure = graph_structure + self.node_embedding = node_embedding + self.num_layer = args.layernum + self.hidden_feat_dim = args.hiddensize + num_head = args.heads if (args.model == "gat") else 1 + assert hidden_feat_dim % num_head == 0 + in_feat_dim = 
self.node_embedding.shape[1] + self.gnn_layers = create_gnn_layers( + in_feat_dim, + hidden_feat_dim, + args.classnum, + args.layernum, + num_head, + args.model, + ) + self.mean_output = True if args.model == "gat" else False + self.add_self_loop = True if args.model == "gat" else False + self.gather_fn = WholeMemoryEmbeddingModule(self.node_embedding) + self.dropout = args.dropout + self.max_neighbors = parse_max_neighbors(args.layernum, args.neighbors) + self.max_inference_neighbors = parse_max_neighbors( + args.layernum, args.inferencesample + ) - def forward(self, ids): - global framework_name - max_neighbors = ( - self.max_neighbors if self.training else self.max_inference_neighbors - ) - ids = ids.to(self.graph_structure.csr_col_ind.dtype).cuda() - ( - target_gids, - edge_indice, - csr_row_ptrs, - csr_col_inds, - ) = self.graph_structure.multilayer_sample_without_replacement( - ids, max_neighbors - ) - x_feat = self.gather_fn(target_gids[0], force_dtype=torch.float32) - for i in range(self.num_layer): - x_target_feat = x_feat[: target_gids[i + 1].numel()] - sub_graph = create_sub_graph( - target_gids[i], - target_gids[i + 1], - edge_indice[i], - csr_row_ptrs[i], - csr_col_inds[i], - max_neighbors[self.num_layer - 1 - i], - self.add_self_loop, + def forward(self, ids): + global framework_name + max_neighbors = ( + self.max_neighbors if self.training else self.max_inference_neighbors ) - x_feat = layer_forward( - self.gnn_layers[i], - x_feat, - x_target_feat, - sub_graph, + ids = ids.to(self.graph_structure.csr_col_ind.dtype).cuda() + ( + target_gids, + edge_indice, + csr_row_ptrs, + csr_col_inds, + ) = self.graph_structure.multilayer_sample_without_replacement( + ids, max_neighbors ) - if i != self.num_layer - 1: - x_feat = torch.nn.functional.relu(x_feat) - x_feat = torch.nn.functional.dropout( - x_feat, self.dropout, training=self.training + x_feat = self.gather_fn(target_gids[0], force_dtype=torch.float32) + for i in range(self.num_layer): + x_target_feat = 
x_feat[: target_gids[i + 1].numel()] + sub_graph = create_sub_graph( + target_gids[i], + target_gids[i + 1], + edge_indice[i], + csr_row_ptrs[i], + csr_col_inds[i], + max_neighbors[self.num_layer - 1 - i], + self.add_self_loop, + ) + x_feat = layer_forward( + self.gnn_layers[i], + x_feat, + x_target_feat, + sub_graph, ) + if i != self.num_layer - 1: + x_feat = torch.nn.functional.relu(x_feat) + x_feat = torch.nn.functional.dropout( + x_feat, self.dropout, training=self.training + ) - out_feat = x_feat - return out_feat + out_feat = x_feat + return out_feat +else: + + class HomoGNNModel: + def __init__( + self, + graph_structure: GraphStructure, + node_embedding: WholeMemoryEmbedding, + args, + ): + raise ModuleNotFoundError( + "EmbeddingLookupFn requires 'torch' to be installed." + ) From b1cb02c500a2794bc24701fbe4bb2647254d39d2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 13:28:47 -0500 Subject: [PATCH 21/43] remove debugging code --- test.sh | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 test.sh diff --git a/test.sh b/test.sh deleted file mode 100644 index 30c82c22..00000000 --- a/test.sh +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 - - -docker run \ - --rm \ - --gpus all \ - --env GH_TOKEN=$(gh auth token) \ - -v $(pwd):/opt/work \ - -w /opt/work \ - -it rapidsai/citestwheel:26.04-cuda12.9.1-ubuntu22.04-py3.11 \ - bash - -ci/test_wheel_cugraph-pyg.sh From ca6e314da11247edf7ec903d339f8ca82f85a54b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 14:18:25 -0500 Subject: [PATCH 22/43] fix typo with pytest.importorskip() --- .github/workflows/pr.yaml | 9 --------- .../wholegraph_torch/ops/test_graph_append_unique.py | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 864c5abe..c92839f5 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -17,7 +17,6 @@ jobs: - devcontainer - checks - conda-cpp-build - - conda-cpp-tests - conda-python-build - conda-python-build-noarch - conda-python-tests @@ -180,14 +179,6 @@ jobs: build_type: pull-request node_type: cpu8 script: ci/build_cpp.sh - conda-cpp-tests: - needs: [conda-cpp-build, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp - with: - build_type: pull-request - script: ci/test_cpp.sh conda-python-build: needs: conda-cpp-build secrets: inherit diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py index 77c2dfdb..804b8a6a 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py @@ -4,7 +4,7 @@ import pytest import pylibwholegraph.torch.graph_ops as wg_ops -torch = pytest.importorski("torch") +torch = pytest.importorskip("torch") def 
host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): From 2f3d4f8ee3fd004121736191a15c523549bbe610 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 14:58:18 -0500 Subject: [PATCH 23/43] more fixes --- ci/run_cugraph_pyg_pytests.sh | 10 ++++++++-- ci/run_pylibwholegraph_pytests.sh | 4 ++-- ci/test_wheel_cugraph-pyg.sh | 9 ++++++--- ci/test_wheel_pylibwholegraph.sh | 6 ++++-- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh index cb281a29..4431a013 100755 --- a/ci/run_cugraph_pyg_pytests.sh +++ b/ci/run_cugraph_pyg_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,7 +7,13 @@ set -euo pipefail # Support invoking run_cugraph_pyg_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_pyg -pytest --cache-clear --benchmark-disable "$@" +pytest --cache-clear --benchmark-disable "$@" . + +# Used to skip certain examples in CI due to memory limitations +export CI=true + +# Enable legacy behavior of torch.load for examples relying on ogb +export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 # Test examples (disabled due to lack of memory) #for e in "$(pwd)"/examples/*.py; do diff --git a/ci/run_pylibwholegraph_pytests.sh b/ci/run_pylibwholegraph_pytests.sh index 8bc88e3c..d9c858e1 100755 --- a/ci/run_pylibwholegraph_pytests.sh +++ b/ci/run_pylibwholegraph_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,4 +7,4 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibwholegraph/pylibwholegraph/ -pytest --cache-clear --forked --import-mode=append "$@" +pytest --cache-clear --forked --import-mode=append "$@" tests diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 077223b1..6500736c 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -14,7 +14,8 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# CUGRAPH_GNN_COMMIT=4cacebfc1058481f413b80ac4cf7e8ad84010be9 + +# CUGRAPH_GNN_COMMIT=b1cb02c500a2794bc24701fbe4bb2647254d39d2 # CUGRAPH_PYG_WHEELHOUSE=$( # RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" # ) @@ -80,7 +81,9 @@ if [[ "${torch_downloaded}" == "true" ]]; then ./ci/run_cugraph_pyg_pytests.sh fi -rapids-logger "pytest cugraph-pyg (no 'torch')" +rapids-logger "import cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' -python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" +python -c "import cugraph_pyg; print(f'cugraph-pyg version: {cugraph_pyg.__version__}')" + +rapids-logger "pytest cugraph-pyg (no 'torch')" ./ci/run_cugraph_pyg_pytests.sh diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index ff243373..4fd34a6f 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ 
b/ci/test_wheel_pylibwholegraph.sh @@ -59,7 +59,9 @@ if [[ "${torch_downloaded}" == "true" ]]; then ./ci/run_pylibwholegraph_pytests.sh fi -rapids-logger "pytest pylibwholegraph (no 'torch')" +rapids-logger "import cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' -python -c "import pylibwholegraph; print(pylibwholegraph.__version__)" +python -c "import cugraph_pyg; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" + +rapids-logger "pytest pylibwholegraph (no 'torch')" ./ci/run_pylibwholegraph_pytests.sh From 22fb7494b031603e7dfdd71717392991aad234f6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 17:01:02 -0500 Subject: [PATCH 24/43] fix more imports --- ci/test_wheel_pylibwholegraph.sh | 4 +-- .../cugraph_pyg/sampler/sampler_utils.py | 4 +-- ...h_unweighted_sample_without_replacement.py | 2 +- .../pylibwholegraph/torch/data_loader.py | 20 ++++++++----- .../pylibwholegraph/torch/embedding.py | 29 ++++++++++++------- .../pylibwholegraph/torch/gnn_model.py | 15 ++++++---- 6 files changed, 45 insertions(+), 29 deletions(-) diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 4fd34a6f..9fb99102 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -59,9 +59,9 @@ if [[ "${torch_downloaded}" == "true" ]]; then ./ci/run_pylibwholegraph_pytests.sh fi -rapids-logger "import cugraph-pyg (no 'torch')" +rapids-logger "import pylibwholegraph (no 'torch')" pip uninstall --yes 'torch' -python -c "import cugraph_pyg; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" +python -c "import pylibwholegraph; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" rapids-logger "pytest pylibwholegraph (no 'torch')" ./ci/run_pylibwholegraph_pytests.sh diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py b/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py index f8bb1f6e..53644afa 100644 --- 
a/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py +++ b/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py @@ -12,10 +12,8 @@ import cupy import pylibcugraph -torch_geometric = import_optional("torch_geometric") - torch = import_optional("torch") -HeteroSamplerOutput = torch_geometric.sampler.base.HeteroSamplerOutput +torch_geometric = import_optional("torch_geometric") def verify_metadata(metadata: Optional[Dict[str, Union[str, Tuple[str, str, str]]]]): diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index b4439d42..7101ef06 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -382,7 +382,7 @@ def test_wholegraph_unweighted_sample( host_csr_row_ptr, host_csr_col_ptr, _ = gen_csr_graph( graph_node_count, graph_edge_count, - graph_node_count=None, + neighbor_node_count=None, csr_row_dtype=torch.int64, csr_col_dtype=csr_col_dtype, weight_dtype=torch.float32, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py index ac54cde6..041c2d77 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py @@ -6,10 +6,16 @@ torch = import_optional("torch") +# NOTE: using more specific 'import_optional()' than just 'torch' for import-time checks +# (e.g. those needed for defining base classes) can be helpful because 'torch' can appear +# to be available even after a 'pip uninstall torch' if any files are left behind in +# 'site-packages/torch'. 
+torch_utils_data = import_optional("torch.utils.data") -if not isinstance(torch, MissingModule): - class NodeClassificationDataset(torch.utils.data.Dataset): +if not isinstance(torch_utils_data, MissingModule): + + class NodeClassificationDataset(torch_utils_data.Dataset): def __init__(self, raw_dataset): self.dataset = raw_dataset @@ -23,7 +29,7 @@ def __len__(self): class NodeClassificationDataset: def __init__(self, raw_dataset): raise ModuleNotFoundError( - "NodeClassificationDataset requires 'torch' to be installed." + "NodeClassificationDataset requires 'torch.utils.data'. Install 'torch'." ) @@ -65,14 +71,14 @@ def get_train_dataloader( num_replicas: int = 1, num_workers: int = 0, ): - train_sampler = torch.utils.data.distributed.DistributedSampler( + train_sampler = torch_utils_data.distributed.DistributedSampler( train_dataset, num_replicas=num_replicas, rank=replica_id, shuffle=True, drop_last=True, ) - train_dataloader = torch.utils.data.DataLoader( + train_dataloader = torch_utils_data.DataLoader( train_dataset, batch_size=batch_size, num_workers=num_workers, @@ -86,10 +92,10 @@ def get_train_dataloader( def get_valid_test_dataloader( valid_test_dataset, batch_size: int, *, num_workers: int = 0 ): - valid_test_sampler = torch.utils.data.distributed.DistributedSampler( + valid_test_sampler = torch_utils_data.distributed.DistributedSampler( valid_test_dataset, num_replicas=1, rank=0, shuffle=False, drop_last=False ) - valid_test_dataloader = torch.utils.data.DataLoader( + valid_test_dataloader = torch_utils_data.DataLoader( valid_test_dataset, batch_size=batch_size, num_workers=num_workers, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 70b8e563..8ac7ea13 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -19,9 +19,15 @@ from .tensor import WholeMemoryTensor from .wholegraph_env 
import wrap_torch_tensor, get_wholegraph_env_fns, get_stream - torch = import_optional("torch") +# NOTE: using more specific 'import_optional()' than just 'torch' for import-time checks +# (e.g. those needed for defining base classes) can be helpful because 'torch' can appear +# to be available even after a 'pip uninstall torch' if any files are left behind in +# 'site-packages/torch'. +torch_autograd = import_optional("torch.autograd") +torch_nn = import_optional("torch.nn") + class WholeMemoryOptimizer(object): """ @@ -210,9 +216,10 @@ def create_builtin_cache_policy( ) -if not isinstance(torch, MissingModule): +# NOTE: the 'hasattr()' is necessary because sometimes 'pip uninstall' +if not isinstance(torch_autograd, MissingModule): - class EmbeddingLookupFn(torch.autograd.Function): + class EmbeddingLookupFn(torch_autograd.Function): @staticmethod def forward( ctx, @@ -243,7 +250,7 @@ def backward(ctx, grad_outputs: "torch.Tensor"): class EmbeddingLookupFn: def __init__(self, *args, **kwargs): raise ModuleNotFoundError( - "EmbeddingLookupFn requires 'torch' to be installed." + "EmbeddingLookupFn requires 'torch.autograd'. Install 'torch'." ) @staticmethod @@ -256,13 +263,13 @@ def forward( force_dtype: Union["torch.dtype", None] = None, ): raise ModuleNotFoundError( - "EmbeddingLookupFn requires 'torch' to be installed." + "EmbeddingLookupFn requires 'torch.autograd'. Install 'torch'." ) @staticmethod def backward(ctx, grad_outputs: "torch.Tensor"): raise ModuleNotFoundError( - "EmbeddingLookupFn requires 'torch' to be installed." + "EmbeddingLookupFn requires 'torch.autograd'. Install 'torch'." 
) @@ -285,7 +292,7 @@ def __init__( self.wmb_optimizer = None - self.dummy_input = torch.nn.Parameter(torch.zeros(1), requires_grad=False) + self.dummy_input = torch_nn.Parameter(torch.zeros(1), requires_grad=False) self.need_apply = False self.sparse_indices = [] self.sparse_grads = [] @@ -484,7 +491,7 @@ def create_embedding( local_tensor, local_offset, ) = wm_embedding.get_embedding_tensor().get_local_tensor() - torch.nn.init.xavier_uniform_(local_tensor) + torch_nn.init.xavier_uniform_(local_tensor) comm.barrier() return wm_embedding @@ -568,9 +575,9 @@ def destroy_embedding(wm_embedding: WholeMemoryEmbedding): wm_embedding.wmb_embedding = None -if not isinstance(torch, MissingModule): +if not isinstance(torch_nn, MissingModule): - class WholeMemoryEmbeddingModule(torch.nn.Module): + class WholeMemoryEmbeddingModule(torch_nn.Module): """ torch.nn.Module wrapper of WholeMemoryEmbedding """ @@ -595,7 +602,7 @@ def forward( class WholeMemoryEmbeddingModule: def __init__(self, wm_embedding: WholeMemoryEmbedding): raise ModuleNotFoundError( - "WholeMemoryEmbeddingModule requires 'torch' to be installed." + "WholeMemoryEmbeddingModule requires 'torch.nn.Module'. Install 'torch'." ) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index 89af8118..67fcb6d6 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -8,6 +8,11 @@ torch = import_optional("torch") +# NOTE: using more specific 'import_optional()' than just 'torch' for import-time checks +# (e.g. those needed for defining base classes) can be helpful because 'torch' can appear +# to be available even after a 'pip uninstall torch' if any files are left behind in +# 'site-packages/torch'. 
+torch_nn = import_optional("torch.nn") framework_name = None @@ -29,7 +34,7 @@ def set_framework(framework: str): def create_gnn_layers( in_feat_dim, hidden_feat_dim, class_count, num_layer, num_head, model_type ): - gnn_layers = torch.nn.ModuleList() + gnn_layers = torch_nn.ModuleList() global framework_name for i in range(num_layer): layer_output_dim = ( @@ -120,9 +125,9 @@ def layer_forward(layer, x_feat, x_target_feat, sub_graph): return x_feat -if not isinstance(torch, MissingModule): +if not isinstance(torch_nn, MissingModule): - class HomoGNNModel(torch.nn.Module): + class HomoGNNModel(torch_nn.Module): def __init__( self, graph_structure: GraphStructure, @@ -188,8 +193,8 @@ def forward(self, ids): sub_graph, ) if i != self.num_layer - 1: - x_feat = torch.nn.functional.relu(x_feat) - x_feat = torch.nn.functional.dropout( + x_feat = torch_nn.functional.relu(x_feat) + x_feat = torch_nn.functional.dropout( x_feat, self.dropout, training=self.training ) From 79b78541eafc5484b0a695a2852411454b68e364 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 17:11:49 -0500 Subject: [PATCH 25/43] pytest params need to be lazy too --- .../tests/data/test_feature_store.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py index f64bee55..d14db14c 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py @@ -53,18 +53,19 @@ def test_feature_store_basic_api(single_pytorch_worker): @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @pytest.mark.sg @pytest.mark.parametrize( - "dtype", + "dtype_name", [ - torch.float32, - torch.float16, - torch.int8, - torch.int16, - torch.int32, - torch.int64, - torch.float64, + "float32", + "float16", + "int8", + "int16", + "int32", + "int64", + 
"float64", ], ) -def test_feature_store_basic_api_types(single_pytorch_worker, dtype): +def test_feature_store_basic_api_types(single_pytorch_worker, dtype_name, torch): + dtype = getattr(torch, dtype_name) features = torch.arange(0, 2000) features = features.reshape((features.numel() // 100, 100)).to(dtype) From 2633d4fd82c07033d61bd03a8ce475500030179e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 20:30:52 -0500 Subject: [PATCH 26/43] pre-commit --- python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py index d14db14c..fc29c0a8 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest From 603979696017f350e171a5bf4462010ed42d29e4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 21:50:19 -0500 Subject: [PATCH 27/43] more testing fixes --- ci/test_wheel_pylibwholegraph.sh | 2 +- .../wholegraph_torch/ops/test_graph_add_csr_self_loop.py | 2 +- .../tests/wholegraph_torch/ops/test_graph_append_unique.py | 6 ++---- ...test_wholegraph_unweighted_sample_without_replacement.py | 6 ++---- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 9fb99102..8a6aef48 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -61,7 +61,7 @@ fi rapids-logger "import pylibwholegraph (no 'torch')" pip uninstall --yes 'torch' -python -c "import pylibwholegraph print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" +python -c "import pylibwholegraph; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" rapids-logger "pytest pylibwholegraph (no 'torch')" ./ci/run_pylibwholegraph_pytests.sh diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py index 94e9b2c9..91e3c388 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py @@ -36,7 +36,7 @@ def routine_func(**kwargs): csr_row_ptr_tensor, csr_col_ptr_tensor, _ = gen_csr_graph( target_node_count, edge_num, - neighbor_node_count, + neighbor_node_count=neighbor_node_count, csr_row_dtype=torch.int64, csr_col_dtype=torch.int32, weight_dtype=torch.float32, diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py 
b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py index 804b8a6a..e94c1a9a 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py @@ -4,8 +4,6 @@ import pytest import pylibwholegraph.torch.graph_ops as wg_ops -torch = pytest.importorskip("torch") - def host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): torch = pytest.importorskip("torch") @@ -76,7 +74,7 @@ def routine_func(**kwargs): @pytest.mark.parametrize("target_node_count", [10, 113]) @pytest.mark.parametrize("neighbor_node_count", [104, 1987]) -@pytest.mark.parametrize("target_node_dtype", [torch.int32, torch.int64]) +@pytest.mark.parametrize("target_node_dtype", ["int32", "int64"]) @pytest.mark.parametrize("need_neighbor_raw_to_unique", [True, False]) def test_append_unique( target_node_count, @@ -90,6 +88,6 @@ def test_append_unique( routine_func( target_node_count=target_node_count, neighbor_node_count=neighbor_node_count, - target_node_dtype=target_node_dtype, + target_node_dtype=getattr(torch, target_node_dtype), need_neighbor_raw_to_unique=need_neighbor_raw_to_unique, ) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index 7101ef06..75f1cd9a 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -18,8 +18,6 @@ import pylibwholegraph.torch.wholegraph_ops as wg_ops import random -torch = pytest.importorskip("torch") - def 
unweighte_sample_without_replacement_base(random_values, M, N): torch = pytest.importorskip("torch") @@ -355,7 +353,7 @@ def routine_func(world_rank: int, world_size: int, **kwargs): @pytest.mark.parametrize("graph_edge_count", [1043]) @pytest.mark.parametrize("max_sample_count", [11, -1]) @pytest.mark.parametrize("center_node_count", [13]) -@pytest.mark.parametrize("center_node_dtype", [torch.int32, torch.int64]) +@pytest.mark.parametrize("center_node_dtype", ["int32", "int64"]) @pytest.mark.parametrize("col_id_dtype", [0, 1]) @pytest.mark.parametrize("wholememory_location", ([0, 1])) @pytest.mark.parametrize("wholememory_type", ([0, 1, 2])) @@ -395,7 +393,7 @@ def test_wholegraph_unweighted_sample( graph_edge_count=graph_edge_count, max_sample_count=max_sample_count, center_node_count=center_node_count, - center_node_dtype=center_node_dtype, + center_node_dtype=getattr(torch, center_node_dtype), col_id_dtype=col_id_dtype, wholememory_location=wholememory_location, wholememory_type=wholememory_type, From 005a89089ff846fa7d1fc24353173d0a675d55e0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 11:46:32 -0500 Subject: [PATCH 28/43] work around nvJitLink symbol issues, fix a few more test skips, other misc. 
fixes --- ci/download-torch-wheels.sh | 2 +- ci/run_cugraph_pyg_pytests.sh | 4 +- ci/run_pylibwholegraph_pytests.sh | 4 +- ci/test_wheel_cugraph-pyg.sh | 52 ++++++++++++------- ci/test_wheel_pylibwholegraph.sh | 25 ++++++++- ci/uninstall-torch-wheels.sh | 16 ++++++ .../test_wholememory_binding.py | 2 +- .../test_wholememory_tensor.py | 4 +- .../ops/test_graph_add_csr_self_loop.py | 2 +- .../ops/test_wholegraph_gather_scatter.py | 2 +- 10 files changed, 84 insertions(+), 29 deletions(-) create mode 100755 ci/uninstall-torch-wheels.sh diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh index 21e84051..24c68bf7 100755 --- a/ci/download-torch-wheels.sh +++ b/ci/download-torch-wheels.sh @@ -18,7 +18,7 @@ set -e -u -o pipefail TORCH_WHEEL_DIR="${1}" # skip download attempt on CUDA versions where we know there isn't a 'torch' CUDA wheel. -CUDA_MAJOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f1) +CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) if \ { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -lt 9 ]; } \ diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh index 4431a013..da255e71 100755 --- a/ci/run_cugraph_pyg_pytests.sh +++ b/ci/run_cugraph_pyg_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,7 +7,7 @@ set -euo pipefail # Support invoking run_cugraph_pyg_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_pyg -pytest --cache-clear --benchmark-disable "$@" . +pytest -rs --cache-clear --benchmark-disable "$@" . 
# Used to skip certain examples in CI due to memory limitations export CI=true diff --git a/ci/run_pylibwholegraph_pytests.sh b/ci/run_pylibwholegraph_pytests.sh index d9c858e1..805698d0 100755 --- a/ci/run_pylibwholegraph_pytests.sh +++ b/ci/run_pylibwholegraph_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,4 +7,4 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibwholegraph/pylibwholegraph/ -pytest --cache-clear --forked --import-mode=append "$@" tests +pytest -rs --cache-clear --forked --import-mode=append "$@" tests diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 6500736c..33fbf4b0 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,20 +11,20 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) - -# CUGRAPH_GNN_COMMIT=b1cb02c500a2794bc24701fbe4bb2647254d39d2 -# CUGRAPH_PYG_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# LIBWHOLEGRAPH_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" 
rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$( -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) + +CUGRAPH_GNN_COMMIT=603979696017f350e171a5bf4462010ed42d29e4 +CUGRAPH_PYG_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="cugraph-pyg_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +) +LIBWHOLEGRAPH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +) +PYLIBWHOLEGRAPH_WHEELHOUSE=$( + rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -44,6 +44,7 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. +CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
@@ -64,14 +65,28 @@ rapids-pip-retry install \ # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh --test -popd +# pushd "${RAPIDS_DATASET_ROOT_DIR}" +# ./get_test_data.sh --test +# popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 if [[ "${torch_downloaded}" == "true" ]]; then + # TODO: remove this when RAPIDS wheels and 'torch' CUDA wheels have compatible package requirements + # + # * https://github.com/rapidsai/cugraph/issues/5443 + # * https://github.com/rapidsai/build-planning/issues/257 + # * https://github.com/rapidsai/build-planning/issues/255 + # + CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" + CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) + if [[ "${CUDA_MAJOR}" == "13" ]]; then + pip install \ + --upgrade \ + "nvidia-nvjitlink>=${CUDA_MAJOR}.${CUDA_MINOR}" + fi + # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually # installed here and that we've installed a package with CUDA support. 
rapids-logger "Confirming that PyTorch is installed" @@ -82,7 +97,8 @@ if [[ "${torch_downloaded}" == "true" ]]; then fi rapids-logger "import cugraph-pyg (no 'torch')" -pip uninstall --yes 'torch' +./ci/uninstall-torch-wheels.sh + python -c "import cugraph_pyg; print(f'cugraph-pyg version: {cugraph_pyg.__version__}')" rapids-logger "pytest cugraph-pyg (no 'torch')" diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 8a6aef48..d586f028 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -16,6 +16,14 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_GNN_COMMIT=603979696017f350e171a5bf4462010ed42d29e4 +# LIBWHOLEGRAPH_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$( +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +# ) + RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" @@ -49,7 +57,21 @@ rapids-logger "Installing Packages" rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" + if [[ "${torch_downloaded}" == "true" ]]; then + # TODO: remove this when RAPIDS wheels and 'torch' CUDA wheels have compatible package requirements + # + # * https://github.com/rapidsai/build-planning/issues/257 + # * https://github.com/rapidsai/build-planning/issues/255 + # + CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" + CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' 
-f2) + if [[ "${CUDA_MAJOR}" == "13" ]]; then + pip install \ + --upgrade \ + "nvidia-nvjitlink>=${CUDA_MAJOR}.${CUDA_MINOR}" + fi + # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually # installed here and that we've installed a package with CUDA support. rapids-logger "Confirming that PyTorch is installed" @@ -60,7 +82,8 @@ if [[ "${torch_downloaded}" == "true" ]]; then fi rapids-logger "import pylibwholegraph (no 'torch')" -pip uninstall --yes 'torch' +./ci/uninstall-torch-wheels.sh + python -c "import pylibwholegraph; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" rapids-logger "pytest pylibwholegraph (no 'torch')" diff --git a/ci/uninstall-torch-wheels.sh b/ci/uninstall-torch-wheels.sh new file mode 100755 index 00000000..3590bdc0 --- /dev/null +++ b/ci/uninstall-torch-wheels.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +pip uninstall --yes 'torch' + +# 'pytest' leaves behind some pycache files in site-packages/torch that make 'import torch' +# seem to "work" even though there's not really a package there, leading to errors like +# "module 'torch' has no attribute 'distributed'" +# +# For the sake of testing, just fully delete 'torch' from site-packages to simulate an environment +# where it was never installed. 
+SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") +rm -rf "${SITE_PACKAGES}/torch" diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py index c80afd9c..366d03e3 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py @@ -105,7 +105,7 @@ def routine_func(world_rank: int, world_size: int): wmb.finalize() -def test_dlpack(): +def test_dlpack(torch): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 multiprocess_run(gpu_count, routine_func) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_tensor.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_tensor.py index 648f7dc8..0e53c209 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_tensor.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_tensor.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb @@ -107,7 +107,7 @@ def routine_func(world_rank: int, world_size: int): wmb.finalize() -def test_wholememory_tensor(): +def test_wholememory_tensor(torch): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 multiprocess_run(gpu_count, routine_func) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py index 91e3c388..07fb409a 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py @@ -37,7 +37,7 @@ def routine_func(**kwargs): target_node_count, edge_num, neighbor_node_count=neighbor_node_count, - csr_row_dtype=torch.int64, + csr_row_dtype=torch.int32, csr_col_dtype=torch.int32, weight_dtype=torch.float32, ) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py index cd1af84f..0395d2a6 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py @@ -174,7 +174,7 @@ def routine_func(world_rank: int, world_size: int): wmb.finalize() -def test_wholegraph_gather_scatter(): +def test_wholegraph_gather_scatter(torch): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 multiprocess_run(gpu_count, routine_func) From 22ded28c9126072aa1d79512212e8090e111fb82 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 12:57:44 -0500 Subject: [PATCH 29/43] revert temporary testing stuff --- .github/workflows/pr.yaml | 11 ++++++++++- 
ci/test_wheel_cugraph-pyg.sh | 23 ++++++----------------- ci/test_wheel_pylibwholegraph.sh | 9 +-------- 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c92839f5..127f4c2a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -17,6 +17,7 @@ jobs: - devcontainer - checks - conda-cpp-build + - conda-cpp-tests - conda-python-build - conda-python-build-noarch - conda-python-tests @@ -179,6 +180,14 @@ jobs: build_type: pull-request node_type: cpu8 script: ci/build_cpp.sh + conda-cpp-tests: + needs: [conda-cpp-build, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp + with: + build_type: pull-request + script: ci/test_cpp.sh conda-python-build: needs: conda-cpp-build secrets: inherit @@ -215,7 +224,7 @@ jobs: with: arch: "amd64" build_type: pull-request - container_image: "rapidsai/ci-conda:26.04-latest" + container_image: "rapidsai/ci-conda:26.06-latest" script: "ci/build_docs.sh" wheel-build-libwholegraph: needs: checks diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 33fbf4b0..dc0d799b 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,20 +11,9 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) - 
-CUGRAPH_GNN_COMMIT=603979696017f350e171a5bf4462010ed42d29e4 -CUGRAPH_PYG_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="cugraph-pyg_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -) -LIBWHOLEGRAPH_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -) -PYLIBWHOLEGRAPH_WHEELHOUSE=$( - rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -) +LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -65,9 +54,9 @@ rapids-pip-retry install \ # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -# pushd "${RAPIDS_DATASET_ROOT_DIR}" -# ./get_test_data.sh --test -# popd +pushd "${RAPIDS_DATASET_ROOT_DIR}" +./get_test_data.sh --test +popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index d586f028..1086d439 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -16,14 +16,6 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" 
rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_GNN_COMMIT=603979696017f350e171a5bf4462010ed42d29e4 -# LIBWHOLEGRAPH_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$( -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -# ) - RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" @@ -61,6 +53,7 @@ rapids-pip-retry install \ if [[ "${torch_downloaded}" == "true" ]]; then # TODO: remove this when RAPIDS wheels and 'torch' CUDA wheels have compatible package requirements # + # * https://github.com/rapidsai/cugraph/issues/5443 # * https://github.com/rapidsai/build-planning/issues/257 # * https://github.com/rapidsai/build-planning/issues/255 # From bbe4c972e2e157b0b3f24cb09b75f043a175c50d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 13:44:38 -0500 Subject: [PATCH 30/43] remove comment --- python/pylibwholegraph/pylibwholegraph/torch/embedding.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 8ac7ea13..b89ebe93 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -216,7 +216,6 @@ def create_builtin_cache_policy( ) -# NOTE: the 'hasattr()' is necessary because sometimes 'pip uninstall' if not isinstance(torch_autograd, MissingModule): class EmbeddingLookupFn(torch_autograd.Function): From 21920891edd35143b59cfa44a03813b0100080af Mon Sep 17 00:00:00 2001 From: James 
Lamb Date: Thu, 12 Mar 2026 20:41:02 -0500 Subject: [PATCH 31/43] Apply suggestion from @jameslamb --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index db6d1f82..8f76fc2e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -224,7 +224,7 @@ jobs: with: arch: "amd64" build_type: pull-request - container_image: "rapidsai/ci-conda:26.06-latest" + container_image: "rapidsai/ci-conda:26.04-latest" script: "ci/build_docs.sh" wheel-build-libwholegraph: needs: checks From b827cc22ac0fa864da45139a0f3af17e3b8c70ea Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 21:19:12 -0500 Subject: [PATCH 32/43] fix copy-paste mistakes Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- ci/test_wheel_pylibwholegraph.sh | 4 ++-- python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 1086d439..ec31f656 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -34,7 +34,7 @@ PIP_INSTALL_ARGS=( TORCH_WHEEL_DIR="$(mktemp -d)" ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" -# 'cugraph-pyg' is still expected to be importable +# 'pylibwholegraph' is still expected to be importable # and testable in an environment where 'torch' isn't installed. torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then @@ -65,7 +65,7 @@ if [[ "${torch_downloaded}" == "true" ]]; then "nvidia-nvjitlink>=${CUDA_MAJOR}.${CUDA_MINOR}" fi - # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually + # 'torch' is an optional dependency of 'pylibwholegraph'... confirm that it's actually # installed here and that we've installed a package with CUDA support. 
rapids-logger "Confirming that PyTorch is installed" python -c "import torch; assert torch.cuda.is_available()" diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index 67fcb6d6..a0422b89 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -210,5 +210,5 @@ def __init__( args, ): raise ModuleNotFoundError( - "EmbeddingLookupFn requires 'torch' to be installed." + "HomoGNNModel requires 'torch' to be installed." ) From 6a958e6221123d845132a9ad4997fa77d8e3502f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 21:21:54 -0500 Subject: [PATCH 33/43] standardize dependencies.yaml filters --- ci/download-torch-wheels.sh | 2 +- dependencies.yaml | 8 ++++---- python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py | 4 +--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh index 24c68bf7..82b22787 100755 --- a/ci/download-torch-wheels.sh +++ b/ci/download-torch-wheels.sh @@ -39,7 +39,7 @@ fi rapids-dependency-file-generator \ --output requirements \ --file-key "torch_only" \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES};require_gpu_pytorch=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES};require_gpu=true" \ | tee ./torch-constraints.txt rapids-pip-retry download \ diff --git a/dependencies.yaml b/dependencies.yaml index abc150fd..bc277925 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -408,26 +408,26 @@ dependencies: - matrix: cuda: "12.9" dependencies: "oldest" - require_gpu_pytorch: "true" + require_gpu: "true" packages: - &torch_cu129_index --extra-index-url=https://download.pytorch.org/whl/cu129 - torch==2.8.0+cu129 - matrix: cuda: "12.9" - require_gpu_pytorch: "true" + 
require_gpu: "true" packages: - *torch_cu129_index - torch==2.10.0+cu129 - matrix: cuda: "13.0" dependencies: "oldest" - require_gpu_pytorch: "true" + require_gpu: "true" packages: - &torch_index_cu13 --extra-index-url=https://download.pytorch.org/whl/cu130 - torch==2.8.0+cu130 - matrix: cuda: "13.0" - require_gpu_pytorch: "true" + require_gpu: "true" packages: - *torch_index_cu13 - torch==2.10.0+cu130 diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index a0422b89..c6e2813a 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -209,6 +209,4 @@ def __init__( node_embedding: WholeMemoryEmbedding, args, ): - raise ModuleNotFoundError( - "HomoGNNModel requires 'torch' to be installed." - ) + raise ModuleNotFoundError("HomoGNNModel requires 'torch' to be installed.") From 2c3d0d03e9b2b2dd38ed41773483d4e9aa77a993 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 21:53:14 -0500 Subject: [PATCH 34/43] Update ci/validate_wheel.sh Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- ci/validate_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 88ba85aa..a759fb7b 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -50,7 +50,7 @@ WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" # NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' # Use '|| true' so grep not finding any matches (exit 1) does not kill the script under set -e unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ -| grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ +| grep -E '^Requires-Dist:.*\btorch\b([><=!~ ].*)?' 
\ | tee matches.txt || true if [[ -s ./matches.txt ]]; then From 40cdfa875858a1cd2bd9ac9a1e5d10944b427381 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 21:53:40 -0500 Subject: [PATCH 35/43] Update pyproject.toml Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 97a06025..6c8d15f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ select = [ ] [tool.ruff.lint.flake8-tidy-imports.banned-api] -"torch".msg = "Use the 'torch' fixture instead of 'import torch' in tests (see conftest.py)." +"torch".msg = "Use 'import_optional(\"torch\")' in library code, or the 'torch' pytest fixture in test code (see conftest.py), instead of 'import torch'." [tool.ruff.lint.per-file-ignores] # allow importing 'torch' directly in cugraph-pyg examples From 41c5277d04a5acb88bf6f0e1c6c8b248d066b237 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 22:03:30 -0500 Subject: [PATCH 36/43] Apply suggestion from @jameslamb --- ci/validate_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index a759fb7b..88ba85aa 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -50,7 +50,7 @@ WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" # NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' # Use '|| true' so grep not finding any matches (exit 1) does not kill the script under set -e unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ -| grep -E '^Requires-Dist:.*\btorch\b([><=!~ ].*)?' 
\ +| grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ | tee matches.txt || true if [[ -s ./matches.txt ]]; then From eed447c9b15273338605078911ef8e2633ba39a1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 13 Mar 2026 14:54:14 -0500 Subject: [PATCH 37/43] one more import --- python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index f4981d06..dff09220 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -10,6 +10,7 @@ from .utils import wholememory_dtype_to_torch_dtype, torch_dtype_to_wholememory_dtype torch = import_optional("torch") +torch_utils = import_optional("torch.utils") default_wholegraph_env_context = None torch_cpp_ext_loaded = False @@ -192,7 +193,7 @@ def compile_cpp_extension(): extra_ldflags.append( "".join(["-L", os.path.join(os.environ["LIBWHOLEGRAPH_DIR"], "lib")]) ) - torch.utils.cpp_extension.load( + torch_utils.cpp_extension.load( name="pylibwholegraph.pylibwholegraph_torch_ext", sources=[ os.path.join(cpp_extension_path, "wholegraph_torch_ext.cpp"), From 79a6efecf918403f568c5e356b00d73bd4ddf2af Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 13 Mar 2026 15:47:49 -0500 Subject: [PATCH 38/43] fix --- .../pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index dff09220..d9c90a5e 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -10,7 +10,7 @@ from .utils import wholememory_dtype_to_torch_dtype, torch_dtype_to_wholememory_dtype torch = 
import_optional("torch") -torch_utils = import_optional("torch.utils") +torch_utils_cpp_ext = import_optional("torch.utils.cpp_extension") default_wholegraph_env_context = None torch_cpp_ext_loaded = False @@ -193,7 +193,7 @@ def compile_cpp_extension(): extra_ldflags.append( "".join(["-L", os.path.join(os.environ["LIBWHOLEGRAPH_DIR"], "lib")]) ) - torch_utils.cpp_extension.load( + torch_utils_cpp_ext.load( name="pylibwholegraph.pylibwholegraph_torch_ext", sources=[ os.path.join(cpp_extension_path, "wholegraph_torch_ext.cpp"), From a61a427b35e5e31b749e57ff632941293ee287d6 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 16 Mar 2026 15:29:34 -0700 Subject: [PATCH 39/43] make optional imports lazy --- .../pylibwholegraph/utils/imports.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/pylibwholegraph/pylibwholegraph/utils/imports.py b/python/pylibwholegraph/pylibwholegraph/utils/imports.py index 67be22fe..c0a9851e 100644 --- a/python/pylibwholegraph/pylibwholegraph/utils/imports.py +++ b/python/pylibwholegraph/pylibwholegraph/utils/imports.py @@ -21,6 +21,18 @@ def __getattr__(self, attr): raise RuntimeError(f"This feature requires the '{self.name}' package/module") +class FoundModule: + def __init__(self, mod): + self.mod = mod + self.imported = False + + def __getattr__(self, attr): + if not self.imported: + self.mod = import_module(self.mod) + self.imported = True + return getattr(self.mod, attr) + + def import_optional(mod, default_mod_class=MissingModule): """ import the "optional" module 'mod' and return the module object or object. 
@@ -41,6 +53,6 @@ def import_optional(mod, default_mod_class=MissingModule): RuntimeError: This feature requires the 'torch' package/module """ try: - return import_module(mod) + return FoundModule(mod) except ModuleNotFoundError: return default_mod_class(mod_name=mod) From 96201b6c96227d68bdc00c8b6cd1901ee3c93458 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 16 Mar 2026 15:54:38 -0700 Subject: [PATCH 40/43] fix module check - meant to change to use find_spec --- python/pylibwholegraph/pylibwholegraph/utils/imports.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pylibwholegraph/pylibwholegraph/utils/imports.py b/python/pylibwholegraph/pylibwholegraph/utils/imports.py index c0a9851e..9b9ec65b 100644 --- a/python/pylibwholegraph/pylibwholegraph/utils/imports.py +++ b/python/pylibwholegraph/pylibwholegraph/utils/imports.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from importlib import import_module +from importlib.util import find_spec class MissingModule: @@ -52,7 +53,7 @@ def import_optional(mod, default_mod_class=MissingModule): ... 
RuntimeError: This feature requires the 'torch' package/module """ - try: + if find_spec(mod) is not None: return FoundModule(mod) - except ModuleNotFoundError: + else: return default_mod_class(mod_name=mod) From 456857ae02e36f02aad06b2bc251bb729a5f30d0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Mar 2026 10:45:02 -0500 Subject: [PATCH 41/43] handle dotted imports, make ruff selections explicit --- .pre-commit-config.yaml | 3 +++ pyproject.toml | 6 +++++ .../cugraph-pyg/cugraph_pyg/utils/imports.py | 27 ++++++++++++++++--- .../pylibwholegraph/_doctor_check.py | 2 +- .../pylibwholegraph/utils/imports.py | 10 ++++++- 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb393465..f284c3df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,6 +28,9 @@ repos: - id: yesqa additional_dependencies: - flake8==7.1.1 + exclude: | + (?x) + python/pylibwholegraph/pylibwholegraph/_doctor_check[.]py$ - repo: https://github.com/pre-commit/mirrors-clang-format rev: v20.1.4 hooks: diff --git a/pyproject.toml b/pyproject.toml index 6c8d15f5..5662e9f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,12 @@ ignore = [ "E203" ] select = [ + # (pycodestyle) + "E4", + "E7", + "E9", + # (pyflakes) + "F", # (flake8-tidy-imports) banned-api "TID251" ] diff --git a/python/cugraph-pyg/cugraph_pyg/utils/imports.py b/python/cugraph-pyg/cugraph_pyg/utils/imports.py index b4e4df42..270b2eca 100644 --- a/python/cugraph-pyg/cugraph_pyg/utils/imports.py +++ b/python/cugraph-pyg/cugraph_pyg/utils/imports.py @@ -1,8 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from packaging.requirements import Requirement from importlib import import_module +from importlib.util import find_spec def package_available(requirement: str) -> bool: @@ -39,6 +40,18 @@ def __getattr__(self, attr): raise RuntimeError(f"This feature requires the {self.name} package/module") +class FoundModule: + def __init__(self, mod): + self.mod = mod + self.imported = False + + def __getattr__(self, attr): + if not self.imported: + self.mod = import_module(self.mod) + self.imported = True + return getattr(self.mod, attr) + + def import_optional(mod, default_mod_class=MissingModule): """ import the "optional" module 'mod' and return the module object or object. @@ -80,7 +93,15 @@ def import_optional(mod, default_mod_class=MissingModule): >> """ + # this try-except is necessary to handle dotted imports, + # like `import_optional("torch.autograd")` + mod_found = False try: - return import_module(mod) - except ModuleNotFoundError: + mod_found = find_spec(mod) is not None + except ImportError: + mod_found = False + + if mod_found: + return FoundModule(mod) + else: return default_mod_class(mod_name=mod) diff --git a/python/pylibwholegraph/pylibwholegraph/_doctor_check.py b/python/pylibwholegraph/pylibwholegraph/_doctor_check.py index 33ac107d..a76e8483 100644 --- a/python/pylibwholegraph/pylibwholegraph/_doctor_check.py +++ b/python/pylibwholegraph/pylibwholegraph/_doctor_check.py @@ -27,7 +27,7 @@ def pylibwholegraph_smoke_check(**kwargs): ) try: - import torch + import torch # noqa: TID251 assert torch.cuda.is_available() diff --git a/python/pylibwholegraph/pylibwholegraph/utils/imports.py b/python/pylibwholegraph/pylibwholegraph/utils/imports.py index 9b9ec65b..564a6b97 100644 --- a/python/pylibwholegraph/pylibwholegraph/utils/imports.py +++ b/python/pylibwholegraph/pylibwholegraph/utils/imports.py @@ -53,7 +53,15 @@ def import_optional(mod, default_mod_class=MissingModule): ... 
RuntimeError: This feature requires the 'torch' package/module """ - if find_spec(mod) is not None: + # this try-except is necessary to handle dotted imports, + # like `import_optional("torch.autograd")` + mod_found = False + try: + mod_found = find_spec(mod) is not None + except ImportError: + mod_found = False + + if mod_found: return FoundModule(mod) else: return default_mod_class(mod_name=mod) From 0afcdd767e099fb961c83c93bbae96ea31c16b32 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Mar 2026 11:52:01 -0500 Subject: [PATCH 42/43] more import-time patching --- python/cugraph-pyg/cugraph_pyg/data/feature_store.py | 7 +++++-- python/cugraph-pyg/cugraph_pyg/data/graph_store.py | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py index ba2081ca..fd645cb7 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import warnings @@ -18,9 +18,12 @@ wgth = import_optional("pylibwholegraph.torch") +# If 'torch_geometric' is available but 'torch' is not, accessing +# 'torch_geometric.data.GraphStore' will fail because `torch_geometric` +# unconditionally imports 'torch'... so need to check that both are available. 
class FeatureStore( object - if isinstance(torch_geometric, MissingModule) + if (isinstance(torch_geometric, MissingModule) or isinstance(torch, MissingModule)) else torch_geometric.data.FeatureStore ): """ diff --git a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py index eada6a61..7a522912 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py @@ -30,9 +30,12 @@ ] +# If 'torch_geometric' is available but 'torch' is not, accessing +# 'torch_geometric.data.GraphStore' will fail because `torch_geometric` +# unconditionally imports 'torch'... so need to check that both are available. class GraphStore( object - if isinstance(torch_geometric, MissingModule) + if (isinstance(torch_geometric, MissingModule) or isinstance(torch, MissingModule)) else torch_geometric.data.GraphStore ): """ From 27f8fdd56ced43cfbb7a417558b0f148dc060d56 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Mar 2026 14:10:17 -0500 Subject: [PATCH 43/43] remove unnecessary CUDA_MAJOR --- ci/test_wheel_cugraph-pyg.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index dc0d799b..54c425d1 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -33,7 +33,6 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. -CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded."