From 13825b5e34d5554fdcc9c77b3832eda31a149301 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 09:14:25 -0500 Subject: [PATCH 01/43] ensure 'torch' CUDA wheels are installed in CI --- .gitignore | 2 + ci/download-torch-wheels.sh | 40 +++++ ci/test_wheel_pylibwholegraph.sh | 4 +- .../all_cuda-129_arch-aarch64.yaml | 2 + .../all_cuda-129_arch-x86_64.yaml | 2 + .../all_cuda-131_arch-aarch64.yaml | 2 + .../all_cuda-131_arch-x86_64.yaml | 2 + conda/recipes/cugraph-pyg/recipe.yaml | 2 +- dependencies.yaml | 146 +++++++++++------- python/cugraph-pyg/pyproject.toml | 2 +- python/pylibwholegraph/pyproject.toml | 1 + 11 files changed, 147 insertions(+), 58 deletions(-) create mode 100755 ci/download-torch-wheels.sh diff --git a/.gitignore b/.gitignore index 1ccc2780..8d4f88e2 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,8 @@ wheels/ wheelhouse/ _skbuild/ cufile.log +*.tar.gz +*.whl ## Patching *.diff diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh new file mode 100755 index 00000000..7313a006 --- /dev/null +++ b/ci/download-torch-wheels.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# [description] +# +# Downloads a CUDA variant of 'torch' from the correct index, based on CUDA major version. +# +# This exists to avoid using 'pip --extra-index-url', which has these undesirable properties: +# +# - allows for CPU-only 'torch' to be downloaded from pypi.org +# - allows for other non-torch packages like 'numpy' to be downloaded from the PyTorch indices +# - increases solve complexity for 'pip' +# + +set -e -u -o pipefail + +TORCH_WHEEL_DIR="${1}" + +# Ensure CUDA-enabled 'torch' packages are always used. +# +# Downloading + passing the downloaded file as a requirement forces the use of this +# package and ensures 'pip' considers all of its requirements. 
+# +# Not appending this to PIP_CONSTRAINT, because we don't want the torch '--extra-index-url' +# to leak outside of this script into other 'pip {download,install}'' calls. +rapids-dependency-file-generator \ + --output requirements \ + --file-key "torch_only" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES};require_gpu_pytorch=true" \ +| tee ./torch-constraints.txt + +rapids-pip-retry download \ + --isolated \ + --prefer-binary \ + --no-deps \ + -d "${TORCH_WHEEL_DIR}" \ + --constraint "${PIP_CONSTRAINT}" \ + --constraint ./torch-constraints.txt \ + 'torch' diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 252c6cfa..ac065e68 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -2,9 +2,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -set -e # abort the script on error -set -o pipefail # piped commands propagate their error -set -E # ERR traps are inherited by subcommands +set -euo pipefail # Delete system libnccl.so to ensure the wheel is used. 
# (but only do this in CI, to avoid breaking local dev environments) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index efb98a8c..36a21955 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -27,6 +27,7 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 +- ogb - packaging - pandas - pre-commit @@ -42,6 +43,7 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy +- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index cc7de24e..1085bba4 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -27,6 +27,7 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 +- ogb - packaging - pandas - pre-commit @@ -42,6 +43,7 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy +- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index 65502230..0ebe16a4 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -27,6 +27,7 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 +- ogb - packaging - pandas - pre-commit @@ -42,6 +43,7 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy +- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index f8bf95a2..58f1fe21 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -27,6 +27,7 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 +- ogb - 
packaging - pandas - pre-commit @@ -42,6 +43,7 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy +- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/recipes/cugraph-pyg/recipe.yaml b/conda/recipes/cugraph-pyg/recipe.yaml index 3243a6c8..ea70f7b9 100644 --- a/conda/recipes/cugraph-pyg/recipe.yaml +++ b/conda/recipes/cugraph-pyg/recipe.yaml @@ -40,7 +40,7 @@ requirements: # This is intentionally spelled 'pytorch' (not 'pytorch-gpu' and not using build string selectors) # because we want it to be possible to at least install `cugraph-pyg` in an environment without a GPU, # to support use cases like building container images. - - pytorch >=2.3 + - pytorch >=2.6 - pytorch_geometric >=2.5,<2.8 tests: diff --git a/dependencies.yaml b/dependencies.yaml index 1f10f263..1848ed1b 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -31,6 +31,7 @@ files: - rapids_build_skbuild - test_cpp - test_python_common + - test_python_cugraph_pyg - test_python_pylibwholegraph checks: output: none @@ -60,10 +61,9 @@ files: - depends_on_cudf - depends_on_pytorch - depends_on_cuml - - depends_on_ogb - - depends_on_sentence_transformers - py_version - test_python_common + - test_pythong_cugraph_pyg - depends_on_pylibwholegraph - depends_on_cugraph_pyg test_pylibwholegraph: @@ -76,6 +76,10 @@ files: - test_python_common - depends_on_pylibwholegraph - test_python_pylibwholegraph + torch_only: + output: none + includes: + - depends_on_pytorch py_build_libwholegraph: output: pyproject pyproject_dir: python/libwholegraph @@ -165,9 +169,8 @@ files: - depends_on_pytorch - depends_on_cuml - depends_on_cugraph - - depends_on_ogb - - depends_on_sentence_transformers - test_python_common + - test_python_cugraph_pyg cugraph_pyg_dev: matrix: cuda: ["12.9", "13.1"] @@ -298,7 +301,7 @@ dependencies: - output_types: [conda, pyproject, requirements] packages: - *numpy - - packaging + - &packaging packaging - pandas rapids_build_skbuild: common: 
@@ -331,104 +334,141 @@ dependencies: - pytest-benchmark - pytest-cov - pytest-xdist + specific: + - output_types: [conda] + matrices: + - matrix: + no_pytorch: "true" + packages: + - matrix: + packages: + - torchdata + - pydantic + test_python_cugraph_pyg: + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: + no_pytorch: "true" + packages: + - matrix: + packages: + - ogb + - sentence-transformers test_python_pylibwholegraph: common: - output_types: [conda, pyproject, requirements] packages: + - *packaging - pytest-forked - scipy depends_on_pytorch: - common: - - output_types: [conda] - packages: - - torchdata - - pydantic specific: - - output_types: [requirements] + # conda: choose between GPU and CPU-only pytorch + - output_types: conda matrices: - matrix: no_pytorch: "true" packages: - matrix: - cuda: "12.*" + require_gpu_pytorch: "true" packages: - - --extra-index-url=https://download.pytorch.org/whl/cu126 - - matrix: - cuda: "13.*" - packages: - - --extra-index-url=https://download.pytorch.org/whl/cu130 + - pytorch-gpu>=2.6 + # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: - - output_types: [requirements, pyproject] + # conda: optionally pins to older versions based on 'dependencies' key + - output_types: conda matrices: - matrix: no_pytorch: "true" packages: - matrix: cuda: "12.*" + dependencies: "oldest" packages: - - torch>=2.3 + - pytorch==2.6 - matrix: cuda: "13.*" + dependencies: "oldest" packages: - - &pytorch_pip torch>=2.9.0 + - pytorch==2.9 + # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: - - *pytorch_pip - - output_types: [conda] + - output_types: pyproject matrices: - # Prevent fallback to CPU-only pytorch when we want a CUDA variant. 
+ # avoid pulling in 'torch' in places like DLFW builds that prefer to install it other ways - matrix: - require_gpu: "true" + no_pytorch: "true" packages: - - pytorch-gpu - # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: - depends_on_nccl: - common: - - output_types: conda - packages: - - nccl>=2.19 - specific: - - output_types: [pyproject, requirements] + - &pytorch_pip torch>=2.6 + # wheels: handle GPU vs. CPU and version pinning together + # + # The 'pytorch.org' indices referenced in --extra-index-url below host CPU-only variants too, + # so requirements like '>=' are not safe. + # + # Using '==' and a version with the CUDA specifier like '+cu130' is the most reliable way to ensure + # the packages we want are pulled (at the expense of needing to maintain this list). + # + # 'torch' tightly pins wheels to a single {major}.{minor} CTK version. + # + # This list only contains entries exactly matching CUDA {major}.{minor} that we test in RAPIDS CI, + # to ensure a loud error alerts us to the need to update this list (or CI scripts) when new + # CTKs are added to the support matrix. 
+ - output_types: requirements matrices: + # avoid pulling in 'torch' in places like DLFW builds that prefer to install it other ways - matrix: - cuda: "12.*" - cuda_suffixed: "true" + no_pytorch: "true" packages: - - nvidia-nccl-cu12>=2.19 + # matrices below ensure CUDA 'torch' packages are used - matrix: + cuda: "12.9" + dependencies: "oldest" + require_gpu_pytorch: "true" packages: - depends_on_ogb: - common: - - output_types: [conda] - packages: - - ogb - specific: - - output_types: [requirements, pyproject] - matrices: + - &torch_cu129_index --extra-index-url=https://download.pytorch.org/whl/cu129 + - torch==2.8.0+cu129 - matrix: - no_pytorch: "true" + cuda: "12.9" + require_gpu_pytorch: "true" packages: + - *torch_cu129_index + - torch==2.10.0+cu129 - matrix: + cuda: "13.0" + dependencies: "oldest" + require_gpu_pytorch: "true" packages: - - ogb - # for MovieLens example - depends_on_sentence_transformers: + - &torch_index_cu13 --extra-index-url=https://download.pytorch.org/whl/cu130 + - torch==2.8.0+cu130 + - matrix: + cuda: "13.0" + require_gpu_pytorch: "true" + packages: + - *torch_index_cu13 + - torch==2.10.0+cu130 + - matrix: + packages: + - *pytorch_pip + depends_on_nccl: common: - - output_types: [conda] + - output_types: conda packages: - - sentence-transformers + - nccl>=2.19 specific: - - output_types: [requirements, pyproject] + - output_types: [pyproject, requirements] matrices: - matrix: - no_pytorch: "true" + cuda: "12.*" + cuda_suffixed: "true" packages: + - nvidia-nccl-cu12>=2.19 - matrix: packages: - - sentence-transformers depends_on_pyg: common: - output_types: [conda] diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 1013f7af..39ef7d2d 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,7 +55,7 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", - "torch>=2.9.0", + "torch>=2.6", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools.dynamic] diff --git a/python/pylibwholegraph/pyproject.toml b/python/pylibwholegraph/pyproject.toml index dd4dda49..4682c260 100644 --- a/python/pylibwholegraph/pyproject.toml +++ b/python/pylibwholegraph/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ [project.optional-dependencies] test = [ + "packaging", "pytest", "pytest-benchmark", "pytest-cov", From 698f1152e62105cee0aca0b53738343f7d21700b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 09:22:23 -0500 Subject: [PATCH 02/43] help git understand the diff --- dependencies.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 1848ed1b..8a2d6f52 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -456,11 +456,11 @@ dependencies: - *pytorch_pip depends_on_nccl: common: - - output_types: conda + - output_types: [conda] packages: - nccl>=2.19 specific: - - output_types: [pyproject, requirements] + - output_types: [requirements, pyproject] matrices: - matrix: cuda: "12.*" From 1c457b8f7970466922423c8ac549740eb55ae6e6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 10:24:08 -0500 Subject: [PATCH 03/43] use rapids-generate-pip-constraints, fix typo --- ci/test_wheel_cugraph-pyg.sh | 18 +++++++++--------- ci/test_wheel_pylibwholegraph.sh | 18 ++++++++++-------- dependencies.yaml | 2 +- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index c05e18af..15a41602 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -15,13 +15,12 @@ LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_ PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") 
CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" -if [[ "${CUDA_MAJOR}" == "12" ]]; then - PYTORCH_INDEX="https://download.pytorch.org/whl/cu126" -else - PYTORCH_INDEX="https://download.pytorch.org/whl/cu130" -fi +# ensure a CUDA variant of 'torch' is used +TORCH_WHEEL_DIR="$(mktemp -d)" +./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" # notes: # @@ -30,12 +29,13 @@ fi # its dependencies are available from pypi.org # rapids-pip-retry install \ - -v \ - --extra-index-url "${PYTORCH_INDEX}" \ + --prefer-binary \ + --constraint "${PIP_CONSTRAINT}" \ --extra-index-url 'https://pypi.nvidia.com' \ "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ - "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" + "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \ + "${TORCH_WHEEL_DIR}"/torch-*.whl # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index ac065e68..33a857db 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -16,23 +16,25 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# determine pytorch source -if [[ "${CUDA_MAJOR}" == "12" ]]; then - 
PYTORCH_INDEX="https://download.pytorch.org/whl/cu126" -else - PYTORCH_INDEX="https://download.pytorch.org/whl/cu130" -fi RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" +# generate constraints (possibly pinning to oldest support versions of dependencies) +rapids-generate-pip-constraints test_pylibwholegraph "${PIP_CONSTRAINT}" + +# ensure a CUDA variant of 'torch' is used +TORCH_WHEEL_DIR="$(mktemp -d)" +./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" + # echo to expand wildcard before adding `[extra]` requires for pip rapids-logger "Installing Packages" rapids-pip-retry install \ - --extra-index-url ${PYTORCH_INDEX} \ + --prefer-binary \ + --constraint "${PIP_CONSTRAINT}" \ "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph*.whl)[test]" \ "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ - 'torch>=2.3' + "${TORCH_WHEEL_DIR}"/torch-*.whl rapids-logger "pytest pylibwholegraph" cd python/pylibwholegraph/pylibwholegraph/tests diff --git a/dependencies.yaml b/dependencies.yaml index 8a2d6f52..568f4a4e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -63,7 +63,7 @@ files: - depends_on_cuml - py_version - test_python_common - - test_pythong_cugraph_pyg + - test_python_cugraph_pyg - depends_on_pylibwholegraph - depends_on_cugraph_pyg test_pylibwholegraph: From 066d5c4787889ca6707c7eb107886f92431d05d0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 11:41:49 -0500 Subject: [PATCH 04/43] handle the fallback case better, other fixes --- ci/download-torch-wheels.sh | 12 ++++++++++++ ci/test_python.sh | 4 ++-- ci/test_wheel_cugraph-pyg.sh | 27 +++++++++++++++++++-------- ci/test_wheel_pylibwholegraph.sh | 23 +++++++++++++++++------ ci/validate_wheel.sh | 22 ++++++++++++++++++++++ dependencies.yaml | 11 +++++++---- python/cugraph-pyg/pyproject.toml | 1 - 7 files changed, 79 insertions(+), 21 deletions(-) diff --git 
a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh index 7313a006..d2ecde66 100755 --- a/ci/download-torch-wheels.sh +++ b/ci/download-torch-wheels.sh @@ -17,6 +17,18 @@ set -e -u -o pipefail TORCH_WHEEL_DIR="${1}" +# skip download attempt on CUDA versions where we know there isn't a 'torch' CUDA wheel. +CUDA_MAJOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f1) +CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) +if \ + { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -lt 9 ]; } \ + || { [ "${CUDA_MAJOR}" -eq 13 ] && [ "${CUDA_MINOR}" -gt 0 ]; } \ + || [ "${CUDA_MAJOR}" -gt 13 ]; +then + rapids-logger "Skipping 'torch' wheel download. (requires CUDA 12.9+ or 13.0, found ${RAPIDS_CUDA_VERSION})" + exit 0 +fi + # Ensure CUDA-enabled 'torch' packages are always used. # # Downloading + passing the downloaded file as a requirement forces the use of this diff --git a/ci/test_python.sh b/ci/test_python.sh index 09aeb27e..6b661236 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -37,7 +37,7 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-dependency-file-generator \ --output conda \ --file-key test_cugraph_pyg \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu_pytorch=true" \ --prepend-channel "${CPP_CHANNEL}" \ --prepend-channel "${PYTHON_CHANNEL}" \ --prepend-channel "${PYTHON_NOARCH_CHANNEL}" \ @@ -76,7 +76,7 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-dependency-file-generator \ --output conda \ --file-key test_pylibwholegraph \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu_pytorch=true" \ --prepend-channel "${CPP_CHANNEL}" \ --prepend-channel "${PYTHON_CHANNEL}" \ | tee env.yaml diff --git a/ci/test_wheel_cugraph-pyg.sh 
b/ci/test_wheel_cugraph-pyg.sh index 15a41602..60c03dc0 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -18,10 +18,27 @@ CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_ # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" -# ensure a CUDA variant of 'torch' is used +PIP_INSTALL_ARGS=( + --prefer-binary + --constraint "${PIP_CONSTRAINT}" + --extra-index-url 'https://pypi.nvidia.com' + "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl + "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" + "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" +) + +# ensure a CUDA variant of 'torch' is used (if one is available) TORCH_WHEEL_DIR="$(mktemp -d)" ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" +# 'cugraph-pyg' is still expected to be importable +# and testable in an environment where 'torch' isn't installed. +if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then + rapids-echo-stderr "No 'torch' wheels downloaded." 
+else + PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) +fi + # notes: # # * echo to expand wildcard before adding `[extra]` requires for pip @@ -29,13 +46,7 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # its dependencies are available from pypi.org # rapids-pip-retry install \ - --prefer-binary \ - --constraint "${PIP_CONSTRAINT}" \ - --extra-index-url 'https://pypi.nvidia.com' \ - "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ - "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ - "$(echo "${CUGRAPH_PYG_WHEELHOUSE}"/cugraph_pyg_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \ - "${TORCH_WHEEL_DIR}"/torch-*.whl + "${PIP_INSTALL_ARGS[@]}" # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 33a857db..e7b06090 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -23,18 +23,29 @@ mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_pylibwholegraph "${PIP_CONSTRAINT}" -# ensure a CUDA variant of 'torch' is used +PIP_INSTALL_ARGS=( + --prefer-binary + --constraint "${PIP_CONSTRAINT}" + "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph*.whl)[test]" + "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl +) + +# ensure a CUDA variant of 'torch' is used (if one is available) TORCH_WHEEL_DIR="$(mktemp -d)" ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" +# 'cugraph-pyg' is still expected to be importable +# and testable in an environment where 'torch' isn't installed. +if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then + rapids-echo-stderr "No 'torch' wheels downloaded." 
+else + PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) +fi + # echo to expand wildcard before adding `[extra]` requires for pip rapids-logger "Installing Packages" rapids-pip-retry install \ - --prefer-binary \ - --constraint "${PIP_CONSTRAINT}" \ - "$(echo "${PYLIBWHOLEGRAPH_WHEELHOUSE}"/pylibwholegraph*.whl)[test]" \ - "${LIBWHOLEGRAPH_WHEELHOUSE}"/*.whl \ - "${TORCH_WHEEL_DIR}"/torch-*.whl + "${PIP_INSTALL_ARGS[@]}" rapids-logger "pytest pylibwholegraph" cd python/pylibwholegraph/pylibwholegraph/tests diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 42d0a8bf..944e8f76 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -9,6 +9,10 @@ wheel_dir_relative_path=$2 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" +python -m pip install \ + --prefer-binary \ + 'pkginfo>=1.12.1.2' + cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" @@ -43,3 +47,21 @@ rapids-logger "validate packages with 'twine'" twine check \ --strict \ "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validating that the wheel doesn't depend on 'torch' (even in an extra)" +WHEEL_FILE="$(${wheel_dir_relative_path}/*.whl)" + +# NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' +unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ +| grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ +| tee matches.txt + +if wc -l < ./matches.txt; then + echo -n "Wheel '${WHEEL_FILE}' appears to depend on 'torch'. Remove that dependency. " + echo -n "We prefer to not declare a 'torch' dependency and allow it to be managed separately, " + echo "to ensure tight control over the variants installed (including for DLFW builds)." 
+ exit 1 +else + echo "No dependency on 'torch' found" + exit 0 +fi diff --git a/dependencies.yaml b/dependencies.yaml index 568f4a4e..03ed13d3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -166,7 +166,6 @@ files: table: project.optional-dependencies key: test includes: - - depends_on_pytorch - depends_on_cuml - depends_on_cugraph - test_python_common @@ -451,9 +450,13 @@ dependencies: packages: - *torch_index_cu13 - torch==2.10.0+cu130 - - matrix: - packages: - - *pytorch_pip + # + # (empty) + # + # Intentionally no fallback entry, to ensure a loud error alerts us to the need + # to update this list and/or CI scripts when new CUDA versions, Python versions, + # etc. are added to the RAPIDS support matrix. + # depends_on_nccl: common: - output_types: [conda] diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 39ef7d2d..10c06e77 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,7 +55,6 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", - "torch>=2.6", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[tool.setuptools.dynamic] From 6f73e44263f2f80712dd82851579fcef32a17842 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 13:16:14 -0500 Subject: [PATCH 05/43] echo wheel name --- ci/validate_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 944e8f76..ef5d085c 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -49,7 +49,7 @@ twine check \ "$(echo ${wheel_dir_relative_path}/*.whl)" rapids-logger "validating that the wheel doesn't depend on 'torch' (even in an extra)" -WHEEL_FILE="$(${wheel_dir_relative_path}/*.whl)" +WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" # NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ From 271eb7ef1b08cf56a4eb6ad9c70ea8a70ac5e409 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 13:29:27 -0500 Subject: [PATCH 06/43] more pin fiddling --- conda/recipes/cugraph-pyg/recipe.yaml | 2 +- dependencies.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/recipes/cugraph-pyg/recipe.yaml b/conda/recipes/cugraph-pyg/recipe.yaml index ea70f7b9..3243a6c8 100644 --- a/conda/recipes/cugraph-pyg/recipe.yaml +++ b/conda/recipes/cugraph-pyg/recipe.yaml @@ -40,7 +40,7 @@ requirements: # This is intentionally spelled 'pytorch' (not 'pytorch-gpu' and not using build string selectors) # because we want it to be possible to at least install `cugraph-pyg` in an environment without a GPU, # to support use cases like building container images. 
- - pytorch >=2.6 + - pytorch >=2.3 - pytorch_geometric >=2.5,<2.8 tests: diff --git a/dependencies.yaml b/dependencies.yaml index 03ed13d3..bb414d39 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -372,7 +372,7 @@ dependencies: - matrix: require_gpu_pytorch: "true" packages: - - pytorch-gpu>=2.6 + - pytorch-gpu>=2.3 # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: @@ -386,7 +386,7 @@ dependencies: cuda: "12.*" dependencies: "oldest" packages: - - pytorch==2.6 + - pytorch==2.4.1 - matrix: cuda: "13.*" dependencies: "oldest" @@ -403,7 +403,7 @@ dependencies: packages: - matrix: packages: - - &pytorch_pip torch>=2.6 + - &pytorch_pip torch>=2.5 # wheels: handle GPU vs. CPU and version pinning together # # The 'pytorch.org' indices referenced in --extra-index-url below host CPU-only variants too, From 5a4064e7bf27a2548b32012375996f976d23e4e9 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Mar 2026 15:44:49 -0500 Subject: [PATCH 07/43] fix validation script --- ci/validate_wheel.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index ef5d085c..88ba85aa 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -9,10 +9,6 @@ wheel_dir_relative_path=$2 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -python -m pip install \ - --prefer-binary \ - 'pkginfo>=1.12.1.2' - cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" @@ -52,11 +48,12 @@ rapids-logger "validating that the wheel doesn't depend on 'torch' (even in an e WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" # NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' +# Use '|| true' so grep not finding any matches (exit 1) does not kill the script under set -e unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ | grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ -| tee matches.txt +| tee matches.txt || true 
-if wc -l < ./matches.txt; then +if [[ -s ./matches.txt ]]; then echo -n "Wheel '${WHEEL_FILE}' appears to depend on 'torch'. Remove that dependency. " echo -n "We prefer to not declare a 'torch' dependency and allow it to be managed separately, " echo "to ensure tight control over the variants installed (including for DLFW builds)." From 0d7215ec76e8fedc46ad5aa0a7128c56aa19db55 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:19:40 -0500 Subject: [PATCH 08/43] just wheels changes --- .../all_cuda-129_arch-aarch64.yaml | 2 - .../all_cuda-129_arch-x86_64.yaml | 2 - .../all_cuda-131_arch-aarch64.yaml | 2 - .../all_cuda-131_arch-x86_64.yaml | 2 - dependencies.yaml | 116 ++++++++---------- python/cugraph-pyg/pyproject.toml | 1 + python/pylibwholegraph/pyproject.toml | 1 - 7 files changed, 53 insertions(+), 73 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 36a21955..efb98a8c 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -27,7 +27,6 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 -- ogb - packaging - pandas - pre-commit @@ -43,7 +42,6 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy -- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 1085bba4..cc7de24e 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -27,7 +27,6 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 -- ogb - packaging - pandas - pre-commit @@ -43,7 +42,6 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy -- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml 
b/conda/environments/all_cuda-131_arch-aarch64.yaml index 0ebe16a4..65502230 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -27,7 +27,6 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 -- ogb - packaging - pandas - pre-commit @@ -43,7 +42,6 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy -- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 58f1fe21..f8bf95a2 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -27,7 +27,6 @@ dependencies: - nccl>=2.19 - ninja - numpy>=1.23,<3.0 -- ogb - packaging - pandas - pre-commit @@ -43,7 +42,6 @@ dependencies: - rmm==26.4.*,>=0.0.0a0 - scikit-build-core>=0.11.0 - scipy -- sentence-transformers - setuptools>=77.0.0 - torchdata - wheel diff --git a/dependencies.yaml b/dependencies.yaml index bb414d39..407be53c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -31,7 +31,6 @@ files: - rapids_build_skbuild - test_cpp - test_python_common - - test_python_cugraph_pyg - test_python_pylibwholegraph checks: output: none @@ -61,9 +60,10 @@ files: - depends_on_cudf - depends_on_pytorch - depends_on_cuml + - depends_on_ogb + - depends_on_sentence_transformers - py_version - test_python_common - - test_python_cugraph_pyg - depends_on_pylibwholegraph - depends_on_cugraph_pyg test_pylibwholegraph: @@ -76,10 +76,6 @@ files: - test_python_common - depends_on_pylibwholegraph - test_python_pylibwholegraph - torch_only: - output: none - includes: - - depends_on_pytorch py_build_libwholegraph: output: pyproject pyproject_dir: python/libwholegraph @@ -166,10 +162,12 @@ files: table: project.optional-dependencies key: test includes: + - depends_on_pytorch - depends_on_cuml - depends_on_cugraph + - depends_on_ogb + - 
depends_on_sentence_transformers - test_python_common - - test_python_cugraph_pyg cugraph_pyg_dev: matrix: cuda: ["12.9", "13.1"] @@ -300,7 +298,7 @@ dependencies: - output_types: [conda, pyproject, requirements] packages: - *numpy - - &packaging packaging + - packaging - pandas rapids_build_skbuild: common: @@ -333,68 +331,19 @@ dependencies: - pytest-benchmark - pytest-cov - pytest-xdist - specific: - - output_types: [conda] - matrices: - - matrix: - no_pytorch: "true" - packages: - - matrix: - packages: - - torchdata - - pydantic - test_python_cugraph_pyg: - specific: - - output_types: [conda, requirements, pyproject] - matrices: - - matrix: - no_pytorch: "true" - packages: - - matrix: - packages: - - ogb - - sentence-transformers test_python_pylibwholegraph: common: - output_types: [conda, pyproject, requirements] packages: - - *packaging - pytest-forked - scipy depends_on_pytorch: + common: + - output_types: [conda] + packages: + - torchdata + - pydantic specific: - # conda: choose between GPU and CPU-only pytorch - - output_types: conda - matrices: - - matrix: - no_pytorch: "true" - packages: - - matrix: - require_gpu_pytorch: "true" - packages: - - pytorch-gpu>=2.3 - # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - - matrix: - packages: - # conda: optionally pins to older versions based on 'dependencies' key - - output_types: conda - matrices: - - matrix: - no_pytorch: "true" - packages: - - matrix: - cuda: "12.*" - dependencies: "oldest" - packages: - - pytorch==2.4.1 - - matrix: - cuda: "13.*" - dependencies: "oldest" - packages: - - pytorch==2.9 - # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - - matrix: - packages: - output_types: pyproject matrices: # avoid pulling in 'torch' in places like DLFW builds that prefer to install it other ways @@ -457,13 +406,23 @@ dependencies: # to update this list and/or CI scripts when new CUDA versions, Python versions, # etc. 
are added to the RAPIDS support matrix. # + - output_types: [conda] + matrices: + # Prevent fallback to CPU-only pytorch when we want a CUDA variant. + - matrix: + require_gpu: "true" + packages: + - pytorch-gpu + # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. + - matrix: + packages: depends_on_nccl: common: - - output_types: [conda] + - output_types: conda packages: - nccl>=2.19 specific: - - output_types: [requirements, pyproject] + - output_types: [pyproject, requirements] matrices: - matrix: cuda: "12.*" @@ -472,6 +431,35 @@ dependencies: - nvidia-nccl-cu12>=2.19 - matrix: packages: + depends_on_ogb: + common: + - output_types: [conda] + packages: + - ogb + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + no_pytorch: "true" + packages: + - matrix: + packages: + - ogb + # for MovieLens example + depends_on_sentence_transformers: + common: + - output_types: [conda] + packages: + - sentence-transformers + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + no_pytorch: "true" + packages: + - matrix: + packages: + - sentence-transformers depends_on_pyg: common: - output_types: [conda] diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 10c06e77..52c8db00 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,6 +55,7 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", + "torch>=2.5", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[tool.setuptools.dynamic] diff --git a/python/pylibwholegraph/pyproject.toml b/python/pylibwholegraph/pyproject.toml index 4682c260..dd4dda49 100644 --- a/python/pylibwholegraph/pyproject.toml +++ b/python/pylibwholegraph/pyproject.toml @@ -33,7 +33,6 @@ dependencies = [ [project.optional-dependencies] test = [ - "packaging", "pytest", "pytest-benchmark", "pytest-cov", From fc30204c43ec8c299d5c806d3e77f4a72c0a3a68 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:21:28 -0500 Subject: [PATCH 09/43] even fewer changes --- ci/test_python.sh | 4 ++-- dependencies.yaml | 2 +- python/cugraph-pyg/pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 6b661236..09aeb27e 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -37,7 +37,7 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-dependency-file-generator \ --output conda \ --file-key test_cugraph_pyg \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu_pytorch=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu=true" \ --prepend-channel "${CPP_CHANNEL}" \ --prepend-channel "${PYTHON_CHANNEL}" \ --prepend-channel "${PYTHON_NOARCH_CHANNEL}" \ @@ -76,7 +76,7 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-dependency-file-generator \ --output conda \ --file-key test_pylibwholegraph \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu_pytorch=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};require_gpu=true" \ --prepend-channel "${CPP_CHANNEL}" \ --prepend-channel "${PYTHON_CHANNEL}" \ | tee env.yaml diff --git a/dependencies.yaml b/dependencies.yaml index 407be53c..2516bd0c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -352,7 +352,7 @@ dependencies: packages: - matrix: packages: - - &pytorch_pip torch>=2.5 + - &pytorch_pip torch>=2.9.0 # 
wheels: handle GPU vs. CPU and version pinning together # # The 'pytorch.org' indices referenced in --extra-index-url below host CPU-only variants too, diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 52c8db00..1013f7af 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,7 +55,7 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", - "torch>=2.5", + "torch>=2.9.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools.dynamic] From 97e2c0284c816eea3004895ce2b2261f97fb232c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:34:30 -0500 Subject: [PATCH 10/43] revert gitignore changes --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 8d4f88e2..1ccc2780 100644 --- a/.gitignore +++ b/.gitignore @@ -40,8 +40,6 @@ wheels/ wheelhouse/ _skbuild/ cufile.log -*.tar.gz -*.whl ## Patching *.diff From 355d5aa4e4eb8c31edf58785372f7cce1fda0991 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:36:39 -0500 Subject: [PATCH 11/43] add 'torch_only' --- dependencies.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dependencies.yaml b/dependencies.yaml index 2516bd0c..52bd1260 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -180,6 +180,10 @@ files: - depends_on_pyg - depends_on_pytorch - test_python_common + torch_only: + output: none + includes: + - depends_on_pytorch channels: - rapidsai-nightly - rapidsai From 4aad5b4f4cc24903301d21f30b2633342083e64f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 10:42:00 -0500 Subject: [PATCH 12/43] testing --- ci/test_wheel_cugraph-pyg.sh | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 60c03dc0..11d7e99e 100755 --- 
a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,9 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name cugraph_pyg --stable +CUGRAPH_PYGH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact --pkg_name cugraph-pyg cugraph-gnn 425 python wheel +) +LIBWHOLEGRAPH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel +) +PYLIBWHOLEGRAPH_WHEELHOUSE=$( + rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable +) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -48,6 +58,10 @@ fi rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" +python -c "import cugraph_pyg" +echo "--- DONE ---" +exit 0 + # 
RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" From 426c5ff2757e5e4c550df172963d1ee950e48983 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 11:25:20 -0500 Subject: [PATCH 13/43] more updates --- ci/download-torch-wheels.sh | 2 +- ci/test_wheel_cugraph-pyg.sh | 26 +++++++++++++------------- dependencies.yaml | 9 ++------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh index d2ecde66..21e84051 100755 --- a/ci/download-torch-wheels.sh +++ b/ci/download-torch-wheels.sh @@ -22,7 +22,7 @@ CUDA_MAJOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f1) CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) if \ { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -lt 9 ]; } \ - || { [ "${CUDA_MAJOR}" -eq 13 ] && [ "${CUDA_MINOR}" -gt 0 ]; }; \ + || { [ "${CUDA_MAJOR}" -eq 13 ] && [ "${CUDA_MINOR}" -gt 0 ]; } \ || [ "${CUDA_MAJOR}" -gt 13 ]; then rapids-logger "Skipping 'torch' wheel download. 
(requires CUDA 12.9+ or 13.0, found ${RAPIDS_CUDA_VERSION})" diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 11d7e99e..6ef94920 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name cugraph_pyg --stable -CUGRAPH_PYGH_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact --pkg_name cugraph-pyg cugraph-gnn 425 python wheel -) -LIBWHOLEGRAPH_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel -) -PYLIBWHOLEGRAPH_WHEELHOUSE=$( - rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable -) +LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 +# CUGRAPH_PYG_WHEELHOUSE=$( +# 
RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$( +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +# ) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" diff --git a/dependencies.yaml b/dependencies.yaml index 52bd1260..bdcd0d6c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -403,13 +403,8 @@ dependencies: packages: - *torch_index_cu13 - torch==2.10.0+cu130 - # - # (empty) - # - # Intentionally no fallback entry, to ensure a loud error alerts us to the need - # to update this list and/or CI scripts when new CUDA versions, Python versions, - # etc. are added to the RAPIDS support matrix. - # + - matrix: + packages: - output_types: [conda] matrices: # Prevent fallback to CPU-only pytorch when we want a CUDA variant. 
From 7ac88d3d54108aa28171eb9f755fa8e0301a539f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 13:57:51 -0500 Subject: [PATCH 14/43] make 'torch' optional everywhere --- .pre-commit-config.yaml | 3 +- ci/run_cugraph_pyg_pytests.sh | 8 +--- ci/test_wheel_cugraph-pyg.sh | 46 ++++++++----------- ci/test_wheel_pylibwholegraph.sh | 7 +-- dependencies.yaml | 1 - pyproject.toml | 18 +++++++- .../cugraph_pyg/tensor/dist_matrix.py | 2 +- .../cugraph-pyg/cugraph_pyg/tensor/utils.py | 4 +- python/cugraph-pyg/pyproject.toml | 1 - .../pylibwholegraph/tests/conftest.py | 10 ++++ .../test_wholememory_binding.py | 4 +- .../pylibwholegraph/test_wholememory_io.py | 6 +-- .../ops/test_graph_add_csr_self_loop.py | 7 +-- .../ops/test_graph_append_unique.py | 6 ++- .../ops/test_wholegraph_gather_scatter.py | 7 +-- ...h_unweighted_sample_without_replacement.py | 9 +++- ...aph_weighted_sample_without_replacement.py | 7 ++- .../ops/test_wholememory_cython_binding.py | 9 ++-- .../pylibwholegraph/torch/comm.py | 14 +++--- .../pylibwholegraph/torch/data_loader.py | 9 ++-- .../pylibwholegraph/torch/dlpack_utils.py | 7 +-- .../pylibwholegraph/torch/embedding.py | 23 ++++++---- .../pylibwholegraph/torch/gnn_model.py | 13 ++++-- .../pylibwholegraph/torch/graph_ops.py | 14 +++--- .../pylibwholegraph/torch/graph_structure.py | 12 +++-- .../pylibwholegraph/torch/initialize.py | 7 +-- .../pylibwholegraph/torch/tensor.py | 13 +++--- .../pylibwholegraph/torch/utils.py | 7 +-- .../pylibwholegraph/torch/wholegraph_env.py | 8 ++-- .../pylibwholegraph/torch/wholegraph_ops.py | 10 ++-- .../pylibwholegraph/torch/wholememory_ops.py | 12 +++-- .../pylibwholegraph/utils/imports.py | 46 +++++++++++++++++++ 32 files changed, 216 insertions(+), 134 deletions(-) create mode 100644 python/pylibwholegraph/pylibwholegraph/tests/conftest.py create mode 100644 python/pylibwholegraph/pylibwholegraph/utils/imports.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d995d56c..e16dc623 
100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,8 +19,9 @@ repos: rev: v0.14.3 hooks: - id: ruff-check - args: [--fix] + args: [--config, "pyproject.toml"] - id: ruff-format + args: [--config, "pyproject.toml"] - repo: https://github.com/asottile/yesqa rev: v1.3.0 hooks: diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh index 4431a013..2635d755 100755 --- a/ci/run_cugraph_pyg_pytests.sh +++ b/ci/run_cugraph_pyg_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -9,12 +9,6 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_ pytest --cache-clear --benchmark-disable "$@" . -# Used to skip certain examples in CI due to memory limitations -export CI=true - -# Enable legacy behavior of torch.load for examples relying on ogb -export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 - # Test examples (disabled due to lack of memory) #for e in "$(pwd)"/examples/*.py; do # echo "running example $e" diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 6ef94920..64ad708d 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# 
CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 -# CUGRAPH_PYG_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# LIBWHOLEGRAPH_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$( -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 +CUGRAPH_PYG_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +) +LIBWHOLEGRAPH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +) +PYLIBWHOLEGRAPH_WHEELHOUSE=$( + rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -73,20 +73,10 @@ popd export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 rapids-logger "pytest cugraph-pyg (single GPU)" -pushd python/cugraph-pyg/cugraph_pyg -python -m pytest \ - --cache-clear \ - --benchmark-disable \ - tests - -# Test examples (disabled due to lack of memory) -#for 
e in "$(pwd)"/examples/*.py; do -# rapids-logger "running example $e" -# (yes || true) | python -m torch.distributed.run --nnodes 1 --nproc_per_node 1 $e --dataset_root "${RAPIDS_DATASET_ROOT_DIR}/ogb_datasets" -#done - -# rapids-logger "running bitcoin example" -# (yes || true) | python -m torch.distributed.run --nnodes 1 --nproc_per_node 1 "$(pwd)"/examples/fraud/bitcoin_mnmg.py --dataset_root "${RAPIDS_DATASET_ROOT_DIR}" --embedding_dir "${RAPIDS_DATASET_ROOT_DIR}/bitcoin_embeddings" -# python "$(pwd)"/examples/fraud/bitcoin_rf.py --dataset_root "${RAPIDS_DATASET_ROOT_DIR}" --embedding_dir "${RAPIDS_DATASET_ROOT_DIR}/bitcoin_embeddings" +./ci/run_cugraph_pyg_pytests.sh + +rapids-logger "testing that cugraph-pyg is importable without 'torch'" +pip uninstall --yes 'torch' +python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" popd diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index e7b06090..e97cda35 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -48,9 +48,4 @@ rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" rapids-logger "pytest pylibwholegraph" -cd python/pylibwholegraph/pylibwholegraph/tests -python -m pytest \ - --cache-clear \ - --forked \ - --import-mode=append \ - . +ci/run_pylibwholegraph_pytests.sh diff --git a/dependencies.yaml b/dependencies.yaml index bdcd0d6c..1ca11bc0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -162,7 +162,6 @@ files: table: project.optional-dependencies key: test includes: - - depends_on_pytorch - depends_on_cuml - depends_on_cugraph - depends_on_ogb diff --git a/pyproject.toml b/pyproject.toml index fbe24671..b038729a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 [tool.ruff] @@ -11,4 +11,20 @@ exclude = [ ignore = [ # whitespace before : "E203", + # (flake8-tidy-imports) banned-api + "TID251" ] + +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"torch".msg = "Use the 'torch' fixture instead of 'import torch' in tests (see conftest.py)." + +[tool.ruff.lint.per-file-ignores] +# allow importing 'torch' directly in cugraph-pyg examples +"python/cugraph-pyg/cugraph_pyg/examples/*" = [ + "TID251" ] + +# allow importing 'torch' directly in pylibwholegraph examples +"python/pylibwholegraph/examples/*" = [ + "TID251" ] diff --git a/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py b/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py index 2c811245..ec331f3d 100644 --- a/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py +++ b/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py @@ -106,7 +106,7 @@ def __setitem__( self._col[idx] = val[0] self._row[idx] = val[1] - def __getitem__(self, idx: torch.Tensor) -> torch.Tensor: + def __getitem__(self, idx: "torch.Tensor") -> torch.Tensor: if self._format != "coo": raise ValueError("Getting is currently only supported for COO format") if idx.dim() != 1: diff --git a/python/cugraph-pyg/cugraph_pyg/tensor/utils.py b/python/cugraph-pyg/cugraph_pyg/tensor/utils.py index d8780000..fb994bc9 100644 --- a/python/cugraph-pyg/cugraph_pyg/tensor/utils.py +++ b/python/cugraph-pyg/cugraph_pyg/tensor/utils.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from typing import Union, List @@ -96,7 +96,7 @@ def create_wg_dist_tensor( def create_wg_dist_tensor_from_files( file_list: List[str], shape: list, - dtype: torch.dtype, + dtype: "torch.dtype", location: str = "cpu", partition_book: Union[List[int], None] = None, backend: str = "nccl", diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 1013f7af..10c06e77 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -55,7 +55,6 @@ test = [ "pytest-cov", "pytest-xdist", "sentence-transformers", - "torch>=2.9.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools.dynamic] diff --git a/python/pylibwholegraph/pylibwholegraph/tests/conftest.py b/python/pylibwholegraph/pylibwholegraph/tests/conftest.py new file mode 100644 index 00000000..1d80ddf8 --- /dev/null +++ b/python/pylibwholegraph/pylibwholegraph/tests/conftest.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import pytest + + +@pytest.fixture +def torch(): + """Pass this to any test case that needs 'torch' to be installed""" + return pytest.importorskip("torch") diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py index 7e11b731..c80afd9c 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest @@ -6,7 +6,6 @@ from pylibwholegraph.utils.multiprocess import multiprocess_run from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm from pylibwholegraph.torch.dlpack_utils import torch_import_from_dlpack -import torch # Run with: @@ -14,6 +13,7 @@ def single_test_case(wm_comm, mt, ml, malloc_size, granularity): + torch = pytest.importorskip("torch") world_rank = wm_comm.get_rank() print("Rank=%d testing mt=%s, ml=%s" % (world_rank, mt, ml)) h = wmb.malloc(malloc_size, wm_comm, mt, ml, granularity) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py index c9419c75..29380240 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest @@ -7,13 +7,11 @@ from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm from pylibwholegraph.torch.dlpack_utils import torch_import_from_dlpack from pylibwholegraph.test_utils.test_comm import random_partition -import torch import numpy as np import os import random from functools import partial - gpu_count = None @@ -49,6 +47,7 @@ def load_routine_func( round_robin_size=0, entry_partition=None, ): + torch = pytest.importorskip("torch") wm_comm, _ = init_torch_env_and_create_wm_comm( world_rank, world_size, world_rank, world_size ) @@ -181,6 +180,7 @@ def test_wholememory_load( storage_offset, round_robin_size, partition_method, + torch, ): if embedding_stride < storage_offset + embedding_dim: pytest.skip( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py index 821cf457..85cd0417 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest -import torch from pylibwholegraph.test_utils.test_comm import gen_csr_graph import pylibwholegraph.torch.graph_ops as wg_ops def host_add_csr_self_loop(csr_row_ptr_tensor, csr_col_ptr_tensor): + torch = pytest.importorskip("torch") row_num = csr_row_ptr_tensor.shape[0] - 1 edge_num = csr_col_ptr_tensor.shape[0] output_csr_row_ptr_tensor = torch.empty( @@ -28,6 +28,7 @@ def host_add_csr_self_loop(csr_row_ptr_tensor, csr_col_ptr_tensor): def routine_func(**kwargs): + torch = pytest.importorskip("torch") target_node_count = kwargs["target_node_count"] neighbor_node_count = kwargs["neighbor_node_count"] edge_num = kwargs["edge_num"] @@ -58,7 +59,7 @@ def routine_func(**kwargs): @pytest.mark.parametrize("target_node_count", [101, 113]) @pytest.mark.parametrize("neighbor_node_count", [157, 1987]) @pytest.mark.parametrize("edge_num", [1001, 2305]) -def test_add_csr_self_loop(target_node_count, neighbor_node_count, edge_num): +def test_add_csr_self_loop(target_node_count, neighbor_node_count, edge_num, torch): gpu_count = torch.cuda.device_count() assert gpu_count > 0 routine_func( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py index e325ef51..5856d943 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py @@ -1,12 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest -import torch import pylibwholegraph.torch.graph_ops as wg_ops def host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): + torch = pytest.importorskip("torch") output_neighbor_raw_to_unique = torch.empty( (neighbor_node_tensor.size(0)), dtype=torch.int32 ) @@ -19,6 +19,7 @@ def host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): def routine_func(**kwargs): + torch = pytest.importorskip("torch") target_node_count = kwargs["target_node_count"] neighbor_node_count = kwargs["neighbor_node_count"] target_node_dtype = kwargs["target_node_dtype"] @@ -80,6 +81,7 @@ def test_append_unique( neighbor_node_count, target_node_dtype, need_neighbor_raw_to_unique, + torch, ): gpu_count = torch.cuda.device_count() assert gpu_count > 0 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py index a3b8849b..cd1af84f 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py @@ -1,20 +1,20 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +import pytest import pylibwholegraph.binding.wholememory_binding as wmb from pylibwholegraph.utils.multiprocess import multiprocess_run from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm from pylibwholegraph.torch.dlpack_utils import torch_import_from_dlpack from pylibwholegraph.test_utils.test_comm import random_partition -import torch import pylibwholegraph.torch.wholememory_ops as wm_ops - # PYTHONPATH=../:$PYTHONPATH python3 -m pytest \ # ../tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py -s def gen_int_embedding(indice_tensor, embedding_dim, output_type): + torch = pytest.importorskip("torch") if embedding_dim == 0: embedding_dim = 1 # unsqueeze 2D for input (2D is required for scatter op) indice_count = indice_tensor.shape[0] @@ -41,6 +41,7 @@ def scatter_gather_test_cast( use_python_binding=True, entry_partition=None, ): + torch = pytest.importorskip("torch") world_rank = wm_comm.get_rank() world_size = wm_comm.get_size() print( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index c436e9d1..6719f5ea 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest import pylibwholegraph.binding.wholememory_binding as wmb from pylibwholegraph.utils.multiprocess import multiprocess_run from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm -import torch from functools import partial from pylibwholegraph.test_utils.test_comm import ( gen_csr_graph, @@ -19,8 +18,11 @@ import pylibwholegraph.torch.wholegraph_ops as wg_ops import random +torch = pytest.importorskip("torch") + def unweighte_sample_without_replacement_base(random_values, M, N): + torch = pytest.importorskip("torch") a = torch.empty((M,), dtype=torch.int32) Q = torch.arange(N, dtype=torch.int32) for i in range(M): @@ -39,6 +41,7 @@ def host_unweighted_sample_without_replacement_func( max_sample_count, random_seed, ): + torch = pytest.importorskip("torch") output_dest_tensor = torch.empty((total_sample_count,), dtype=col_id_dtype) output_center_localid_tensor = torch.empty((total_sample_count,), dtype=torch.int32) output_edge_gid_tensor = torch.empty((total_sample_count,), dtype=torch.int64) @@ -211,6 +214,7 @@ def host_unweighted_sample_without_replacement( def routine_func(world_rank: int, world_size: int, **kwargs): + torch = pytest.importorskip("torch") wm_comm, _ = init_torch_env_and_create_wm_comm( world_rank, world_size, world_rank, world_size ) @@ -368,6 +372,7 @@ def test_wholegraph_unweighted_sample( wholememory_type, need_center_local_output, need_edge_output, + torch, ): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py index 10ef139e..1f47f7fc 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py +++ 
b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import pytest from pylibwholegraph.utils.multiprocess import multiprocess_run from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm import pylibwholegraph.binding.wholememory_binding as wmb -import torch import random from functools import partial from pylibwholegraph.test_utils.test_comm import ( @@ -32,6 +31,7 @@ def host_weighted_sample_without_replacement_func( max_sample_count, random_seed, ): + torch = pytest.importorskip("torch") output_dest_tensor = torch.empty((total_sample_count,), dtype=col_id_dtype) output_center_localid_tensor = torch.empty((total_sample_count,), dtype=torch.int32) output_edge_gid_tensor = torch.empty((total_sample_count,), dtype=torch.int64) @@ -116,6 +116,7 @@ def host_weighted_sample_without_replacement( col_id_dtype, random_seed, ): + torch = pytest.importorskip("torch") center_nodes_count = center_nodes.size(0) output_sample_offset_tensor = host_get_sample_offset_tensor( host_csr_row_ptr, center_nodes, max_sample_count @@ -166,6 +167,7 @@ def host_weighted_sample_without_replacement( def routine_func(world_rank: int, world_size: int, **kwargs): + torch = pytest.importorskip("torch") wm_comm, _ = init_torch_env_and_create_wm_comm( world_rank, world_size, world_rank, world_size ) @@ -372,6 +374,7 @@ def test_wholegraph_weighted_sample( wholememory_type, need_center_local_output, need_edge_output, + torch, ): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholememory_cython_binding.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholememory_cython_binding.py index 
a4726383..0e3310c2 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholememory_cython_binding.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholememory_cython_binding.py @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import pytest import pylibwholegraph.binding.wholememory_binding as wmb -import torch from pylibwholegraph.torch.wholegraph_env import ( get_stream, get_wholegraph_env_fns, @@ -14,7 +13,7 @@ import time -def test_smoke(): +def test_smoke(torch): torch.cuda.set_device(0) output_len = 128 embed_dim = 10 @@ -55,7 +54,7 @@ def test_smoke(): assert wmb.py_get_wholememory_tensor_count() == 0 -def test_loop_memory(): +def test_loop_memory(torch): torch.cuda.set_device(0) embedding_dim = 1 output_len = 1 @@ -107,7 +106,7 @@ def test_loop_memory(): @pytest.mark.parametrize("output_len", list(range(1, 100, 17))) @pytest.mark.parametrize("embed_dim", list(range(1, 128, 23))) -def test_random_alloc(output_len, embed_dim): +def test_random_alloc(output_len, embed_dim, torch): torch.cuda.set_device(0) input_tensor = torch.rand((embed_dim,), device="cuda") indice_tensor = torch.arange(output_len, device="cuda") diff --git a/python/pylibwholegraph/pylibwholegraph/torch/comm.py b/python/pylibwholegraph/pylibwholegraph/torch/comm.py index 634473f7..85be715a 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/comm.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/comm.py @@ -1,10 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -import torch -import torch.distributed as dist -import torch.utils.dlpack import pylibwholegraph.binding.wholememory_binding as wmb +from pylibwholegraph.utils.imports import import_optional from .utils import ( str_to_wmb_wholememory_distributed_backend_type, wholememory_distributed_backend_type_to_str, @@ -12,6 +10,8 @@ str_to_wmb_wholememory_location, ) +torch = import_optional("torch") + global_communicators = {} local_node_communicator = None local_device_communicator = None @@ -140,13 +140,13 @@ def create_group_communicator(group_size: int = -1, comm_stride: int = 1): :param comm_stride: Stride of each rank in each group :return: WholeMemoryCommunicator """ - world_size = dist.get_world_size() + world_size = torch.distributed.get_world_size() if group_size == -1: group_size = world_size strided_group_size = group_size * comm_stride assert world_size % strided_group_size == 0 strided_group_count = world_size // strided_group_size - world_rank = dist.get_rank() + world_rank = torch.distributed.get_rank() strided_group_idx = world_rank // strided_group_size idx_in_strided_group = world_rank % strided_group_size inner_group_idx = idx_in_strided_group % comm_stride @@ -161,7 +161,7 @@ def create_group_communicator(group_size: int = -1, comm_stride: int = 1): tmp_wm_uid = wmb.PyWholeMemoryUniqueID() uid_th = torch.utils.dlpack.from_dlpack(tmp_wm_uid.__dlpack__()) uid_th_cuda = uid_th.cuda() - dist.broadcast(uid_th_cuda, group_root_rank) + torch.distributed.broadcast(uid_th_cuda, group_root_rank) uid_th.copy_(uid_th_cuda.cpu()) if strided_group_idx == strided_group and inner_group_idx == inner_group: wm_uid_th = torch.utils.dlpack.from_dlpack(wm_uid.__dlpack__()) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py index 35dd8e60..b87801f9 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py +++ 
b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py @@ -1,12 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import numpy as np -import torch -from torch.utils.data import Dataset +from pylibwholegraph.utils.imports import import_optional +torch = import_optional("torch") -class NodeClassificationDataset(Dataset): + +class NodeClassificationDataset(torch.utils.data.Dataset): def __init__(self, raw_dataset): self.dataset = raw_dataset diff --git a/python/pylibwholegraph/pylibwholegraph/torch/dlpack_utils.py b/python/pylibwholegraph/pylibwholegraph/torch/dlpack_utils.py index 25f36bf3..74e1ae82 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/dlpack_utils.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/dlpack_utils.py @@ -1,8 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -import torch -import torch.utils.dlpack +from pylibwholegraph.utils.imports import import_optional + +torch = import_optional("torch") def torch_import_from_dlpack(dp): diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index aad0a552..93bba90a 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb -import torch +from pylibwholegraph.utils.imports import import_optional from .utils import torch_dtype_to_wholememory_dtype, get_file_size from .utils import str_to_wmb_wholememory_location, str_to_wmb_wholememory_memory_type from .utils import ( @@ -20,6 +20,9 @@ from .wholegraph_env import wrap_torch_tensor, get_wholegraph_env_fns, get_stream +torch = import_optional("torch") + + class WholeMemoryOptimizer(object): """ Sparse Optimizer for WholeMemoryEmbedding. @@ -211,8 +214,8 @@ class EmbeddingLookupFn(torch.autograd.Function): @staticmethod def forward( ctx, - indice: torch.Tensor, - dummy_input: torch.Tensor, + indice: "torch.Tensor", + dummy_input: "torch.Tensor", wm_embedding, is_training: bool = False, force_dtype: Union[torch.dtype, None] = None, @@ -226,7 +229,7 @@ def forward( return output_tensor @staticmethod - def backward(ctx, grad_outputs: torch.Tensor): + def backward(ctx, grad_outputs: "torch.Tensor"): indice, output_tensor, dummy_input = ctx.saved_tensors wm_embedding = ctx.wm_embedding wm_embedding.add_gradients(indice, grad_outputs) @@ -273,7 +276,7 @@ def need_grad(self): def gather( self, - indice: torch.Tensor, + indice: "torch.Tensor", *, is_training: bool = False, force_dtype: Union[torch.dtype, None] = None, @@ -304,7 +307,7 @@ def gather( ) return output_tensor - def add_gradients(self, indice: torch.Tensor, grad_outputs: torch.Tensor): + def add_gradients(self, indice: "torch.Tensor", grad_outputs: "torch.Tensor"): self.sparse_indices.append(indice) self.sparse_grads.append(grad_outputs) @@ -373,7 +376,7 @@ def create_embedding( comm: WholeMemoryCommunicator, memory_type: str, memory_location: str, - dtype: torch.dtype, + dtype: "torch.dtype", sizes: List[int], *, cache_policy: Union[WholeMemoryCachePolicy, None] = None, @@ -462,7 +465,7 @@ def create_embedding_from_filelist( memory_type: str, memory_location: str, filelist: 
Union[List[str], str], - dtype: torch.dtype, + dtype: "torch.dtype", last_dim_size: int, *, cache_policy: Union[WholeMemoryCachePolicy, None] = None, @@ -547,7 +550,7 @@ def __init__(self, wm_embedding: WholeMemoryEmbedding): self.embedding_gather_fn = EmbeddingLookupFn.apply def forward( - self, indice: torch.Tensor, force_dtype: Union[torch.dtype, None] = None + self, indice: "torch.Tensor", force_dtype: Union[torch.dtype, None] = None ): return self.embedding_gather_fn( indice, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index b779862c..888e07af 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -1,11 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -import torch +from pylibwholegraph.utils.imports import import_optional from .graph_structure import GraphStructure from .embedding import WholeMemoryEmbedding, WholeMemoryEmbeddingModule from .common_options import parse_max_neighbors -import torch.nn.functional as F + +torch = import_optional("torch") framework_name = None @@ -185,8 +186,10 @@ def forward(self, ids): sub_graph, ) if i != self.num_layer - 1: - x_feat = F.relu(x_feat) - x_feat = F.dropout(x_feat, self.dropout, training=self.training) + x_feat = torch.nn.functional.relu(x_feat) + x_feat = torch.nn.functional.dropout( + x_feat, self.dropout, training=self.training + ) out_feat = x_feat return out_feat diff --git a/python/pylibwholegraph/pylibwholegraph/torch/graph_ops.py b/python/pylibwholegraph/pylibwholegraph/torch/graph_ops.py index ae352444..c2bec6fe 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/graph_ops.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/graph_ops.py @@ -1,7 +1,6 @@ -# SPDX-FileCopyrightText: Copyright 
(c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 - -import torch +from pylibwholegraph.utils.imports import import_optional import pylibwholegraph.binding.wholememory_binding as wmb from .wholegraph_env import ( get_stream, @@ -10,10 +9,12 @@ wrap_torch_tensor, ) +torch = import_optional("torch") + def append_unique( - target_node_tensor: torch.Tensor, - neighbor_node_tensor: torch.Tensor, + target_node_tensor: "torch.Tensor", + neighbor_node_tensor: "torch.Tensor", need_neighbor_raw_to_unique: bool = False, ): """ @@ -60,7 +61,8 @@ def append_unique( def add_csr_self_loop( - csr_row_ptr_tensor: torch.Tensor, csr_col_ptr_tensor: torch.Tensor + csr_row_ptr_tensor: "torch.Tensor", + csr_col_ptr_tensor: "torch.Tensor", ): """ Add self loop to sampled CSR graph diff --git a/python/pylibwholegraph/pylibwholegraph/torch/graph_structure.py b/python/pylibwholegraph/pylibwholegraph/torch/graph_structure.py index 700b94c9..bb6d75b3 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/graph_structure.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/graph_structure.py @@ -1,12 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -import torch +from pylibwholegraph.utils.imports import import_optional from typing import Union, List from .tensor import WholeMemoryTensor from . import graph_ops from . 
import wholegraph_ops +torch = import_optional("torch") + class GraphStructure(object): r"""Graph structure storage @@ -67,7 +69,7 @@ def set_edge_attribute(self, attr_name: str, attr_tensor: WholeMemoryTensor): def unweighted_sample_without_replacement_one_hop( self, - center_nodes_tensor: torch.Tensor, + center_nodes_tensor: "torch.Tensor", max_sample_count: int, *, random_seed: Union[int, None] = None, @@ -98,7 +100,7 @@ def unweighted_sample_without_replacement_one_hop( def weighted_sample_without_replacement_one_hop( self, weight_name: str, - center_nodes_tensor: torch.Tensor, + center_nodes_tensor: "torch.Tensor", max_sample_count: int, *, random_seed: Union[int, None] = None, @@ -133,7 +135,7 @@ def weighted_sample_without_replacement_one_hop( def multilayer_sample_without_replacement( self, - node_ids: torch.Tensor, + node_ids: "torch.Tensor", max_neighbors: List[int], weight_name: Union[str, None] = None, ): diff --git a/python/pylibwholegraph/pylibwholegraph/torch/initialize.py b/python/pylibwholegraph/pylibwholegraph/torch/initialize.py index 3f83ee64..6523779b 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/initialize.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/initialize.py @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import os -import torch -import torch.utils.dlpack +from pylibwholegraph.utils.imports import import_optional import pylibwholegraph.binding.wholememory_binding as wmb from .comm import ( set_world_info, @@ -13,6 +12,8 @@ ) from .utils import str_to_wmb_wholememory_log_level +torch = import_optional("torch") + def init( world_rank: int, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/tensor.py b/python/pylibwholegraph/pylibwholegraph/torch/tensor.py index c9950b3e..e2de562b 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/tensor.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/tensor.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb -import torch +from pylibwholegraph.utils.imports import import_optional from .utils import ( torch_dtype_to_wholememory_dtype, wholememory_dtype_to_torch_dtype, @@ -15,6 +15,7 @@ from .dlpack_utils import torch_import_from_dlpack from .wholegraph_env import wrap_torch_tensor, get_wholegraph_env_fns, get_stream +torch = import_optional("torch") WholeMemoryMemoryType = wmb.WholeMemoryMemoryType WholeMemoryMemoryLocation = wmb.WholeMemoryMemoryLocation @@ -49,7 +50,7 @@ def get_comm(self): ) def gather( - self, indice: torch.Tensor, *, force_dtype: Union[torch.dtype, None] = None + self, indice: "torch.Tensor", *, force_dtype: Union[torch.dtype, None] = None ): assert indice.dim() == 1 embedding_dim = self.shape[1] if self.dim() == 2 else 1 @@ -71,7 +72,7 @@ def gather( ) return output_tensor.view(-1) if self.dim() == 1 else output_tensor - def scatter(self, input_tensor: torch.Tensor, indice: torch.Tensor): + def scatter(self, input_tensor: "torch.Tensor", indice: "torch.Tensor"): assert indice.dim() == 1 assert input_tensor.dim() == self.dim() assert indice.shape[0] == 
input_tensor.shape[0] @@ -201,7 +202,7 @@ def create_wholememory_tensor( memory_type: str, memory_location: str, sizes: List[int], - dtype: torch.dtype, + dtype: "torch.dtype", strides: List[int], tensor_entry_partition: Union[List[int], None] = None, ): @@ -250,7 +251,7 @@ def create_wholememory_tensor_from_filelist( memory_type: str, memory_location: str, filelist: Union[List[str], str], - dtype: torch.dtype, + dtype: "torch.dtype", last_dim_size: int = 0, last_dim_strides: int = -1, tensor_entry_partition: Union[List[int], None] = None, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/utils.py b/python/pylibwholegraph/pylibwholegraph/torch/utils.py index a1b296da..4f27061a 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/utils.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/utils.py @@ -1,15 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb -import torch +from pylibwholegraph.utils.imports import import_optional import os +torch = import_optional("torch") WholeMemoryDataType = wmb.WholeMemoryDataType -def torch_dtype_to_wholememory_dtype(torch_dtype: torch.dtype): +def torch_dtype_to_wholememory_dtype(torch_dtype: "torch.dtype"): """ Convert torch.dtype to WholeMemoryDataType :param torch_dtype: torch.dtype diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index f59418fe..8829b9d7 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -1,14 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import os.path import importlib -import torch import pylibwholegraph import pylibwholegraph.binding.wholememory_binding as wmb +from pylibwholegraph.utils.imports import import_optional from typing import Union from .utils import wholememory_dtype_to_torch_dtype, torch_dtype_to_wholememory_dtype +torch = import_optional("torch") + default_wholegraph_env_context = None torch_cpp_ext_loaded = False torch_cpp_ext_lib = None @@ -46,7 +48,7 @@ def get_c_context(self): else: return id(self) - def set_tensor(self, t: torch.Tensor): + def set_tensor(self, t: "torch.Tensor"): self.tensor = t def get_handle(self): diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_ops.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_ops.py index c6808010..70b61ac4 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_ops.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_ops.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -import torch import pylibwholegraph.binding.wholememory_binding as wmb +from pylibwholegraph.utils.imports import import_optional from .wholegraph_env import ( get_stream, TorchMemoryContext, @@ -12,11 +12,13 @@ from typing import Union import random +torch = import_optional("torch") + def unweighted_sample_without_replacement( wm_csr_row_ptr_tensor: wmb.PyWholeMemoryTensor, wm_csr_col_ptr_tensor: wmb.PyWholeMemoryTensor, - center_nodes_tensor: torch.Tensor, + center_nodes_tensor: "torch.Tensor", max_sample_count: int, random_seed: Union[int, None] = None, need_center_local_output: bool = False, @@ -85,7 +87,7 @@ def weighted_sample_without_replacement( wm_csr_row_ptr_tensor: wmb.PyWholeMemoryTensor, wm_csr_col_ptr_tensor: wmb.PyWholeMemoryTensor, wm_csr_weight_ptr_tensor: wmb.PyWholeMemoryTensor, - center_nodes_tensor: torch.Tensor, + center_nodes_tensor: "torch.Tensor", max_sample_count: int, random_seed: Union[int, None] = None, need_center_local_output: bool = False, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholememory_ops.py b/python/pylibwholegraph/pylibwholegraph/torch/wholememory_ops.py index 9cb518c5..dfcf7041 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholememory_ops.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholememory_ops.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -import torch import pylibwholegraph.binding.wholememory_binding as wmb +from pylibwholegraph.utils.imports import import_optional from .wholegraph_env import ( get_stream, get_wholegraph_env_fns, @@ -10,10 +10,12 @@ ) from .utils import wholememory_dtype_to_torch_dtype +torch = import_optional("torch") + def wholememory_gather_forward_functor( wholememory_tensor: wmb.PyWholeMemoryTensor, - indices_tensor: torch.Tensor, + indices_tensor: "torch.Tensor", requires_grad=False, torch_output_dtype=None, ): @@ -48,8 +50,8 @@ def wholememory_gather_forward_functor( def wholememory_scatter_functor( - input_tensor: torch.Tensor, - indices_tensor: torch.Tensor, + input_tensor: "torch.Tensor", + indices_tensor: "torch.Tensor", wholememory_tensor: wmb.PyWholeMemoryTensor, ): """ diff --git a/python/pylibwholegraph/pylibwholegraph/utils/imports.py b/python/pylibwholegraph/pylibwholegraph/utils/imports.py new file mode 100644 index 00000000..67be22fe --- /dev/null +++ b/python/pylibwholegraph/pylibwholegraph/utils/imports.py @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +from importlib import import_module + + +class MissingModule: + """ + Raises RuntimeError when any attribute is accessed on instances of this + class. + + Instances of this class are returned by import_optional() when a module + cannot be found, which allows for code to import optional dependencies, and + have only the code paths that use the module affected. + """ + + def __init__(self, mod_name): + self.name = mod_name + + def __getattr__(self, attr): + raise RuntimeError(f"This feature requires the '{self.name}' package/module") + + +def import_optional(mod, default_mod_class=MissingModule): + """ + import the "optional" module 'mod' and return the module object or object. + If the import raises ModuleNotFoundError, returns an instance of + default_mod_class. 
+ + This method was written to support importing "optional" dependencies so + code can be written to run even if the dependency is not installed. + + Example + ------- + >> from pylibwholegraph.utils.imports import import_optional + >> torch = import_optional("torch") # torch is not installed + >> torch.set_num_threads(1) + Traceback (most recent call last): + File "", line 1, in + ... + RuntimeError: This feature requires the 'torch' package/module + """ + try: + return import_module(mod) + except ModuleNotFoundError: + return default_mod_class(mod_name=mod) From 104b8bfe46011e52410319c19621126554e87068 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 14:21:23 -0500 Subject: [PATCH 15/43] more changes --- ci/test_wheel_cugraph-pyg.sh | 17 ++++++++--------- ci/test_wheel_pylibwholegraph.sh | 13 +++++++++++-- pyproject.toml | 6 ++++-- .../cugraph-pyg/cugraph_pyg/tests/conftest.py | 17 +++++++++++------ .../tests/tensor/test_dist_matrix_mg.py | 8 +++++--- .../pylibwholegraph/test_utils/test_comm.py | 8 ++++++-- .../pylibwholegraph/tests/conftest.py | 2 +- .../pylibwholegraph/test_wholememory_io.py | 2 ++ .../ops/test_graph_append_unique.py | 2 ++ ...graph_weighted_sample_without_replacement.py | 4 ++-- .../pylibwholegraph/torch/distributed_launch.py | 9 +++++---- .../pylibwholegraph/torch/wholegraph_env.py | 2 -- 12 files changed, 57 insertions(+), 33 deletions(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 64ad708d..4aed143f 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -43,8 +43,10 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. +torch_installed=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
+ torch_installed=false else PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) fi @@ -58,10 +60,6 @@ fi rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" -python -c "import cugraph_pyg" -echo "--- DONE ---" -exit 0 - # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" @@ -72,11 +70,12 @@ popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 -rapids-logger "pytest cugraph-pyg (single GPU)" -./ci/run_cugraph_pyg_pytests.sh +if [[ "${torch_installed}" == "true" ]]; then + rapids-logger "pytest cugraph-pyg (single GPU, with 'torch')" + ./ci/run_cugraph_pyg_pytests.sh +fi -rapids-logger "testing that cugraph-pyg is importable without 'torch'" +rapids-logger "pytest cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" - -popd +./ci/run_cugraph_pyg_pytests.sh diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index e97cda35..6125f20d 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -36,8 +36,10 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. +torch_installed=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
+ torch_installed=false else PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) fi @@ -47,5 +49,12 @@ rapids-logger "Installing Packages" rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" -rapids-logger "pytest pylibwholegraph" -ci/run_pylibwholegraph_pytests.sh +if [[ "${torch_installed}" == "true" ]]; then + rapids-logger "pytest pylibwholegraph (with 'torch')" + ./ci/run_pylibwholegraph_pytests.sh +fi + +rapids-logger "pytest pylibwholegraph (no 'torch')" +pip uninstall --yes 'torch' +python -c "import pylibwholegraph; print(pylibwholegraph.__version__)" +./ci/run_pylibwholegraph_pytests.sh diff --git a/pyproject.toml b/pyproject.toml index b038729a..97a06025 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,9 @@ exclude = [ [tool.ruff.lint] ignore = [ # whitespace before : - "E203", + "E203" +] +select = [ # (flake8-tidy-imports) banned-api "TID251" ] @@ -25,6 +27,6 @@ ignore = [ ] # allow importing 'torch' directly in pylibwholegraph examples -"python/pylibwholegraph/examples/" = [ +"python/pylibwholegraph/examples/*" = [ "TID251" ] diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index f480aeb8..594dc3b7 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest import os -import torch from pylibcugraph.comms import ( @@ -28,8 +27,14 @@ gpubenchmark = pytest_benchmark.plugin.benchmark +@pytest.fixture(scope="function") +def torch(): + """Pass this to any test case that needs 'torch' to be installed""" + return pytest.importorskip("torch") + + @pytest.fixture(scope="module") -def single_pytorch_worker(): +def single_pytorch_worker(torch): os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = "12355" os.environ["LOCAL_RANK"] = "0" @@ -44,14 +49,14 @@ def single_pytorch_worker(): @pytest.fixture -def basic_pyg_graph_1(): +def basic_pyg_graph_1(torch): edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]]) size = (4, 4) return edge_index, size @pytest.fixture -def basic_pyg_graph_2(): +def basic_pyg_graph_2(torch): edge_index = torch.tensor( [ [0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9], @@ -63,7 +68,7 @@ def basic_pyg_graph_2(): @pytest.fixture -def sample_pyg_hetero_data(): +def sample_pyg_hetero_data(torch): torch.manual_seed(12345) raw_data_dict = { "v0": torch.randn(6, 3), diff --git a/python/cugraph-pyg/cugraph_pyg/tests/tensor/test_dist_matrix_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/tensor/test_dist_matrix_mg.py index 0ef4ca00..ae2d050e 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/tensor/test_dist_matrix_mg.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/tensor/test_dist_matrix_mg.py @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import os import pytest -import torch from cugraph_pyg.tensor import DistMatrix from pylibwholegraph.torch.initialize import init as wm_init @@ -13,6 +12,7 @@ def run_test_dist_matrix_creation(rank, world_size, device): """Test basic DistMatrix creation from tensors""" + torch = pytest.importorskip("torch") torch.cuda.set_device(rank) os.environ["MASTER_ADDR"] = "localhost" @@ -55,6 +55,7 @@ def run_test_dist_matrix_creation(rank, world_size, device): def run_test_dist_matrix_empty_creation(rank, world_size, device): """Test DistMatrix creation with empty initialization""" + torch = pytest.importorskip("torch") torch.cuda.set_device(rank) os.environ["MASTER_ADDR"] = "localhost" @@ -102,6 +103,7 @@ def run_test_dist_matrix_empty_creation(rank, world_size, device): def run_test_dist_matrix_invalid_cases(rank, world_size, device): """Test DistMatrix creation with invalid cases""" + torch = pytest.importorskip("torch") torch.cuda.set_device(rank) os.environ["MASTER_ADDR"] = "localhost" @@ -138,7 +140,7 @@ def run_test_dist_matrix_invalid_cases(rank, world_size, device): @pytest.mark.parametrize("device", ["cpu", "cuda"]) -def test_dist_matrix(device): +def test_dist_matrix(device, torch): """Run all DistMatrix tests""" world_size = torch.cuda.device_count() diff --git a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py index 3197b53f..5860fbd5 100644 --- a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py +++ b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py @@ -1,8 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -import torch import numpy as np +import pytest import pylibwholegraph.binding.wholememory_binding as wmb from pylibwholegraph.torch.dlpack_utils import torch_import_from_dlpack from packaging import version @@ -17,6 +17,7 @@ def gen_csr_format_from_dense_matrix( csr_col_dtype, weight_dtype, ): + torch = pytest.importorskip("torch") row_num = matrix_tensor.shape[0] col_num = matrix_tensor.shape[1] assert row_num == graph_node_count @@ -49,6 +50,7 @@ def gen_csr_graph( csr_col_dtype=torch.int32, weight_dtype=torch.float32, ): + torch = pytest.importorskip("torch") if neighbor_node_count is None: neighbor_node_count = graph_node_count all_count = graph_node_count * neighbor_node_count @@ -95,6 +97,7 @@ def host_sample_all_neighbors( col_id_dtype, total_sample_count, ): + torch = pytest.importorskip("torch") output_dest_tensor = torch.empty((total_sample_count,), dtype=col_id_dtype) output_center_localid_tensor = torch.empty((total_sample_count,), dtype=torch.int32) output_edge_gid_tensor = torch.empty((total_sample_count,), dtype=torch.int64) @@ -133,6 +136,7 @@ def copy_host_1D_tensor_to_wholememory( def host_get_sample_offset_tensor(host_csr_row_ptr, center_nodes, max_sample_count): + torch = pytest.importorskip("torch") center_nodes_count = center_nodes.size(0) output_sample_offset_tensor = torch.empty( (center_nodes_count + 1,), dtype=torch.int32 diff --git a/python/pylibwholegraph/pylibwholegraph/tests/conftest.py b/python/pylibwholegraph/pylibwholegraph/tests/conftest.py index 1d80ddf8..4032ca07 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/conftest.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/conftest.py @@ -4,7 +4,7 @@ import pytest -@pytest.fixture +@pytest.fixture(scope="function") def torch(): """Pass this to any test case that needs 'torch' to be installed""" return pytest.importorskip("torch") diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py 
b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py index 29380240..bf093dca 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_io.py @@ -294,6 +294,7 @@ def store_routine_func( storage_offset, entry_partition, ): + torch = pytest.importorskip("torch") (wm_comm, _) = init_torch_env_and_create_wm_comm( world_rank, world_size, world_rank, world_size ) @@ -347,6 +348,7 @@ def test_wholememory_store( embedding_stride, storage_offset, partition_method, + torch, ): if embedding_stride < storage_offset + embedding_dim: pytest.skip( diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py index 5856d943..77c2dfdb 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py @@ -4,6 +4,8 @@ import pytest import pylibwholegraph.torch.graph_ops as wg_ops +torch = pytest.importorskip("torch") + def host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): torch = pytest.importorskip("torch") diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py index 1f47f7fc..a597f5d7 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py @@ -355,7 +355,12 @@ def routine_func(world_rank: int, world_size: int, **kwargs): 
@pytest.mark.parametrize("graph_edge_count", [1043]) @pytest.mark.parametrize("max_sample_count", [11]) @pytest.mark.parametrize("center_node_count", [13]) -@pytest.mark.parametrize("center_node_dtype", [torch.int32, torch.int64]) +@pytest.mark.parametrize("center_node_dtype", ["int32", "int64"]) @pytest.mark.parametrize("col_id_dtype", [0, 1]) @pytest.mark.parametrize("csr_weight_dtype", [2, 3]) @pytest.mark.parametrize("wholememory_location", ([0, 1])) @@ -393,7 +393,7 @@ def test_wholegraph_weighted_sample( graph_edge_count=graph_edge_count, max_sample_count=max_sample_count, center_node_count=center_node_count, - center_node_dtype=center_node_dtype, + center_node_dtype=getattr(torch, center_node_dtype), col_id_dtype=col_id_dtype, csr_weight_dtype=csr_weight_dtype, wholememory_location=wholememory_location, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/distributed_launch.py b/python/pylibwholegraph/pylibwholegraph/torch/distributed_launch.py index e7990546..50dcaae0 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/distributed_launch.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/distributed_launch.py @@ -1,9 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +from pylibwholegraph.utils.imports import import_optional import os from argparse import ArgumentParser +torch = import_optional("torch") + class DistributedConfig(object): def __init__(self): @@ -281,10 +284,8 @@ def distributed_launch_spawn(args, main_func): ) ) - import torch.multiprocessing as mp - if distributed_config.local_size > 1: - mp.spawn( + torch.multiprocessing.spawn( main_spawn_routine, nprocs=distributed_config.local_size, args=(main_func, distributed_config), diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index 8829b9d7..72f0d8fd 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -173,8 +173,6 @@ def get_cpp_extension_src_path(): def compile_cpp_extension(): - import torch.utils.cpp_extension - global torch_cpp_ext_loaded global torch_cpp_ext_lib cpp_extension_path = os.path.join(get_cpp_extension_src_path(), "torch_cpp_ext") From 4b479f7a5d5b39b6eb8c11a2dee16754670ec92c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 15:45:07 -0500 Subject: [PATCH 16/43] more torch fixes... unconditional references in argument defaults --- ci/test_python.sh | 12 +++++++ ci/test_wheel_cugraph-pyg.sh | 36 +++++++++---------- .../pylibwholegraph/test_utils/test_comm.py | 9 ++--- .../ops/test_graph_add_csr_self_loop.py | 3 +- ...h_unweighted_sample_without_replacement.py | 7 +++- ...aph_weighted_sample_without_replacement.py | 7 +++- 6 files changed, 49 insertions(+), 25 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 09aeb27e..faff188e 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -55,6 +55,12 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then rapids-logger "Check GPU usage" nvidia-smi + # 'torch' is an optional dependency of 'cugraph_pyg'... 
confirm that it's available + # here, to reduce the risk of accidentally skipping most tests because it accidentally + # wasn't installed. + rapids-logger "Confirming that PyTorch is installed" + python -c "import torch; assert torch.cuda.is_available() is True" + rapids-logger "pytest cugraph_pyg (single GPU)" ./ci/run_cugraph_pyg_pytests.sh \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-pyg.xml" \ @@ -88,6 +94,12 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then conda activate test_pylibwholegraph set -u + # 'torch' is an optional dependency of 'pylibwholegraph'... confirm that it's available + # here, to reduce the risk of accidentally skipping most tests because it accidentally + # wasn't installed. + rapids-logger "Confirming that PyTorch is installed" + python -c "import torch; assert torch.cuda.is_available() is True" + rapids-print-env rapids-logger "Check GPU usage" diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 4aed143f..f85ec63d 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 -CUGRAPH_PYG_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -) -LIBWHOLEGRAPH_WHEELHOUSE=$( - 
RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -) -PYLIBWHOLEGRAPH_WHEELHOUSE=$( - rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -) +LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 +# CUGRAPH_PYG_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$( +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +# ) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -61,11 +61,11 @@ rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" # RAPIDS_DATASET_ROOT_DIR is used by test scripts -export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh --test -popd +# export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" +# mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +# pushd "${RAPIDS_DATASET_ROOT_DIR}" +# ./get_test_data.sh --test +# popd # Enable legacy behavior of torch.load for examples relying on ogb export 
TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 diff --git a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py index 5860fbd5..bbfd2163 100644 --- a/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py +++ b/python/pylibwholegraph/pylibwholegraph/test_utils/test_comm.py @@ -45,10 +45,11 @@ def gen_csr_format_from_dense_matrix( def gen_csr_graph( graph_node_count, graph_edge_count, - neighbor_node_count=None, - csr_row_dtype=torch.int64, - csr_col_dtype=torch.int32, - weight_dtype=torch.float32, + *, + neighbor_node_count, + csr_row_dtype, + csr_col_dtype, + weight_dtype, ): torch = pytest.importorskip("torch") if neighbor_node_count is None: diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py index 85cd0417..94e9b2c9 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py @@ -37,8 +37,9 @@ def routine_func(**kwargs): target_node_count, edge_num, neighbor_node_count, - csr_row_dtype=torch.int32, + csr_row_dtype=torch.int64, csr_col_dtype=torch.int32, + weight_dtype=torch.float32, ) csr_row_ptr_tensor_cuda = csr_row_ptr_tensor.cuda() csr_col_ptr_tensor_cuda = csr_col_ptr_tensor.cuda() diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index 6719f5ea..b4439d42 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++ 
b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -380,7 +380,12 @@ def test_wholegraph_unweighted_sample( if col_id_dtype == wmb.WholeMemoryDataType.DtInt64: csr_col_dtype = torch.int64 host_csr_row_ptr, host_csr_col_ptr, _ = gen_csr_graph( - graph_node_count, graph_edge_count, csr_col_dtype=csr_col_dtype + graph_node_count, + graph_edge_count, + neighbor_node_count=None, + csr_row_dtype=torch.int64, + csr_col_dtype=csr_col_dtype, + weight_dtype=torch.float32, ) routine_func_partial = partial( routine_func, diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py index a597f5d7..7e473f60 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_weighted_sample_without_replacement.py @@ -382,7 +382,12 @@ def test_wholegraph_weighted_sample( if col_id_dtype == 1: csr_col_dtype = torch.int64 host_csr_row_ptr, host_csr_col_ptr, host_csr_weight_ptr = gen_csr_graph( - graph_node_count, graph_edge_count, csr_col_dtype=csr_col_dtype + graph_node_count, + graph_edge_count, + neighbor_node_count=None, + csr_row_dtype=torch.int64, + csr_col_dtype=csr_col_dtype, + weight_dtype=torch.float32, ) routine_func_partial = partial( routine_func, From 7bbf218fe375fa26f1844a6d4a9188b437d6b06c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 16:38:56 -0500 Subject: [PATCH 17/43] handle more unconditional 'torch' references (this time in type hints) --- ci/run_cugraph_pyg_pytests.sh | 2 +- ci/run_pylibwholegraph_pytests.sh | 4 ++-- ci/test_wheel_cugraph-pyg.sh | 14 +++++++------ dependencies.yaml | 9 --------
.../cugraph_pyg/tensor/dist_matrix.py | 20 +++++++++---------- .../cugraph-pyg/cugraph_pyg/tests/conftest.py | 2 +- .../pylibwholegraph/torch/embedding.py | 6 +++--- .../pylibwholegraph/torch/tensor.py | 2 +- .../pylibwholegraph/torch/wholegraph_env.py | 2 +- 9 files changed, 27 insertions(+), 34 deletions(-) diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh index 2635d755..cb281a29 100755 --- a/ci/run_cugraph_pyg_pytests.sh +++ b/ci/run_cugraph_pyg_pytests.sh @@ -7,7 +7,7 @@ set -euo pipefail # Support invoking run_cugraph_pyg_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_pyg -pytest --cache-clear --benchmark-disable "$@" . +pytest --cache-clear --benchmark-disable "$@" # Test examples (disabled due to lack of memory) #for e in "$(pwd)"/examples/*.py; do diff --git a/ci/run_pylibwholegraph_pytests.sh b/ci/run_pylibwholegraph_pytests.sh index d9c858e1..8bc88e3c 100755 --- a/ci/run_pylibwholegraph_pytests.sh +++ b/ci/run_pylibwholegraph_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,4 +7,4 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibwholegraph/pylibwholegraph/ -pytest --cache-clear --forked --import-mode=append "$@" tests +pytest --cache-clear --forked --import-mode=append "$@" diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index f85ec63d..6de536cd 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -14,7 +14,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# CUGRAPH_GNN_COMMIT=5a4064e7bf27a2548b32012375996f976d23e4e9 +# CUGRAPH_GNN_COMMIT=104b8bfe46011e52410319c19621126554e87068 # CUGRAPH_PYG_WHEELHOUSE=$( # RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" # ) @@ -61,11 +61,11 @@ rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" # RAPIDS_DATASET_ROOT_DIR is used by test scripts -# export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -# mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -# pushd "${RAPIDS_DATASET_ROOT_DIR}" -# ./get_test_data.sh --test -# popd +export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" +mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +pushd "${RAPIDS_DATASET_ROOT_DIR}" +./get_test_data.sh --test +popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 @@ -77,5 +77,7 @@ fi rapids-logger "pytest cugraph-pyg 
(no 'torch')" pip uninstall --yes 'torch' + +PYTHONPATH=/opt/work/python/cugraph-pyg/ \ python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" ./ci/run_cugraph_pyg_pytests.sh diff --git a/dependencies.yaml b/dependencies.yaml index 0c3023c0..abc150fd 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -385,15 +385,6 @@ dependencies: # Default to falling back to whatever 'pytorch' is pulled in via cugraph-pyg's dependencies. - matrix: packages: - - output_types: pyproject - matrices: - # avoid pulling in 'torch' in places like DLFW builds that prefer to install it other ways - - matrix: - no_pytorch: "true" - packages: - - matrix: - packages: - - &pytorch_pip torch>=2.9.0 # wheels: handle GPU vs. CPU and version pinning together # # The 'pytorch.org' indices referenced in --extra-index-url below host CPU-only variants too, diff --git a/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py b/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py index ec331f3d..c9560ff4 100644 --- a/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py +++ b/python/cugraph-pyg/cugraph_pyg/tensor/dist_matrix.py @@ -18,14 +18,14 @@ def __init__( self, src: Optional[ Union[ - Tuple[torch.Tensor, torch.Tensor], + Tuple["torch.Tensor", "torch.Tensor"], Tuple[DistTensor, DistTensor], str, List[str], ] ] = None, shape: Optional[Union[list, tuple]] = None, - dtype: Optional[torch.dtype] = None, + dtype: Optional["torch.dtype"] = None, device: Optional[Literal["cpu", "cuda"]] = "cpu", backend: Optional[Literal["nccl", "vmm"]] = "nccl", format: Optional[Literal["csc", "coo"]] = "coo", @@ -82,8 +82,8 @@ def __init__( def __setitem__( self, - idx: Union[torch.Tensor, slice], - val: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], + idx: Union["torch.Tensor", slice], + val: Union["torch.Tensor", tuple["torch.Tensor", "torch.Tensor"]], ): if isinstance(idx, slice): size = self._col.shape[0] @@ -106,7 +106,7 @@ def __setitem__( self._col[idx] = val[0] self._row[idx] = val[1] - def 
__getitem__(self, idx: "torch.Tensor") -> torch.Tensor: + def __getitem__(self, idx: "torch.Tensor") -> "torch.Tensor": if self._format != "coo": raise ValueError("Getting is currently only supported for COO format") if idx.dim() != 1: @@ -114,11 +114,11 @@ def __getitem__(self, idx: "torch.Tensor") -> torch.Tensor: return torch.stack([self._col[idx], self._row[idx]]) - def get_local_tensor(self) -> Tuple[torch.Tensor, torch.Tensor]: + def get_local_tensor(self) -> Tuple["torch.Tensor", "torch.Tensor"]: return (self._col.get_local_tensor(), self._row.get_local_tensor()) @property - def local_col(self) -> torch.Tensor: + def local_col(self) -> "torch.Tensor": world_size = torch.distributed.get_world_size() rank = torch.distributed.get_rank() @@ -134,7 +134,7 @@ def local_col(self) -> torch.Tensor: return self._col[ix] @property - def local_row(self) -> torch.Tensor: + def local_row(self) -> "torch.Tensor": world_size = torch.distributed.get_world_size() rank = torch.distributed.get_rank() @@ -150,7 +150,7 @@ def local_row(self) -> torch.Tensor: return self._row[ix] @property - def local_coo(self) -> torch.Tensor: + def local_coo(self) -> "torch.Tensor": return torch.stack([self.local_col, self.local_row]) @property @@ -158,5 +158,5 @@ def shape(self) -> Tuple[int, int]: return (self._col.shape[0], self._row.shape[0]) @property - def dtype(self) -> torch.dtype: + def dtype(self) -> "torch.dtype": return self._col.dtype diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index 594dc3b7..81864fbd 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -27,7 +27,7 @@ gpubenchmark = pytest_benchmark.plugin.benchmark -@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def torch(): """Pass this to any test case that needs 'torch' to be installed""" return pytest.importorskip("torch") diff --git 
a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 93bba90a..9dfdf600 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -218,7 +218,7 @@ def forward( dummy_input: "torch.Tensor", wm_embedding, is_training: bool = False, - force_dtype: Union[torch.dtype, None] = None, + force_dtype: Union["torch.dtype", None] = None, ): output_tensor = wm_embedding.gather( indice, is_training=is_training, force_dtype=force_dtype @@ -279,7 +279,7 @@ def gather( indice: "torch.Tensor", *, is_training: bool = False, - force_dtype: Union[torch.dtype, None] = None, + force_dtype: Union["torch.dtype", None] = None, ): assert indice.dim() == 1 embedding_dim = self.get_embedding_tensor().shape[1] @@ -550,7 +550,7 @@ def __init__(self, wm_embedding: WholeMemoryEmbedding): self.embedding_gather_fn = EmbeddingLookupFn.apply def forward( - self, indice: "torch.Tensor", force_dtype: Union[torch.dtype, None] = None + self, indice: "torch.Tensor", force_dtype: Union["torch.dtype", None] = None ): return self.embedding_gather_fn( indice, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/tensor.py b/python/pylibwholegraph/pylibwholegraph/torch/tensor.py index e2de562b..73710ec8 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/tensor.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/tensor.py @@ -50,7 +50,7 @@ def get_comm(self): ) def gather( - self, indice: "torch.Tensor", *, force_dtype: Union[torch.dtype, None] = None + self, indice: "torch.Tensor", *, force_dtype: Union["torch.dtype", None] = None ): assert indice.dim() == 1 embedding_dim = self.shape[1] if self.dim() == 2 else 1 diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index 72f0d8fd..f4981d06 100644 --- 
a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -156,7 +156,7 @@ def get_wholegraph_env_fns(use_default=True) -> int: return wholegraph_env_context.get_env_fns() -def wrap_torch_tensor(t: Union[torch.Tensor, None]) -> wmb.WrappedLocalTensor: +def wrap_torch_tensor(t: Union["torch.Tensor", None]) -> wmb.WrappedLocalTensor: py_desc = wmb.PyWholeMemoryTensorDescription() wm_t = wmb.WrappedLocalTensor() if t is None: From 4cacebfc1058481f413b80ac4cf7e8ad84010be9 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 10 Mar 2026 16:41:23 -0500 Subject: [PATCH 18/43] revert --- ci/test_wheel_cugraph-pyg.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 6de536cd..b227b348 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -78,6 +78,5 @@ fi rapids-logger "pytest cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' -PYTHONPATH=/opt/work/python/cugraph-pyg/ \ python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" ./ci/run_cugraph_pyg_pytests.sh From 36843b6c7b669e17242c80bb7587cee49dfd7487 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 11:43:15 -0500 Subject: [PATCH 19/43] check in debugging code temporarily --- ci/test_wheel_cugraph-pyg.sh | 34 +++++++++---------- .../pylibwholegraph/torch/embedding.py | 2 ++ test.sh | 12 +++++++ 3 files changed, 31 insertions(+), 17 deletions(-) create mode 100644 test.sh diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index b227b348..df6f0032 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step 
-LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# CUGRAPH_GNN_COMMIT=104b8bfe46011e52410319c19621126554e87068 -# CUGRAPH_PYG_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# LIBWHOLEGRAPH_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$( -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +CUGRAPH_GNN_COMMIT=4cacebfc1058481f413b80ac4cf7e8ad84010be9 +CUGRAPH_PYG_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +) +LIBWHOLEGRAPH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +) +PYLIBWHOLEGRAPH_WHEELHOUSE=$( + rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable 
"${CUGRAPH_GNN_COMMIT}" +) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -62,10 +62,10 @@ rapids-pip-retry install \ # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh --test -popd +# mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +# pushd "${RAPIDS_DATASET_ROOT_DIR}" +# ./get_test_data.sh --test +# popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 9dfdf600..37c6de3c 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -539,6 +539,8 @@ def destroy_embedding(wm_embedding: WholeMemoryEmbedding): wm_embedding.wmb_embedding = None +# TODO: all of these class overrides +# AttributeError: module 'torch' has no attribute 'autograd' class WholeMemoryEmbeddingModule(torch.nn.Module): """ torch.nn.Module wrapper of WholeMemoryEmbedding diff --git a/test.sh b/test.sh new file mode 100644 index 00000000..81eeb90a --- /dev/null +++ b/test.sh @@ -0,0 +1,12 @@ + + +docker run \ + --rm \ + --gpus all \ + --env GH_TOKEN=$(gh auth token) \ + -v $(pwd):/opt/work \ + -w /opt/work \ + -it rapidsai/citestwheel:26.04-cuda12.9.1-ubuntu22.04-py3.11 \ + bash + +ci/test_wheel_cugraph-pyg.sh From 11ed00e9a3e2b7bca947e3c1f31781cc431f2ae0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 13:27:52 -0500 Subject: [PATCH 20/43] classes that inherit from 'torch' also need to handle the dependency being optional --- .pre-commit-config.yaml | 2 +- ci/test_wheel_cugraph-pyg.sh | 46 +++--- ci/test_wheel_pylibwholegraph.sh | 11 +- 
.../pylibwholegraph/torch/data_loader.py | 25 ++- .../pylibwholegraph/torch/embedding.py | 126 +++++++++------ .../pylibwholegraph/torch/gnn_model.py | 152 ++++++++++-------- 6 files changed, 215 insertions(+), 147 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e16dc623..f2eecb04 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: rev: v0.14.3 hooks: - id: ruff-check - args: [--config, "pyproject.toml"] + args: [--fix, --config, "pyproject.toml"] - id: ruff-format args: [--config, "pyproject.toml"] - repo: https://github.com/asottile/yesqa diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index df6f0032..077223b1 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,19 +11,19 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -CUGRAPH_GNN_COMMIT=4cacebfc1058481f413b80ac4cf7e8ad84010be9 -CUGRAPH_PYG_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -) -LIBWHOLEGRAPH_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -) -PYLIBWHOLEGRAPH_WHEELHOUSE=$( - rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable 
"${CUGRAPH_GNN_COMMIT}" -) +LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) +# CUGRAPH_GNN_COMMIT=4cacebfc1058481f413b80ac4cf7e8ad84010be9 +# CUGRAPH_PYG_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$( +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +# ) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -43,10 +43,10 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. -torch_installed=true +torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
- torch_installed=false + torch_downloaded=false else PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) fi @@ -62,21 +62,25 @@ rapids-pip-retry install \ # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -# mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -# pushd "${RAPIDS_DATASET_ROOT_DIR}" -# ./get_test_data.sh --test -# popd +mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +pushd "${RAPIDS_DATASET_ROOT_DIR}" +./get_test_data.sh --test +popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 -if [[ "${torch_installed}" == "true" ]]; then +if [[ "${torch_downloaded}" == "true" ]]; then + # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually + # installed here and that we've installed a package with CUDA support. + rapids-logger "Confirming that PyTorch is installed" + python -c "import torch; assert torch.cuda.is_available()" + rapids-logger "pytest cugraph-pyg (single GPU, with 'torch')" ./ci/run_cugraph_pyg_pytests.sh fi rapids-logger "pytest cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' - python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" ./ci/run_cugraph_pyg_pytests.sh diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 6125f20d..ff243373 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -36,10 +36,10 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. -torch_installed=true +torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
- torch_installed=false + torch_downloaded=false else PIP_INSTALL_ARGS+=("${TORCH_WHEEL_DIR}"/torch-*.whl) fi @@ -49,7 +49,12 @@ rapids-logger "Installing Packages" rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" -if [[ "${torch_installed}" == "true" ]]; then +if [[ "${torch_downloaded}" == "true" ]]; then + # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually + # installed here and that we've installed a package with CUDA support. + rapids-logger "Confirming that PyTorch is installed" + python -c "import torch; assert torch.cuda.is_available()" + rapids-logger "pytest pylibwholegraph (with 'torch')" ./ci/run_pylibwholegraph_pytests.sh fi diff --git a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py index b87801f9..ac54cde6 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py @@ -2,20 +2,29 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np -from pylibwholegraph.utils.imports import import_optional +from pylibwholegraph.utils.imports import import_optional, MissingModule torch = import_optional("torch") -class NodeClassificationDataset(torch.utils.data.Dataset): - def __init__(self, raw_dataset): - self.dataset = raw_dataset +if not isinstance(torch, MissingModule): - def __getitem__(self, index): - return self.dataset[index] + class NodeClassificationDataset(torch.utils.data.Dataset): + def __init__(self, raw_dataset): + self.dataset = raw_dataset - def __len__(self): - return len(self.dataset) + def __getitem__(self, index): + return self.dataset[index] + + def __len__(self): + return len(self.dataset) +else: + + class NodeClassificationDataset: + def __init__(self, raw_dataset): + raise ModuleNotFoundError( + "NodeClassificationDataset requires 'torch' to be installed." 
+ ) def create_node_classification_datasets(data_and_label: dict): diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 37c6de3c..70b8e563 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb -from pylibwholegraph.utils.imports import import_optional +from pylibwholegraph.utils.imports import MissingModule, import_optional from .utils import torch_dtype_to_wholememory_dtype, get_file_size from .utils import str_to_wmb_wholememory_location, str_to_wmb_wholememory_memory_type from .utils import ( @@ -210,31 +210,60 @@ def create_builtin_cache_policy( ) -class EmbeddingLookupFn(torch.autograd.Function): - @staticmethod - def forward( - ctx, - indice: "torch.Tensor", - dummy_input: "torch.Tensor", - wm_embedding, - is_training: bool = False, - force_dtype: Union["torch.dtype", None] = None, - ): - output_tensor = wm_embedding.gather( - indice, is_training=is_training, force_dtype=force_dtype - ) - if is_training and wm_embedding.need_grad(): - ctx.save_for_backward(indice, output_tensor, dummy_input) - ctx.wm_embedding = wm_embedding - return output_tensor +if not isinstance(torch, MissingModule): + + class EmbeddingLookupFn(torch.autograd.Function): + @staticmethod + def forward( + ctx, + indice: "torch.Tensor", + dummy_input: "torch.Tensor", + wm_embedding, + is_training: bool = False, + force_dtype: Union["torch.dtype", None] = None, + ): + output_tensor = wm_embedding.gather( + indice, is_training=is_training, force_dtype=force_dtype + ) + if is_training and wm_embedding.need_grad(): + ctx.save_for_backward(indice, output_tensor, dummy_input) + ctx.wm_embedding = wm_embedding + return output_tensor + + @staticmethod + def backward(ctx, grad_outputs: "torch.Tensor"): + indice, output_tensor, 
dummy_input = ctx.saved_tensors + wm_embedding = ctx.wm_embedding + wm_embedding.add_gradients(indice, grad_outputs) + ctx.wm_embedding = None + return None, torch.zeros_like(dummy_input), None, None, None + +else: + + class EmbeddingLookupFn: + def __init__(self, *args, **kwargs): + raise ModuleNotFoundError( + "EmbeddingLookupFn requires 'torch' to be installed." + ) - @staticmethod - def backward(ctx, grad_outputs: "torch.Tensor"): - indice, output_tensor, dummy_input = ctx.saved_tensors - wm_embedding = ctx.wm_embedding - wm_embedding.add_gradients(indice, grad_outputs) - ctx.wm_embedding = None - return None, torch.zeros_like(dummy_input), None, None, None + @staticmethod + def forward( + ctx, + indice: "torch.Tensor", + dummy_input: "torch.Tensor", + wm_embedding, + is_training: bool = False, + force_dtype: Union["torch.dtype", None] = None, + ): + raise ModuleNotFoundError( + "EmbeddingLookupFn requires 'torch' to be installed." + ) + + @staticmethod + def backward(ctx, grad_outputs: "torch.Tensor"): + raise ModuleNotFoundError( + "EmbeddingLookupFn requires 'torch' to be installed." 
+ ) class WholeMemoryEmbedding(object): @@ -539,28 +568,35 @@ def destroy_embedding(wm_embedding: WholeMemoryEmbedding): wm_embedding.wmb_embedding = None -# TODO: all of these class overrides -# AttributeError: module 'torch' has no attribute 'autograd' -class WholeMemoryEmbeddingModule(torch.nn.Module): - """ - torch.nn.Module wrapper of WholeMemoryEmbedding - """ +if not isinstance(torch, MissingModule): - def __init__(self, wm_embedding: WholeMemoryEmbedding): - super().__init__() - self.wm_embedding = wm_embedding - self.embedding_gather_fn = EmbeddingLookupFn.apply + class WholeMemoryEmbeddingModule(torch.nn.Module): + """ + torch.nn.Module wrapper of WholeMemoryEmbedding + """ - def forward( - self, indice: "torch.Tensor", force_dtype: Union["torch.dtype", None] = None - ): - return self.embedding_gather_fn( - indice, - self.wm_embedding.dummy_input, - self.wm_embedding, - self.training, - force_dtype, - ) + def __init__(self, wm_embedding: WholeMemoryEmbedding): + super().__init__() + self.wm_embedding = wm_embedding + self.embedding_gather_fn = EmbeddingLookupFn.apply + + def forward( + self, indice: "torch.Tensor", force_dtype: Union["torch.dtype", None] = None + ): + return self.embedding_gather_fn( + indice, + self.wm_embedding.dummy_input, + self.wm_embedding, + self.training, + force_dtype, + ) +else: + + class WholeMemoryEmbeddingModule: + def __init__(self, wm_embedding: WholeMemoryEmbedding): + raise ModuleNotFoundError( + "WholeMemoryEmbeddingModule requires 'torch' to be installed." + ) def create_wholememory_optimizer( diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index 888e07af..89af8118 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from pylibwholegraph.utils.imports import import_optional +from pylibwholegraph.utils.imports import import_optional, MissingModule from .graph_structure import GraphStructure from .embedding import WholeMemoryEmbedding, WholeMemoryEmbeddingModule from .common_options import parse_max_neighbors @@ -120,76 +120,90 @@ def layer_forward(layer, x_feat, x_target_feat, sub_graph): return x_feat -class HomoGNNModel(torch.nn.Module): - def __init__( - self, - graph_structure: GraphStructure, - node_embedding: WholeMemoryEmbedding, - args, - ): - super().__init__() - hidden_feat_dim = args.hiddensize - self.graph_structure = graph_structure - self.node_embedding = node_embedding - self.num_layer = args.layernum - self.hidden_feat_dim = args.hiddensize - num_head = args.heads if (args.model == "gat") else 1 - assert hidden_feat_dim % num_head == 0 - in_feat_dim = self.node_embedding.shape[1] - self.gnn_layers = create_gnn_layers( - in_feat_dim, - hidden_feat_dim, - args.classnum, - args.layernum, - num_head, - args.model, - ) - self.mean_output = True if args.model == "gat" else False - self.add_self_loop = True if args.model == "gat" else False - self.gather_fn = WholeMemoryEmbeddingModule(self.node_embedding) - self.dropout = args.dropout - self.max_neighbors = parse_max_neighbors(args.layernum, args.neighbors) - self.max_inference_neighbors = parse_max_neighbors( - args.layernum, args.inferencesample - ) +if not isinstance(torch, MissingModule): + + class HomoGNNModel(torch.nn.Module): + def __init__( + self, + graph_structure: GraphStructure, + node_embedding: WholeMemoryEmbedding, + args, + ): + super().__init__() + hidden_feat_dim = args.hiddensize + self.graph_structure = graph_structure + self.node_embedding = node_embedding + self.num_layer = args.layernum + self.hidden_feat_dim = args.hiddensize + num_head = args.heads if (args.model == "gat") else 1 + assert hidden_feat_dim % num_head == 0 + in_feat_dim = 
self.node_embedding.shape[1] + self.gnn_layers = create_gnn_layers( + in_feat_dim, + hidden_feat_dim, + args.classnum, + args.layernum, + num_head, + args.model, + ) + self.mean_output = True if args.model == "gat" else False + self.add_self_loop = True if args.model == "gat" else False + self.gather_fn = WholeMemoryEmbeddingModule(self.node_embedding) + self.dropout = args.dropout + self.max_neighbors = parse_max_neighbors(args.layernum, args.neighbors) + self.max_inference_neighbors = parse_max_neighbors( + args.layernum, args.inferencesample + ) - def forward(self, ids): - global framework_name - max_neighbors = ( - self.max_neighbors if self.training else self.max_inference_neighbors - ) - ids = ids.to(self.graph_structure.csr_col_ind.dtype).cuda() - ( - target_gids, - edge_indice, - csr_row_ptrs, - csr_col_inds, - ) = self.graph_structure.multilayer_sample_without_replacement( - ids, max_neighbors - ) - x_feat = self.gather_fn(target_gids[0], force_dtype=torch.float32) - for i in range(self.num_layer): - x_target_feat = x_feat[: target_gids[i + 1].numel()] - sub_graph = create_sub_graph( - target_gids[i], - target_gids[i + 1], - edge_indice[i], - csr_row_ptrs[i], - csr_col_inds[i], - max_neighbors[self.num_layer - 1 - i], - self.add_self_loop, + def forward(self, ids): + global framework_name + max_neighbors = ( + self.max_neighbors if self.training else self.max_inference_neighbors ) - x_feat = layer_forward( - self.gnn_layers[i], - x_feat, - x_target_feat, - sub_graph, + ids = ids.to(self.graph_structure.csr_col_ind.dtype).cuda() + ( + target_gids, + edge_indice, + csr_row_ptrs, + csr_col_inds, + ) = self.graph_structure.multilayer_sample_without_replacement( + ids, max_neighbors ) - if i != self.num_layer - 1: - x_feat = torch.nn.functional.relu(x_feat) - x_feat = torch.nn.functional.dropout( - x_feat, self.dropout, training=self.training + x_feat = self.gather_fn(target_gids[0], force_dtype=torch.float32) + for i in range(self.num_layer): + x_target_feat = 
x_feat[: target_gids[i + 1].numel()] + sub_graph = create_sub_graph( + target_gids[i], + target_gids[i + 1], + edge_indice[i], + csr_row_ptrs[i], + csr_col_inds[i], + max_neighbors[self.num_layer - 1 - i], + self.add_self_loop, + ) + x_feat = layer_forward( + self.gnn_layers[i], + x_feat, + x_target_feat, + sub_graph, ) + if i != self.num_layer - 1: + x_feat = torch.nn.functional.relu(x_feat) + x_feat = torch.nn.functional.dropout( + x_feat, self.dropout, training=self.training + ) - out_feat = x_feat - return out_feat + out_feat = x_feat + return out_feat +else: + + class HomoGNNModel: + def __init__( + self, + graph_structure: GraphStructure, + node_embedding: WholeMemoryEmbedding, + args, + ): + raise ModuleNotFoundError( + "EmbeddingLookupFn requires 'torch' to be installed." + ) From b1cb02c500a2794bc24701fbe4bb2647254d39d2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 13:28:47 -0500 Subject: [PATCH 21/43] remove debugging code --- test.sh | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 test.sh diff --git a/test.sh b/test.sh deleted file mode 100644 index 30c82c22..00000000 --- a/test.sh +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 - - -docker run \ - --rm \ - --gpus all \ - --env GH_TOKEN=$(gh auth token) \ - -v $(pwd):/opt/work \ - -w /opt/work \ - -it rapidsai/citestwheel:26.04-cuda12.9.1-ubuntu22.04-py3.11 \ - bash - -ci/test_wheel_cugraph-pyg.sh From ca6e314da11247edf7ec903d339f8ca82f85a54b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 14:18:25 -0500 Subject: [PATCH 22/43] fix typo with pytest.importorskip() --- .github/workflows/pr.yaml | 9 --------- .../wholegraph_torch/ops/test_graph_append_unique.py | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 864c5abe..c92839f5 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -17,7 +17,6 @@ jobs: - devcontainer - checks - conda-cpp-build - - conda-cpp-tests - conda-python-build - conda-python-build-noarch - conda-python-tests @@ -180,14 +179,6 @@ jobs: build_type: pull-request node_type: cpu8 script: ci/build_cpp.sh - conda-cpp-tests: - needs: [conda-cpp-build, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp - with: - build_type: pull-request - script: ci/test_cpp.sh conda-python-build: needs: conda-cpp-build secrets: inherit diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py index 77c2dfdb..804b8a6a 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py @@ -4,7 +4,7 @@ import pytest import pylibwholegraph.torch.graph_ops as wg_ops -torch = pytest.importorski("torch") +torch = pytest.importorskip("torch") def 
host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): From 2f3d4f8ee3fd004121736191a15c523549bbe610 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 14:58:18 -0500 Subject: [PATCH 23/43] more fixes --- ci/run_cugraph_pyg_pytests.sh | 10 ++++++++-- ci/run_pylibwholegraph_pytests.sh | 4 ++-- ci/test_wheel_cugraph-pyg.sh | 9 ++++++--- ci/test_wheel_pylibwholegraph.sh | 6 ++++-- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh index cb281a29..4431a013 100755 --- a/ci/run_cugraph_pyg_pytests.sh +++ b/ci/run_cugraph_pyg_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,7 +7,13 @@ set -euo pipefail # Support invoking run_cugraph_pyg_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_pyg -pytest --cache-clear --benchmark-disable "$@" +pytest --cache-clear --benchmark-disable "$@" . + +# Used to skip certain examples in CI due to memory limitations +export CI=true + +# Enable legacy behavior of torch.load for examples relying on ogb +export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 # Test examples (disabled due to lack of memory) #for e in "$(pwd)"/examples/*.py; do diff --git a/ci/run_pylibwholegraph_pytests.sh b/ci/run_pylibwholegraph_pytests.sh index 8bc88e3c..d9c858e1 100755 --- a/ci/run_pylibwholegraph_pytests.sh +++ b/ci/run_pylibwholegraph_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,4 +7,4 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibwholegraph/pylibwholegraph/ -pytest --cache-clear --forked --import-mode=append "$@" +pytest --cache-clear --forked --import-mode=append "$@" tests diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 077223b1..6500736c 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -14,7 +14,8 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) -# CUGRAPH_GNN_COMMIT=4cacebfc1058481f413b80ac4cf7e8ad84010be9 + +# CUGRAPH_GNN_COMMIT=b1cb02c500a2794bc24701fbe4bb2647254d39d2 # CUGRAPH_PYG_WHEELHOUSE=$( # RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" # ) @@ -80,7 +81,9 @@ if [[ "${torch_downloaded}" == "true" ]]; then ./ci/run_cugraph_pyg_pytests.sh fi -rapids-logger "pytest cugraph-pyg (no 'torch')" +rapids-logger "import cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' -python -c "import cugraph_pyg; print(cugraph_pyg.__version__)" +python -c "import cugraph_pyg; print(f'cugraph-pyg version: {cugraph_pyg.__version__}')" + +rapids-logger "pytest cugraph-pyg (no 'torch')" ./ci/run_cugraph_pyg_pytests.sh diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index ff243373..4fd34a6f 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ 
b/ci/test_wheel_pylibwholegraph.sh @@ -59,7 +59,9 @@ if [[ "${torch_downloaded}" == "true" ]]; then ./ci/run_pylibwholegraph_pytests.sh fi -rapids-logger "pytest pylibwholegraph (no 'torch')" +rapids-logger "import cugraph-pyg (no 'torch')" pip uninstall --yes 'torch' -python -c "import pylibwholegraph; print(pylibwholegraph.__version__)" +python -c "import cugraph_pyg; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" + +rapids-logger "pytest pylibwholegraph (no 'torch')" ./ci/run_pylibwholegraph_pytests.sh From 22fb7494b031603e7dfdd71717392991aad234f6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 17:01:02 -0500 Subject: [PATCH 24/43] fix more imports --- ci/test_wheel_pylibwholegraph.sh | 4 +-- .../cugraph_pyg/sampler/sampler_utils.py | 4 +-- ...h_unweighted_sample_without_replacement.py | 2 +- .../pylibwholegraph/torch/data_loader.py | 20 ++++++++----- .../pylibwholegraph/torch/embedding.py | 29 ++++++++++++------- .../pylibwholegraph/torch/gnn_model.py | 15 ++++++---- 6 files changed, 45 insertions(+), 29 deletions(-) diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 4fd34a6f..9fb99102 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -59,9 +59,9 @@ if [[ "${torch_downloaded}" == "true" ]]; then ./ci/run_pylibwholegraph_pytests.sh fi -rapids-logger "import cugraph-pyg (no 'torch')" +rapids-logger "import pylibwholegraph (no 'torch')" pip uninstall --yes 'torch' -python -c "import cugraph_pyg; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" +python -c "import pylibwholegraph; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" rapids-logger "pytest pylibwholegraph (no 'torch')" ./ci/run_pylibwholegraph_pytests.sh diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py b/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py index f8bb1f6e..53644afa 100644 --- 
a/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py +++ b/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py @@ -12,10 +12,8 @@ import cupy import pylibcugraph -torch_geometric = import_optional("torch_geometric") - torch = import_optional("torch") -HeteroSamplerOutput = torch_geometric.sampler.base.HeteroSamplerOutput +torch_geometric = import_optional("torch_geometric") def verify_metadata(metadata: Optional[Dict[str, Union[str, Tuple[str, str, str]]]]): diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index b4439d42..7101ef06 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -382,7 +382,7 @@ def test_wholegraph_unweighted_sample( host_csr_row_ptr, host_csr_col_ptr, _ = gen_csr_graph( graph_node_count, graph_edge_count, - graph_node_count=None, + neighbor_node_count=None, csr_row_dtype=torch.int64, csr_col_dtype=csr_col_dtype, weight_dtype=torch.float32, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py index ac54cde6..041c2d77 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/data_loader.py @@ -6,10 +6,16 @@ torch = import_optional("torch") +# NOTE: using more specific 'import_optional()' than just 'torch' for import-time checks +# (e.g. those needed for defining base classes) can be helpful because 'torch' can appear +# to be available even after a 'pip uninstall torch' if any files are left behind in +# 'site-packages/torch'. 
+torch_utils_data = import_optional("torch.utils.data") -if not isinstance(torch, MissingModule): - class NodeClassificationDataset(torch.utils.data.Dataset): +if not isinstance(torch_utils_data, MissingModule): + + class NodeClassificationDataset(torch_utils_data.Dataset): def __init__(self, raw_dataset): self.dataset = raw_dataset @@ -23,7 +29,7 @@ def __len__(self): class NodeClassificationDataset: def __init__(self, raw_dataset): raise ModuleNotFoundError( - "NodeClassificationDataset requires 'torch' to be installed." + "NodeClassificationDataset requires 'torch.utils.data'. Install 'torch'." ) @@ -65,14 +71,14 @@ def get_train_dataloader( num_replicas: int = 1, num_workers: int = 0, ): - train_sampler = torch.utils.data.distributed.DistributedSampler( + train_sampler = torch_utils_data.distributed.DistributedSampler( train_dataset, num_replicas=num_replicas, rank=replica_id, shuffle=True, drop_last=True, ) - train_dataloader = torch.utils.data.DataLoader( + train_dataloader = torch_utils_data.DataLoader( train_dataset, batch_size=batch_size, num_workers=num_workers, @@ -86,10 +92,10 @@ def get_train_dataloader( def get_valid_test_dataloader( valid_test_dataset, batch_size: int, *, num_workers: int = 0 ): - valid_test_sampler = torch.utils.data.distributed.DistributedSampler( + valid_test_sampler = torch_utils_data.distributed.DistributedSampler( valid_test_dataset, num_replicas=1, rank=0, shuffle=False, drop_last=False ) - valid_test_dataloader = torch.utils.data.DataLoader( + valid_test_dataloader = torch_utils_data.DataLoader( valid_test_dataset, batch_size=batch_size, num_workers=num_workers, diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 70b8e563..8ac7ea13 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -19,9 +19,15 @@ from .tensor import WholeMemoryTensor from .wholegraph_env 
import wrap_torch_tensor, get_wholegraph_env_fns, get_stream - torch = import_optional("torch") +# NOTE: using more specific 'import_optional()' than just 'torch' for import-time checks +# (e.g. those needed for defining base classes) can be helpful because 'torch' can appear +# to be available even after a 'pip uninstall torch' if any files are left behind in +# 'site-packages/torch'. +torch_autograd = import_optional("torch.autograd") +torch_nn = import_optional("torch.nn") + class WholeMemoryOptimizer(object): """ @@ -210,9 +216,10 @@ def create_builtin_cache_policy( ) -if not isinstance(torch, MissingModule): +# NOTE: the 'hasattr()' is necessary because sometimes 'pip uninstall' +if not isinstance(torch_autograd, MissingModule): - class EmbeddingLookupFn(torch.autograd.Function): + class EmbeddingLookupFn(torch_autograd.Function): @staticmethod def forward( ctx, @@ -243,7 +250,7 @@ def backward(ctx, grad_outputs: "torch.Tensor"): class EmbeddingLookupFn: def __init__(self, *args, **kwargs): raise ModuleNotFoundError( - "EmbeddingLookupFn requires 'torch' to be installed." + "EmbeddingLookupFn requires 'torch.autograd'. Install 'torch'." ) @staticmethod @@ -256,13 +263,13 @@ def forward( force_dtype: Union["torch.dtype", None] = None, ): raise ModuleNotFoundError( - "EmbeddingLookupFn requires 'torch' to be installed." + "EmbeddingLookupFn requires 'torch.autograd'. Install 'torch'." ) @staticmethod def backward(ctx, grad_outputs: "torch.Tensor"): raise ModuleNotFoundError( - "EmbeddingLookupFn requires 'torch' to be installed." + "EmbeddingLookupFn requires 'torch.autograd'. Install 'torch'." 
) @@ -285,7 +292,7 @@ def __init__( self.wmb_optimizer = None - self.dummy_input = torch.nn.Parameter(torch.zeros(1), requires_grad=False) + self.dummy_input = torch_nn.Parameter(torch.zeros(1), requires_grad=False) self.need_apply = False self.sparse_indices = [] self.sparse_grads = [] @@ -484,7 +491,7 @@ def create_embedding( local_tensor, local_offset, ) = wm_embedding.get_embedding_tensor().get_local_tensor() - torch.nn.init.xavier_uniform_(local_tensor) + torch_nn.init.xavier_uniform_(local_tensor) comm.barrier() return wm_embedding @@ -568,9 +575,9 @@ def destroy_embedding(wm_embedding: WholeMemoryEmbedding): wm_embedding.wmb_embedding = None -if not isinstance(torch, MissingModule): +if not isinstance(torch_nn, MissingModule): - class WholeMemoryEmbeddingModule(torch.nn.Module): + class WholeMemoryEmbeddingModule(torch_nn.Module): """ torch.nn.Module wrapper of WholeMemoryEmbedding """ @@ -595,7 +602,7 @@ def forward( class WholeMemoryEmbeddingModule: def __init__(self, wm_embedding: WholeMemoryEmbedding): raise ModuleNotFoundError( - "WholeMemoryEmbeddingModule requires 'torch' to be installed." + "WholeMemoryEmbeddingModule requires 'torch.nn.Module'. Install 'torch'." ) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index 89af8118..67fcb6d6 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -8,6 +8,11 @@ torch = import_optional("torch") +# NOTE: using more specific 'import_optional()' than just 'torch' for import-time checks +# (e.g. those needed for defining base classes) can be helpful because 'torch' can appear +# to be available even after a 'pip uninstall torch' if any files are left behind in +# 'site-packages/torch'. 
+torch_nn = import_optional("torch.nn") framework_name = None @@ -29,7 +34,7 @@ def set_framework(framework: str): def create_gnn_layers( in_feat_dim, hidden_feat_dim, class_count, num_layer, num_head, model_type ): - gnn_layers = torch.nn.ModuleList() + gnn_layers = torch_nn.ModuleList() global framework_name for i in range(num_layer): layer_output_dim = ( @@ -120,9 +125,9 @@ def layer_forward(layer, x_feat, x_target_feat, sub_graph): return x_feat -if not isinstance(torch, MissingModule): +if not isinstance(torch_nn, MissingModule): - class HomoGNNModel(torch.nn.Module): + class HomoGNNModel(torch_nn.Module): def __init__( self, graph_structure: GraphStructure, @@ -188,8 +193,8 @@ def forward(self, ids): sub_graph, ) if i != self.num_layer - 1: - x_feat = torch.nn.functional.relu(x_feat) - x_feat = torch.nn.functional.dropout( + x_feat = torch_nn.functional.relu(x_feat) + x_feat = torch_nn.functional.dropout( x_feat, self.dropout, training=self.training ) From 79b78541eafc5484b0a695a2852411454b68e364 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 17:11:49 -0500 Subject: [PATCH 25/43] pytest params need to be lazy too --- .../tests/data/test_feature_store.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py index f64bee55..d14db14c 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py @@ -53,18 +53,19 @@ def test_feature_store_basic_api(single_pytorch_worker): @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @pytest.mark.sg @pytest.mark.parametrize( - "dtype", + "dtype_name", [ - torch.float32, - torch.float16, - torch.int8, - torch.int16, - torch.int32, - torch.int64, - torch.float64, + "float32", + "float16", + "int8", + "int16", + "int32", + "int64", + 
"float64", ], ) -def test_feature_store_basic_api_types(single_pytorch_worker, dtype): +def test_feature_store_basic_api_types(single_pytorch_worker, dtype_name, torch): + dtype = getattr(torch, dtype_name) features = torch.arange(0, 2000) features = features.reshape((features.numel() // 100, 100)).to(dtype) From 2633d4fd82c07033d61bd03a8ce475500030179e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 20:30:52 -0500 Subject: [PATCH 26/43] pre-commit --- python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py index d14db14c..fc29c0a8 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest From 603979696017f350e171a5bf4462010ed42d29e4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 11 Mar 2026 21:50:19 -0500 Subject: [PATCH 27/43] more testing fixes --- ci/test_wheel_pylibwholegraph.sh | 2 +- .../wholegraph_torch/ops/test_graph_add_csr_self_loop.py | 2 +- .../tests/wholegraph_torch/ops/test_graph_append_unique.py | 6 ++---- ...test_wholegraph_unweighted_sample_without_replacement.py | 6 ++---- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 9fb99102..8a6aef48 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -61,7 +61,7 @@ fi rapids-logger "import pylibwholegraph (no 'torch')" pip uninstall --yes 'torch' -python -c "import pylibwholegraph print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" +python -c "import pylibwholegraph; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" rapids-logger "pytest pylibwholegraph (no 'torch')" ./ci/run_pylibwholegraph_pytests.sh diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py index 94e9b2c9..91e3c388 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py @@ -36,7 +36,7 @@ def routine_func(**kwargs): csr_row_ptr_tensor, csr_col_ptr_tensor, _ = gen_csr_graph( target_node_count, edge_num, - neighbor_node_count, + neighbor_node_count=neighbor_node_count, csr_row_dtype=torch.int64, csr_col_dtype=torch.int32, weight_dtype=torch.float32, diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py 
b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py index 804b8a6a..e94c1a9a 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_append_unique.py @@ -4,8 +4,6 @@ import pytest import pylibwholegraph.torch.graph_ops as wg_ops -torch = pytest.importorskip("torch") - def host_neighbor_raw_to_unique(unique_node_tensor, neighbor_node_tensor): torch = pytest.importorskip("torch") @@ -76,7 +74,7 @@ def routine_func(**kwargs): @pytest.mark.parametrize("target_node_count", [10, 113]) @pytest.mark.parametrize("neighbor_node_count", [104, 1987]) -@pytest.mark.parametrize("target_node_dtype", [torch.int32, torch.int64]) +@pytest.mark.parametrize("target_node_dtype", ["int32", "int64"]) @pytest.mark.parametrize("need_neighbor_raw_to_unique", [True, False]) def test_append_unique( target_node_count, @@ -90,6 +88,6 @@ def test_append_unique( routine_func( target_node_count=target_node_count, neighbor_node_count=neighbor_node_count, - target_node_dtype=target_node_dtype, + target_node_dtype=getattr(torch, target_node_dtype), need_neighbor_raw_to_unique=need_neighbor_raw_to_unique, ) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py index 7101ef06..75f1cd9a 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_unweighted_sample_without_replacement.py @@ -18,8 +18,6 @@ import pylibwholegraph.torch.wholegraph_ops as wg_ops import random -torch = pytest.importorskip("torch") - def 
unweighte_sample_without_replacement_base(random_values, M, N): torch = pytest.importorskip("torch") @@ -355,7 +353,7 @@ def routine_func(world_rank: int, world_size: int, **kwargs): @pytest.mark.parametrize("graph_edge_count", [1043]) @pytest.mark.parametrize("max_sample_count", [11, -1]) @pytest.mark.parametrize("center_node_count", [13]) -@pytest.mark.parametrize("center_node_dtype", [torch.int32, torch.int64]) +@pytest.mark.parametrize("center_node_dtype", ["int32", "int64"]) @pytest.mark.parametrize("col_id_dtype", [0, 1]) @pytest.mark.parametrize("wholememory_location", ([0, 1])) @pytest.mark.parametrize("wholememory_type", ([0, 1, 2])) @@ -395,7 +393,7 @@ def test_wholegraph_unweighted_sample( graph_edge_count=graph_edge_count, max_sample_count=max_sample_count, center_node_count=center_node_count, - center_node_dtype=center_node_dtype, + center_node_dtype=getattr(torch, center_node_dtype), col_id_dtype=col_id_dtype, wholememory_location=wholememory_location, wholememory_type=wholememory_type, From 005a89089ff846fa7d1fc24353173d0a675d55e0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 11:46:32 -0500 Subject: [PATCH 28/43] work around nvJitLink symbol issues, fix a few more test skips, other misc. 
fixes --- ci/download-torch-wheels.sh | 2 +- ci/run_cugraph_pyg_pytests.sh | 4 +- ci/run_pylibwholegraph_pytests.sh | 4 +- ci/test_wheel_cugraph-pyg.sh | 52 ++++++++++++------- ci/test_wheel_pylibwholegraph.sh | 25 ++++++++- ci/uninstall-torch-wheels.sh | 16 ++++++ .../test_wholememory_binding.py | 2 +- .../test_wholememory_tensor.py | 4 +- .../ops/test_graph_add_csr_self_loop.py | 2 +- .../ops/test_wholegraph_gather_scatter.py | 2 +- 10 files changed, 84 insertions(+), 29 deletions(-) create mode 100755 ci/uninstall-torch-wheels.sh diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh index 21e84051..24c68bf7 100755 --- a/ci/download-torch-wheels.sh +++ b/ci/download-torch-wheels.sh @@ -18,7 +18,7 @@ set -e -u -o pipefail TORCH_WHEEL_DIR="${1}" # skip download attempt on CUDA versions where we know there isn't a 'torch' CUDA wheel. -CUDA_MAJOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f1) +CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) if \ { [ "${CUDA_MAJOR}" -eq 12 ] && [ "${CUDA_MINOR}" -lt 9 ]; } \ diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh index 4431a013..da255e71 100755 --- a/ci/run_cugraph_pyg_pytests.sh +++ b/ci/run_cugraph_pyg_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,7 +7,7 @@ set -euo pipefail # Support invoking run_cugraph_pyg_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_pyg -pytest --cache-clear --benchmark-disable "$@" . +pytest -rs --cache-clear --benchmark-disable "$@" . 
# Used to skip certain examples in CI due to memory limitations export CI=true diff --git a/ci/run_pylibwholegraph_pytests.sh b/ci/run_pylibwholegraph_pytests.sh index d9c858e1..805698d0 100755 --- a/ci/run_pylibwholegraph_pytests.sh +++ b/ci/run_pylibwholegraph_pytests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -7,4 +7,4 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibwholegraph/pylibwholegraph/ -pytest --cache-clear --forked --import-mode=append "$@" tests +pytest -rs --cache-clear --forked --import-mode=append "$@" tests diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 6500736c..33fbf4b0 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,20 +11,20 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) - -# CUGRAPH_GNN_COMMIT=b1cb02c500a2794bc24701fbe4bb2647254d39d2 -# CUGRAPH_PYG_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="cugraph-pyg_cu12" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# LIBWHOLEGRAPH_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" 
rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$( -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -# ) +# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) + +CUGRAPH_GNN_COMMIT=603979696017f350e171a5bf4462010ed42d29e4 +CUGRAPH_PYG_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="cugraph-pyg_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" +) +LIBWHOLEGRAPH_WHEELHOUSE=$( + RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +) +PYLIBWHOLEGRAPH_WHEELHOUSE=$( + rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -44,6 +44,7 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. +CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded." 
@@ -64,14 +65,28 @@ rapids-pip-retry install \ # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh --test -popd +# pushd "${RAPIDS_DATASET_ROOT_DIR}" +# ./get_test_data.sh --test +# popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 if [[ "${torch_downloaded}" == "true" ]]; then + # TODO: remove this when RAPIDS wheels and 'torch' CUDA wheels have compatible package requirements + # + # * https://github.com/rapidsai/cugraph/issues/5443 + # * https://github.com/rapidsai/build-planning/issues/257 + # * https://github.com/rapidsai/build-planning/issues/255 + # + CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" + CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' -f2) + if [[ "${CUDA_MAJOR}" == "13" ]]; then + pip install \ + --upgrade \ + "nvidia-nvjitlink>=${CUDA_MAJOR}.${CUDA_MINOR}" + fi + # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually # installed here and that we've installed a package with CUDA support. 
rapids-logger "Confirming that PyTorch is installed" @@ -82,7 +97,8 @@ if [[ "${torch_downloaded}" == "true" ]]; then fi rapids-logger "import cugraph-pyg (no 'torch')" -pip uninstall --yes 'torch' +./ci/uninstall-torch-wheels.sh + python -c "import cugraph_pyg; print(f'cugraph-pyg version: {cugraph_pyg.__version__}')" rapids-logger "pytest cugraph-pyg (no 'torch')" diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 8a6aef48..d586f028 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -16,6 +16,14 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +# CUGRAPH_GNN_COMMIT=603979696017f350e171a5bf4462010ed42d29e4 +# LIBWHOLEGRAPH_WHEELHOUSE=$( +# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" +# ) +# PYLIBWHOLEGRAPH_WHEELHOUSE=$( +# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" +# ) + RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" @@ -49,7 +57,21 @@ rapids-logger "Installing Packages" rapids-pip-retry install \ "${PIP_INSTALL_ARGS[@]}" + if [[ "${torch_downloaded}" == "true" ]]; then + # TODO: remove this when RAPIDS wheels and 'torch' CUDA wheels have compatible package requirements + # + # * https://github.com/rapidsai/build-planning/issues/257 + # * https://github.com/rapidsai/build-planning/issues/255 + # + CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" + CUDA_MINOR=$(echo "${RAPIDS_CUDA_VERSION}" | cut -d'.' 
-f2) + if [[ "${CUDA_MAJOR}" == "13" ]]; then + pip install \ + --upgrade \ + "nvidia-nvjitlink>=${CUDA_MAJOR}.${CUDA_MINOR}" + fi + # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually # installed here and that we've installed a package with CUDA support. rapids-logger "Confirming that PyTorch is installed" @@ -60,7 +82,8 @@ if [[ "${torch_downloaded}" == "true" ]]; then fi rapids-logger "import pylibwholegraph (no 'torch')" -pip uninstall --yes 'torch' +./ci/uninstall-torch-wheels.sh + python -c "import pylibwholegraph; print(f'pylibwholegraph version: {pylibwholegraph.__version__}')" rapids-logger "pytest pylibwholegraph (no 'torch')" diff --git a/ci/uninstall-torch-wheels.sh b/ci/uninstall-torch-wheels.sh new file mode 100755 index 00000000..3590bdc0 --- /dev/null +++ b/ci/uninstall-torch-wheels.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +pip uninstall --yes 'torch' + +# 'pytest' leaves behind some pycache files in site-packages/torch that make 'import torch' +# seem to "work" even though there's not really a package there, leading to errors like +# "module 'torch' has no attribute 'distributed'" +# +# For the sake of testing, just fully delete 'torch' from site-packages to simulate an environment +# where it was never installed. 
+SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])") +rm -rf "${SITE_PACKAGES}/torch" diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py index c80afd9c..366d03e3 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_binding.py @@ -105,7 +105,7 @@ def routine_func(world_rank: int, world_size: int): wmb.finalize() -def test_dlpack(): +def test_dlpack(torch): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 multiprocess_run(gpu_count, routine_func) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_tensor.py b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_tensor.py index 648f7dc8..0e53c209 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_tensor.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/pylibwholegraph/test_wholememory_tensor.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pylibwholegraph.binding.wholememory_binding as wmb @@ -107,7 +107,7 @@ def routine_func(world_rank: int, world_size: int): wmb.finalize() -def test_wholememory_tensor(): +def test_wholememory_tensor(torch): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 multiprocess_run(gpu_count, routine_func) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py index 91e3c388..07fb409a 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_graph_add_csr_self_loop.py @@ -37,7 +37,7 @@ def routine_func(**kwargs): target_node_count, edge_num, neighbor_node_count=neighbor_node_count, - csr_row_dtype=torch.int64, + csr_row_dtype=torch.int32, csr_col_dtype=torch.int32, weight_dtype=torch.float32, ) diff --git a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py index cd1af84f..0395d2a6 100644 --- a/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py +++ b/python/pylibwholegraph/pylibwholegraph/tests/wholegraph_torch/ops/test_wholegraph_gather_scatter.py @@ -174,7 +174,7 @@ def routine_func(world_rank: int, world_size: int): wmb.finalize() -def test_wholegraph_gather_scatter(): +def test_wholegraph_gather_scatter(torch): gpu_count = wmb.fork_get_gpu_count() assert gpu_count > 0 multiprocess_run(gpu_count, routine_func) From 22ded28c9126072aa1d79512212e8090e111fb82 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 12:57:44 -0500 Subject: [PATCH 29/43] revert temporary testing stuff --- .github/workflows/pr.yaml | 11 ++++++++++- 
ci/test_wheel_cugraph-pyg.sh | 23 ++++++----------------- ci/test_wheel_pylibwholegraph.sh | 9 +-------- 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c92839f5..127f4c2a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -17,6 +17,7 @@ jobs: - devcontainer - checks - conda-cpp-build + - conda-cpp-tests - conda-python-build - conda-python-build-noarch - conda-python-tests @@ -179,6 +180,14 @@ jobs: build_type: pull-request node_type: cpu8 script: ci/build_cpp.sh + conda-cpp-tests: + needs: [conda-cpp-build, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp + with: + build_type: pull-request + script: ci/test_cpp.sh conda-python-build: needs: conda-cpp-build secrets: inherit @@ -215,7 +224,7 @@ jobs: with: arch: "amd64" build_type: pull-request - container_image: "rapidsai/ci-conda:26.04-latest" + container_image: "rapidsai/ci-conda:26.06-latest" script: "ci/build_docs.sh" wheel-build-libwholegraph: needs: checks diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 33fbf4b0..dc0d799b 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,20 +11,9 @@ package_name="cugraph-pyg" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # Download the libwholegraph, pylibwholegraph, and cugraph-pyg built in the previous step -# LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) - 
-CUGRAPH_GNN_COMMIT=603979696017f350e171a5bf4462010ed42d29e4 -CUGRAPH_PYG_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="cugraph-pyg_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-get-pr-artifact cugraph-gnn 425 python wheel "${CUGRAPH_GNN_COMMIT}" -) -LIBWHOLEGRAPH_WHEELHOUSE=$( - RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -) -PYLIBWHOLEGRAPH_WHEELHOUSE=$( - rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -) +LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) +PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") +CUGRAPH_PYG_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-github python) # generate constraints (possibly pinning to oldest support versions of dependencies) rapids-generate-pip-constraints test_cugraph_pyg "${PIP_CONSTRAINT}" @@ -65,9 +54,9 @@ rapids-pip-retry install \ # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" -# pushd "${RAPIDS_DATASET_ROOT_DIR}" -# ./get_test_data.sh --test -# popd +pushd "${RAPIDS_DATASET_ROOT_DIR}" +./get_test_data.sh --test +popd # Enable legacy behavior of torch.load for examples relying on ogb export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1 diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index d586f028..1086d439 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -16,14 +16,6 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" 
rapids-download-wheels-from-github cpp) PYLIBWHOLEGRAPH_WHEELHOUSE=$(rapids-download-from-github "$(rapids-package-name "wheel_python" pylibwholegraph --stable --cuda "$RAPIDS_CUDA_VERSION")") -# CUGRAPH_GNN_COMMIT=603979696017f350e171a5bf4462010ed42d29e4 -# LIBWHOLEGRAPH_WHEELHOUSE=$( -# RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-get-pr-artifact cugraph-gnn 425 cpp wheel "${CUGRAPH_GNN_COMMIT}" -# ) -# PYLIBWHOLEGRAPH_WHEELHOUSE=$( -# rapids-get-pr-artifact cugraph-gnn 425 python wheel --pkg_name pylibwholegraph --stable "${CUGRAPH_GNN_COMMIT}" -# ) - RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" @@ -61,6 +53,7 @@ rapids-pip-retry install \ if [[ "${torch_downloaded}" == "true" ]]; then # TODO: remove this when RAPIDS wheels and 'torch' CUDA wheels have compatible package requirements # + # * https://github.com/rapidsai/cugraph/issues/5443 # * https://github.com/rapidsai/build-planning/issues/257 # * https://github.com/rapidsai/build-planning/issues/255 # From bbe4c972e2e157b0b3f24cb09b75f043a175c50d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 13:44:38 -0500 Subject: [PATCH 30/43] remove comment --- python/pylibwholegraph/pylibwholegraph/torch/embedding.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py index 8ac7ea13..b89ebe93 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/embedding.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/embedding.py @@ -216,7 +216,6 @@ def create_builtin_cache_policy( ) -# NOTE: the 'hasattr()' is necessary because sometimes 'pip uninstall' if not isinstance(torch_autograd, MissingModule): class EmbeddingLookupFn(torch_autograd.Function): From 21920891edd35143b59cfa44a03813b0100080af Mon Sep 17 00:00:00 2001 From: James 
Lamb Date: Thu, 12 Mar 2026 20:41:02 -0500 Subject: [PATCH 31/43] Apply suggestion from @jameslamb --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index db6d1f82..8f76fc2e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -224,7 +224,7 @@ jobs: with: arch: "amd64" build_type: pull-request - container_image: "rapidsai/ci-conda:26.06-latest" + container_image: "rapidsai/ci-conda:26.04-latest" script: "ci/build_docs.sh" wheel-build-libwholegraph: needs: checks From b827cc22ac0fa864da45139a0f3af17e3b8c70ea Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 21:19:12 -0500 Subject: [PATCH 32/43] fix copy-paste mistakes Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- ci/test_wheel_pylibwholegraph.sh | 4 ++-- python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index 1086d439..ec31f656 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -34,7 +34,7 @@ PIP_INSTALL_ARGS=( TORCH_WHEEL_DIR="$(mktemp -d)" ./ci/download-torch-wheels.sh "${TORCH_WHEEL_DIR}" -# 'cugraph-pyg' is still expected to be importable +# 'pylibwholegraph' is still expected to be importable # and testable in an environment where 'torch' isn't installed. torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then @@ -65,7 +65,7 @@ if [[ "${torch_downloaded}" == "true" ]]; then "nvidia-nvjitlink>=${CUDA_MAJOR}.${CUDA_MINOR}" fi - # 'torch' is an optional dependency of 'cugraph-pyg'... confirm that it's actually + # 'torch' is an optional dependency of 'pylibwholegraph'... confirm that it's actually # installed here and that we've installed a package with CUDA support. 
rapids-logger "Confirming that PyTorch is installed" python -c "import torch; assert torch.cuda.is_available()" diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index 67fcb6d6..a0422b89 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -210,5 +210,5 @@ def __init__( args, ): raise ModuleNotFoundError( - "EmbeddingLookupFn requires 'torch' to be installed." + "HomoGNNModel requires 'torch' to be installed." ) From 6a958e6221123d845132a9ad4997fa77d8e3502f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 21:21:54 -0500 Subject: [PATCH 33/43] standardize dependencies.yaml filters --- ci/download-torch-wheels.sh | 2 +- dependencies.yaml | 8 ++++---- python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py | 4 +--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/ci/download-torch-wheels.sh b/ci/download-torch-wheels.sh index 24c68bf7..82b22787 100755 --- a/ci/download-torch-wheels.sh +++ b/ci/download-torch-wheels.sh @@ -39,7 +39,7 @@ fi rapids-dependency-file-generator \ --output requirements \ --file-key "torch_only" \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES};require_gpu_pytorch=true" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES};require_gpu=true" \ | tee ./torch-constraints.txt rapids-pip-retry download \ diff --git a/dependencies.yaml b/dependencies.yaml index abc150fd..bc277925 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -408,26 +408,26 @@ dependencies: - matrix: cuda: "12.9" dependencies: "oldest" - require_gpu_pytorch: "true" + require_gpu: "true" packages: - &torch_cu129_index --extra-index-url=https://download.pytorch.org/whl/cu129 - torch==2.8.0+cu129 - matrix: cuda: "12.9" - require_gpu_pytorch: "true" + 
require_gpu: "true" packages: - *torch_cu129_index - torch==2.10.0+cu129 - matrix: cuda: "13.0" dependencies: "oldest" - require_gpu_pytorch: "true" + require_gpu: "true" packages: - &torch_index_cu13 --extra-index-url=https://download.pytorch.org/whl/cu130 - torch==2.8.0+cu130 - matrix: cuda: "13.0" - require_gpu_pytorch: "true" + require_gpu: "true" packages: - *torch_index_cu13 - torch==2.10.0+cu130 diff --git a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py index a0422b89..c6e2813a 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/gnn_model.py @@ -209,6 +209,4 @@ def __init__( node_embedding: WholeMemoryEmbedding, args, ): - raise ModuleNotFoundError( - "HomoGNNModel requires 'torch' to be installed." - ) + raise ModuleNotFoundError("HomoGNNModel requires 'torch' to be installed.") From 2c3d0d03e9b2b2dd38ed41773483d4e9aa77a993 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 21:53:14 -0500 Subject: [PATCH 34/43] Update ci/validate_wheel.sh Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- ci/validate_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 88ba85aa..a759fb7b 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -50,7 +50,7 @@ WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" # NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' # Use '|| true' so grep not finding any matches (exit 1) does not kill the script under set -e unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ -| grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ +| grep -E '^Requires-Dist:.*\btorch\b([><=!~ ].*)?' 
\ | tee matches.txt || true if [[ -s ./matches.txt ]]; then From 40cdfa875858a1cd2bd9ac9a1e5d10944b427381 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 21:53:40 -0500 Subject: [PATCH 35/43] Update pyproject.toml Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 97a06025..6c8d15f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ select = [ ] [tool.ruff.lint.flake8-tidy-imports.banned-api] -"torch".msg = "Use the 'torch' fixture instead of 'import torch' in tests (see conftest.py)." +"torch".msg = "Use 'import_optional(\"torch\")' in library code, or the 'torch' pytest fixture in test code (see conftest.py), instead of 'import torch'." [tool.ruff.lint.per-file-ignores] # allow importing 'torch' directly in cugraph-pyg examples From 41c5277d04a5acb88bf6f0e1c6c8b248d066b237 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 12 Mar 2026 22:03:30 -0500 Subject: [PATCH 36/43] Apply suggestion from @jameslamb --- ci/validate_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index a759fb7b..88ba85aa 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -50,7 +50,7 @@ WHEEL_FILE="$(echo ${wheel_dir_relative_path}/*.whl)" # NOTE: group of specifiers after 'torch' to avoid a false positive like 'torch-geometric' # Use '|| true' so grep not finding any matches (exit 1) does not kill the script under set -e unzip -p "${WHEEL_FILE}" '*.dist-info/METADATA' \ -| grep -E '^Requires-Dist:.*\btorch\b([><=!~ ].*)?' 
\ +| grep -E '^Requires-Dist:.*torch[><=!~ ]+.*' \ | tee matches.txt || true if [[ -s ./matches.txt ]]; then From eed447c9b15273338605078911ef8e2633ba39a1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 13 Mar 2026 14:54:14 -0500 Subject: [PATCH 37/43] one more import --- python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index f4981d06..dff09220 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -10,6 +10,7 @@ from .utils import wholememory_dtype_to_torch_dtype, torch_dtype_to_wholememory_dtype torch = import_optional("torch") +torch_utils = import_optional("torch.utils") default_wholegraph_env_context = None torch_cpp_ext_loaded = False @@ -192,7 +193,7 @@ def compile_cpp_extension(): extra_ldflags.append( "".join(["-L", os.path.join(os.environ["LIBWHOLEGRAPH_DIR"], "lib")]) ) - torch.utils.cpp_extension.load( + torch_utils.cpp_extension.load( name="pylibwholegraph.pylibwholegraph_torch_ext", sources=[ os.path.join(cpp_extension_path, "wholegraph_torch_ext.cpp"), From 79a6efecf918403f568c5e356b00d73bd4ddf2af Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 13 Mar 2026 15:47:49 -0500 Subject: [PATCH 38/43] fix --- .../pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py index dff09220..d9c90a5e 100644 --- a/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py +++ b/python/pylibwholegraph/pylibwholegraph/torch/wholegraph_env.py @@ -10,7 +10,7 @@ from .utils import wholememory_dtype_to_torch_dtype, torch_dtype_to_wholememory_dtype torch = 
import_optional("torch") -torch_utils = import_optional("torch.utils") +torch_utils_cpp_ext = import_optional("torch.utils.cpp_extension") default_wholegraph_env_context = None torch_cpp_ext_loaded = False @@ -193,7 +193,7 @@ def compile_cpp_extension(): extra_ldflags.append( "".join(["-L", os.path.join(os.environ["LIBWHOLEGRAPH_DIR"], "lib")]) ) - torch_utils.cpp_extension.load( + torch_utils_cpp_ext.load( name="pylibwholegraph.pylibwholegraph_torch_ext", sources=[ os.path.join(cpp_extension_path, "wholegraph_torch_ext.cpp"), From a61a427b35e5e31b749e57ff632941293ee287d6 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 16 Mar 2026 15:29:34 -0700 Subject: [PATCH 39/43] make optional imports lazy --- .../pylibwholegraph/utils/imports.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/pylibwholegraph/pylibwholegraph/utils/imports.py b/python/pylibwholegraph/pylibwholegraph/utils/imports.py index 67be22fe..c0a9851e 100644 --- a/python/pylibwholegraph/pylibwholegraph/utils/imports.py +++ b/python/pylibwholegraph/pylibwholegraph/utils/imports.py @@ -21,6 +21,18 @@ def __getattr__(self, attr): raise RuntimeError(f"This feature requires the '{self.name}' package/module") +class FoundModule: + def __init__(self, mod): + self.mod = mod + self.imported = False + + def __getattr__(self, attr): + if not self.imported: + self.mod = import_module(self.mod) + self.imported = True + return getattr(self.mod, attr) + + def import_optional(mod, default_mod_class=MissingModule): """ import the "optional" module 'mod' and return the module object or object. 
@@ -41,6 +53,6 @@ def import_optional(mod, default_mod_class=MissingModule): RuntimeError: This feature requires the 'torch' package/module """ try: - return import_module(mod) + return FoundModule(mod) except ModuleNotFoundError: return default_mod_class(mod_name=mod) From 96201b6c96227d68bdc00c8b6cd1901ee3c93458 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 16 Mar 2026 15:54:38 -0700 Subject: [PATCH 40/43] fix module check - meant to change to use find_spec --- python/pylibwholegraph/pylibwholegraph/utils/imports.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pylibwholegraph/pylibwholegraph/utils/imports.py b/python/pylibwholegraph/pylibwholegraph/utils/imports.py index c0a9851e..9b9ec65b 100644 --- a/python/pylibwholegraph/pylibwholegraph/utils/imports.py +++ b/python/pylibwholegraph/pylibwholegraph/utils/imports.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from importlib import import_module +from importlib.util import find_spec class MissingModule: @@ -52,7 +53,7 @@ def import_optional(mod, default_mod_class=MissingModule): ... 
RuntimeError: This feature requires the 'torch' package/module """ - try: + if find_spec(mod) is not None: return FoundModule(mod) - except ModuleNotFoundError: + else: return default_mod_class(mod_name=mod) From 456857ae02e36f02aad06b2bc251bb729a5f30d0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Mar 2026 10:45:02 -0500 Subject: [PATCH 41/43] handle dotted imports, make ruff selections explicit --- .pre-commit-config.yaml | 3 +++ pyproject.toml | 6 +++++ .../cugraph-pyg/cugraph_pyg/utils/imports.py | 27 ++++++++++++++++--- .../pylibwholegraph/_doctor_check.py | 2 +- .../pylibwholegraph/utils/imports.py | 10 ++++++- 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb393465..f284c3df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,6 +28,9 @@ repos: - id: yesqa additional_dependencies: - flake8==7.1.1 + exclude: | + (?x) + python/pylibwholegraph/pylibwholegraph/_doctor_check[.]py$ - repo: https://github.com/pre-commit/mirrors-clang-format rev: v20.1.4 hooks: diff --git a/pyproject.toml b/pyproject.toml index 6c8d15f5..5662e9f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,12 @@ ignore = [ "E203" ] select = [ + # (pycodestyle) + "E4", + "E7", + "E9", + # (pyflakes) + "F", # (flake8-tidy-imports) banned-api "TID251" ] diff --git a/python/cugraph-pyg/cugraph_pyg/utils/imports.py b/python/cugraph-pyg/cugraph_pyg/utils/imports.py index b4e4df42..270b2eca 100644 --- a/python/cugraph-pyg/cugraph_pyg/utils/imports.py +++ b/python/cugraph-pyg/cugraph_pyg/utils/imports.py @@ -1,8 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from packaging.requirements import Requirement from importlib import import_module +from importlib.util import find_spec def package_available(requirement: str) -> bool: @@ -39,6 +40,18 @@ def __getattr__(self, attr): raise RuntimeError(f"This feature requires the {self.name} package/module") +class FoundModule: + def __init__(self, mod): + self.mod = mod + self.imported = False + + def __getattr__(self, attr): + if not self.imported: + self.mod = import_module(self.mod) + self.imported = True + return getattr(self.mod, attr) + + def import_optional(mod, default_mod_class=MissingModule): """ import the "optional" module 'mod' and return the module object or object. @@ -80,7 +93,15 @@ def import_optional(mod, default_mod_class=MissingModule): >> """ + # this try-except is necessary to handle dotted imports, + # like `import_optional("torch.autograd")` + mod_found = False try: - return import_module(mod) - except ModuleNotFoundError: + mod_found = find_spec(mod) is not None + except ImportError: + mod_found = False + + if mod_found: + return FoundModule(mod) + else: return default_mod_class(mod_name=mod) diff --git a/python/pylibwholegraph/pylibwholegraph/_doctor_check.py b/python/pylibwholegraph/pylibwholegraph/_doctor_check.py index 33ac107d..a76e8483 100644 --- a/python/pylibwholegraph/pylibwholegraph/_doctor_check.py +++ b/python/pylibwholegraph/pylibwholegraph/_doctor_check.py @@ -27,7 +27,7 @@ def pylibwholegraph_smoke_check(**kwargs): ) try: - import torch + import torch # noqa: TID251 assert torch.cuda.is_available() diff --git a/python/pylibwholegraph/pylibwholegraph/utils/imports.py b/python/pylibwholegraph/pylibwholegraph/utils/imports.py index 9b9ec65b..564a6b97 100644 --- a/python/pylibwholegraph/pylibwholegraph/utils/imports.py +++ b/python/pylibwholegraph/pylibwholegraph/utils/imports.py @@ -53,7 +53,15 @@ def import_optional(mod, default_mod_class=MissingModule): ... 
RuntimeError: This feature requires the 'torch' package/module """ - if find_spec(mod) is not None: + # this try-except is necessary to handle dotted imports, + # like `import_optional("torch.autograd")` + mod_found = False + try: + mod_found = find_spec(mod) is not None + except ImportError: + mod_found = False + + if mod_found: return FoundModule(mod) else: return default_mod_class(mod_name=mod) From 0afcdd767e099fb961c83c93bbae96ea31c16b32 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Mar 2026 11:52:01 -0500 Subject: [PATCH 42/43] more import-time patching --- python/cugraph-pyg/cugraph_pyg/data/feature_store.py | 7 +++++-- python/cugraph-pyg/cugraph_pyg/data/graph_store.py | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py index ba2081ca..fd645cb7 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import warnings @@ -18,9 +18,12 @@ wgth = import_optional("pylibwholegraph.torch") +# If 'torch_geometric' is available but 'torch' is not, accessing +# 'torch_geometric.data.GraphStore' will fail because `torch_geometric` +# unconditionally imports 'torch'... so need to check that both are available. 
class FeatureStore( object - if isinstance(torch_geometric, MissingModule) + if (isinstance(torch_geometric, MissingModule) or isinstance(torch, MissingModule)) else torch_geometric.data.FeatureStore ): """ diff --git a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py index eada6a61..7a522912 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py @@ -30,9 +30,12 @@ ] +# If 'torch_geometric' is available but 'torch' is not, accessing +# 'torch_geometric.data.GraphStore' will fail because `torch_geometric` +# unconditionally imports 'torch'... so need to check that both are available. class GraphStore( object - if isinstance(torch_geometric, MissingModule) + if (isinstance(torch_geometric, MissingModule) or isinstance(torch, MissingModule)) else torch_geometric.data.GraphStore ): """ From 27f8fdd56ced43cfbb7a417558b0f148dc060d56 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 17 Mar 2026 14:10:17 -0500 Subject: [PATCH 43/43] remove unnecessary CUDA_MAJOR --- ci/test_wheel_cugraph-pyg.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index dc0d799b..54c425d1 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -33,7 +33,6 @@ TORCH_WHEEL_DIR="$(mktemp -d)" # 'cugraph-pyg' is still expected to be importable # and testable in an environment where 'torch' isn't installed. -CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" torch_downloaded=true if [ -z "$(ls -A ${TORCH_WHEEL_DIR} 2>/dev/null)" ]; then rapids-echo-stderr "No 'torch' wheels downloaded."