diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 73d242fe..c3b35d82 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -14,6 +14,7 @@ package_type=$3 # EXCLUDE_ARGS=( --exclude libcuda.so.1 + --exclude "libnccl.so.*" --exclude libnvidia-ml.so.1 --exclude librapids_logger.so --exclude librmm.so diff --git a/ci/build_wheel_pylibwholegraph.sh b/ci/build_wheel_pylibwholegraph.sh index 32b6901a..93b6d1cd 100755 --- a/ci/build_wheel_pylibwholegraph.sh +++ b/ci/build_wheel_pylibwholegraph.sh @@ -17,7 +17,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")" LIBWHOLEGRAPH_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libwholegraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) echo "libwholegraph-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo "${LIBWHOLEGRAPH_WHEELHOUSE}"/libwholegraph_*.whl)" >> "${PIP_CONSTRAINT}" -export SKBUILD_CMAKE_ARGS="-DBUILD_SHARED_LIBS=ON;-DCMAKE_MESSAGE_LOG_LEVEL=VERBOSE;-DCUDA_STATIC_RUNTIME=ON;-DWHOLEGRAPH_BUILD_WHEELS=ON" +export SKBUILD_CMAKE_ARGS="-DBUILD_SHARED_LIBS=ON;-DCMAKE_MESSAGE_LOG_LEVEL=VERBOSE;-DCUDA_STATIC_RUNTIME=ON" ./ci/build_wheel.sh pylibwholegraph ${package_dir} python ./ci/validate_wheel.sh ${package_dir} "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" diff --git a/ci/test_wheel_pylibwholegraph.sh b/ci/test_wheel_pylibwholegraph.sh index da40ea70..f59d200c 100755 --- a/ci/test_wheel_pylibwholegraph.sh +++ b/ci/test_wheel_pylibwholegraph.sh @@ -5,6 +5,12 @@ set -e # abort the script on error set -o pipefail # piped commands propagate their error set -E # ERR traps are inherited by subcommands +# Delete system libnccl.so to ensure the wheel is used. +# (but only do this in CI, to avoid breaking local dev environments) +if [[ "${CI:-}" == "true" ]]; then + rm -rf /usr/lib64/libnccl* +fi + source rapids-init-pip RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c82d2ef0..35bb1d7e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -45,6 +45,7 @@ option(WHOLEGRAPH_EXCLUDE_NVSHMEM_FROM_ALL "Exclude nvshmem targets from wholeGr ON ) option(BUILD_BENCHMARKS "Configure CMake to build benchmark" ON) +option(USE_NCCL_RUNTIME_WHEEL "Use the NCCL wheel at runtime instead of the system library" OFF) # ################################################################################################## # * Set options based on user defined one ----------------------------------- diff --git a/dependencies.yaml b/dependencies.yaml index cba7125b..aba23208 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -21,6 +21,7 @@ files: - depends_on_dask_cudf - depends_on_cupy - depends_on_cuml + - depends_on_nccl - depends_on_pytorch - depends_on_pyg - python_run_cugraph_pyg @@ -49,6 +50,7 @@ files: - test_cpp - depends_on_libwholegraph - depends_on_libwholegraph_tests + - depends_on_nccl test_notebooks: output: none includes: @@ -105,8 +107,9 @@ files: extras: table: project includes: - - depends_on_rapids_logger - depends_on_libraft + - depends_on_nccl + - depends_on_rapids_logger py_build_pylibwholegraph: output: pyproject pyproject_dir: python/pylibwholegraph @@ -342,7 +345,6 @@ dependencies: - output_types: conda packages: - *cmake_ver - - nccl>=2.19 test_notebook: common: - output_types: [conda, requirements] @@ -393,6 +395,21 @@ dependencies: packages: - pytorch>=2.3 - {matrix: null, packages: ["pytorch>=2.3"]} + depends_on_nccl: + common: + - output_types: conda + packages: + - nccl>=2.19 + specific: + - output_types: [pyproject, requirements] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - nvidia-nccl-cu12>=2.19 + - matrix: + packages: depends_on_ogb: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 3bb02858..7a24d840 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -80,4 +80,4 @@ select = [ ] # PyPI limit is 100 MiB, fail CI before we get too close to that -max_allowed_size_compressed = '75M' +max_allowed_size_compressed = '10Mi' diff --git a/python/libwholegraph/CMakeLists.txt b/python/libwholegraph/CMakeLists.txt index 20a7cbab..4ac98532 100644 --- a/python/libwholegraph/CMakeLists.txt +++ b/python/libwholegraph/CMakeLists.txt @@ -30,5 +30,16 @@ project( SET(BUILD_TESTS OFF) SET(BUILD_BENCHMARKS OFF) +SET(USE_NCCL_RUNTIME_WHEEL ON) add_subdirectory(../../cpp/ libwholegraph) + +if(USE_NCCL_RUNTIME_WHEEL) + list(APPEND rpaths "$ORIGIN/../../nvidia/nccl/lib") +endif() + +set_property( + TARGET wholegraph + PROPERTY INSTALL_RPATH ${rpaths} + APPEND +) diff --git a/python/libwholegraph/pyproject.toml b/python/libwholegraph/pyproject.toml index e84cd2fe..bccbe59e 100644 --- a/python/libwholegraph/pyproject.toml +++ b/python/libwholegraph/pyproject.toml @@ -59,8 +59,8 @@ select = [ "distro-too-large-compressed", ] -# detect when package size grows significantly -max_allowed_size_compressed = '0.4G' +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '80Mi' [tool.scikit-build] build-dir = "build/{wheel_tag}" diff --git a/python/pylibwholegraph/pyproject.toml b/python/pylibwholegraph/pyproject.toml index 0ff191e2..a036ba36 100644 --- a/python/pylibwholegraph/pyproject.toml +++ b/python/pylibwholegraph/pyproject.toml @@ -87,5 +87,5 @@ select = [ "distro-too-large-compressed", ] -# detect when package size grows significantly -max_allowed_size_compressed = '400M' +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '10Mi'