diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9d35e3f..4775d28 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,6 +13,8 @@ ENV DEFAULT_CONDA_ENV=rapids FROM ${PYTHON_PACKAGE_MANAGER}-base +ARG TARGETARCH + ARG CUDA ENV CUDAARCHS="RAPIDS" ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}" @@ -24,7 +26,35 @@ ENV PYTHONSAFEPATH="1" ENV PYTHONUNBUFFERED="1" ENV PYTHONDONTWRITEBYTECODE="1" +ENV HISTFILE="/home/coder/.cache/._bash_history" + +### +# sccache configuration +### +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" -ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" -ENV HISTFILE="/home/coder/.cache/._bash_history" +# 2hr (1 minute longer than sccache-dist request timeout) +ENV SCCACHE_IDLE_TIMEOUT=7200 + +### +# sccache-dist configuration +### +# Enable sccache-dist by default +ENV DEVCONTAINER_UTILS_ENABLE_SCCACHE_DIST=1 +# Compile locally if max retries exceeded +ENV SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=true +# Retry transient errors 4 times (for a total of 5 attempts) +ENV SCCACHE_DIST_MAX_RETRIES=4 +ENV SCCACHE_DIST_CONNECT_TIMEOUT=30 +ENV SCCACHE_DIST_CONNECTION_POOL=false +# 1hr 59min (to accommodate debug builds) +ENV SCCACHE_DIST_REQUEST_TIMEOUT=7140 +ENV SCCACHE_DIST_KEEPALIVE_ENABLED=true +ENV SCCACHE_DIST_KEEPALIVE_INTERVAL=20 +ENV SCCACHE_DIST_KEEPALIVE_TIMEOUT=600 +ENV SCCACHE_DIST_URL="https://${TARGETARCH}.linux.sccache.rapids.nvidia.com" + +# Build as much in parallel as possible +ENV INFER_NUM_DEVICE_ARCHITECTURES=1 +ENV MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=20 diff --git a/.devcontainer/cuda12.9-conda/devcontainer.json b/.devcontainer/cuda12.9-conda/devcontainer.json index 249d301..846f272 100644 --- a/.devcontainer/cuda12.9-conda/devcontainer.json +++ b/.devcontainer/cuda12.9-conda/devcontainer.json @@ -5,12 +5,19 @@ "args": { "CUDA": "12.9", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:25.08-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.10-cpp-mambaforge" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-conda", + "--ulimit", + "nofile=500000" + ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.9-pip/devcontainer.json b/.devcontainer/cuda12.9-pip/devcontainer.json index cbe5d21..4f3a444 100644 --- a/.devcontainer/cuda12.9-pip/devcontainer.json +++ b/.devcontainer/cuda12.9-pip/devcontainer.json @@ -5,19 +5,26 @@ "args": { "CUDA": "12.9", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.08-cpp-cuda12.9-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.10-cpp-cuda12.9" } }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-pip", + "--ulimit", + "nofile=500000" + ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:25.8": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.10": { "version": "12.9", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", diff --git a/.devcontainer/cuda13.0-conda/devcontainer.json b/.devcontainer/cuda13.0-conda/devcontainer.json new file mode 100644 index 0000000..ecdf70b --- /dev/null +++ b/.devcontainer/cuda13.0-conda/devcontainer.json @@ -0,0 +1,44 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "13.0", + "PYTHON_PACKAGE_MANAGER": "conda", + "BASE": "rapidsai/devcontainers:25.10-cpp-mambaforge" + } + }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda13.0-conda", + "--ulimit", + "nofile=500000" + ], + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.0-envs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cumlprims_mg,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.devcontainer/cuda13.0-pip/devcontainer.json b/.devcontainer/cuda13.0-pip/devcontainer.json new file mode 100644 index 0000000..9fe5bb8 --- /dev/null +++ b/.devcontainer/cuda13.0-pip/devcontainer.json @@ -0,0 +1,51 @@ +{ + "build": { + "context": "${localWorkspaceFolder}/.devcontainer", + "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", + "args": { + "CUDA": "13.0", + "PYTHON_PACKAGE_MANAGER": "pip", + "BASE": "rapidsai/devcontainers:25.10-cpp-cuda13.0" + } + }, + "runArgs": [ + "--rm", + "--name", + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda13.0-pip", + "--ulimit", + "nofile=500000" + ], + "hostRequirements": {"gpu": "optional"}, + "features": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.10": { + "version": "13.0", + "installcuBLAS": true, + "installcuSOLVER": true, + "installcuRAND": true, + "installcuSPARSE": true + }, + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {} + }, + "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/cuda", + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" + ], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs}"], + "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], + "workspaceFolder": "/home/coder", + "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cumlprims_mg,type=bind,consistency=consistent", + "mounts": [ + "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.flake8", + "nvidia.nsight-vscode-edition" + ] + } + } +} diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000..2c9a858 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,27 @@ +# GitHub Auto-Generated Release Notes Configuration for RAPIDS +# This file configures how GitHub automatically generates release notes + +changelog: + exclude: + labels: + - ignore-for-release + - dependencies + authors: + - rapids-bot[bot] + - dependabot[bot] + categories: + - title: 🚨 Breaking Changes + labels: + - breaking + - title: 🐛 Bug Fixes + labels: + - bug + - title: 📖 Documentation + labels: + - doc + - title: 🚀 New Features + labels: + - feature request + - title: 🛠️ Improvements + labels: + - improvement diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index aac898a..7081eb7 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -34,7 +34,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -45,7 +45,7 @@ jobs: if: ${{ !startsWith(github.ref, 'refs/tags/') }} needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: if: ${{ startsWith(github.ref, 'refs/tags/') }} needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index f253400..ef976a4 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -14,7 +14,7 @@ jobs: - devcontainer - telemetry-setup secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10 telemetry-setup: runs-on: ubuntu-latest continue-on-error: true @@ -29,27 +29,34 @@ jobs: checks: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10 with: ignored_pr_jobs: telemetry-summarize conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 with: build_type: pull-request script: ci/build_cpp.sh devcontainer: secrets: inherit needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.10 with: - arch: '["amd64"]' - cuda: '["12.9"]' + arch: '["amd64", "arm64"]' + cuda: '["13.0"]' + node_type: "cpu8" + rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN + env: | + SCCACHE_DIST_MAX_RETRIES=inf + SCCACHE_SERVER_LOG=sccache=debug + SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false + SCCACHE_DIST_AUTH_TOKEN_VAR=RAPIDS_AUX_SECRET_1 build_command: | - sccache -z; - build-all --verbose; - sccache -s; + sccache --zero-stats; + build-all -j0 -DDISABLE_DEPRECATION_WARNINGS=ON --verbose 2>&1 | tee telemetry-artifacts/build.log; + sccache --show-adv-stats | tee telemetry-artifacts/sccache-stats.txt; telemetry-summarize: # This job must use a self-hosted runner to record telemetry traces. runs-on: linux-amd64-cpu4 diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 593fcb1..48bf37a 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -12,7 +12,7 @@ jobs: trigger-notifier: if: contains(github.event.pull_request.labels.*.name, 'breaking') secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.08 + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.10 with: sender_login: ${{ github.event.sender.login }} sender_avatar: ${{ github.event.sender.avatar_url }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bab091f..72eff35 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,24 +4,24 @@ # To run: `pre-commit run --all-files` repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-added-large-files - id: debug-statements - id: mixed-line-ending - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v20.1.4 + rev: v21.1.0 hooks: - id: clang-format types_or: [c, c++, cuda] args: ["-fallback-style=none", "-style=file", "-i"] - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.19.0 + rev: v1.20.0 hooks: - id: rapids-dependency-file-generator - args: ["--clean"] + args: ["--clean", "--warn-all", "--strict"] - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v0.6.0 + rev: v0.7.0 hooks: - id: verify-copyright files: | diff --git a/RAPIDS_BRANCH b/RAPIDS_BRANCH new file mode 100644 index 0000000..9b1c52d --- /dev/null +++ b/RAPIDS_BRANCH @@ -0,0 +1 @@ +branch-25.10 diff --git a/README.md b/README.md index 603117e..fcce340 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,20 @@ # cuMLPrims -This repository contains C++ and CUDA code of muti-node multi-GPU (MNMG) ML mathematical primitives and some algorithms, that are used by [the main cuML project](https://github.com/rapidsai/cuml). The build system uses CMake for build configuration, and an out-of-source build is recommended. +This repository contains C++ and CUDA code of multi-node multi-GPU (MNMG) ML mathematical primitives and some algorithms, that are used by [the main cuML project](https://github.com/rapidsai/cuml). The build system uses CMake for build configuration, and an out-of-source build is recommended. -As of version 0.13, the MNMG code included in cuMLPrims follows the model one-process-per-GPU (OPG), where the code uses a communication library (based on cuML's comms) and each process has one GPU assigned to it. This is in contrast to single-process-multi-GPU (SPMG) approaches, which are no longer part of the code base. +The MNMG code included in cuMLPrims follows the model one-process-per-GPU (OPG), where the code uses a communication library (based on cuML's comms) and each process has one GPU assigned to it. This is in contrast to single-process-multi-GPU (SPMG) approaches, which are no longer part of the code base. ## Folder Structure -The folder structure mirrors closely the structure of github cuML. The folders are: +The folder structure mirrors closely the structure of GitHub cuML. The folders are: - `ci`: Folders containing CI related scripts to run tests for each MR and create the conda packages. -- `conda` Contains Conda recipe for `libcumlprims` Conda package in the `rapidsai` channel. +- `conda`: Contains Conda recipe for `libcumlprims` Conda package in the `rapidsai` channel. - `cpp`: Contains the source code. - `cpp/cmake`: CMake related scripts. - `cpp/include`: The include folder for headers that are necessary to be installed/distributed to use the libcumlprims.so artifact by users of the library. - `cpp/src_prims_opg`: Contains source code for MNMG ML primitives. It also contains source code for algorithms that use the primitives that are still included in cuMLPrims as opposed to cuML. - - `cpp/test`: Googletest based unit tests (work in progress). + - `cpp/test`: Googletest based unit tests. ## Building cuMLPrims: @@ -22,22 +22,20 @@ The folder structure mirrors closely the structure of github cuML. The folders a The main artifact produced by the build system is the shared library libcumlprims. Ensure the following dependencies are satisfied: -1. CMake (>= 3.14.5) -2. CUDA (>= 10.0) -3. GCC (>= 5.4.0) -4. NCCL (>= 2.4.6.1) -5. UCX with CUDA support (optional)(>=1.7) - enables point-to-point messaging in the cuML communicator. -6. zlib +1. CMake (>= 3.30.4) +2. CUDA (>= 12.0) +3. GCC (>= 14) +4. NCCL (>= 2.5) -It is recommended to use conda for environment/package management. If doing so, it is recommended to use the convenience environment .yml file located in [**the cuML repository**, in `conda/environments/cuml_dev_cudax.y.yml` (replace x.y for your CUDA version)](https://github.com/rapidsai/cuml/tree/branch-0.14/conda/environments). This file contains most of the dependencies ment1ioned above (notable exceptions are gcc and zlib). To use it, for example to create an environment named cuml_dev for CUDA 10.0 and Python 3.7, you can use the follow command: +It is recommended to use conda for environment/package management. See `conda/environments/` for available environment files. ```bash -conda env create -n cuml_dev python=3.7 --file=conda/environments/cuml_dev_cuda10.0.yml +conda env create --name cumlprims_dev --file conda/environments/all_cuda-130_arch-$(arch).yaml ``` ### Using build.sh script -As a convenience, a `build.sh` script is provided which can be used to execute the build commands in an automated manner. Note that the libraries will be installed to the location set in `$INSTALL_PREFIX` if set (i.e. `export INSTALL_PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`. +As a convenience, a `build.sh` script is provided which can be used to execute the build commands in an automated manner. Note that the libraries will be installed to the location set in `$INSTALL_PREFIX` if set (i.e. `export INSTALL_PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`. ```bash $ ./build.sh # build the libcuml library and tests # install them to $INSTALL_PREFIX if set, otherwise $CONDA_PREFIX @@ -46,12 +44,12 @@ $ ./build.sh # build the libcuml library and tests Other `build.sh` options: ```bash -$ ./build.sh clean # remove any prior build artifacts and configuration (start over) +$ ./build.sh clean # remove any prior build artifacts and configuration (start over) $ ./build.sh libcumlprims -v # build and install libcumlprims with verbose output $ ./build.sh libcumlprims -g # build and install libcumlprims for debug $ PARALLEL_LEVEL=4 ./build.sh libcumlprims # build and install libcumlprims limiting parallel build jobs to 4 (make -j4) -$ ./build.sh libcuml -n # build libcuml but do not install -$ ./build.sh libcumlprims --allgpuarch # build the tests for all supported GPU architectures +$ ./build.sh libcuml -n # build libcuml but do not install +$ ./build.sh libcumlprims --allgpuarch # build the tests for all supported GPU architectures ``` @@ -78,13 +76,13 @@ If using a conda environment (recommended), then cmake can be configured appropr $ cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX ``` -Note: The following warning message is dependent upon the version of cmake and the `CMAKE_INSTALL_PREFIX` used. If this warning is displayed, the build should still run succesfully. We are currently working to resolve this open issue. You can silence this warning by adding `-DCMAKE_IGNORE_PATH=$CONDA_PREFIX/lib` to your `cmake` command. +Note: The following warning message is dependent upon the version of cmake and the `CMAKE_INSTALL_PREFIX` used. If this warning is displayed, the build should still run successfully. We are currently working to resolve this open issue. You can silence this warning by adding `-DCMAKE_IGNORE_PATH=$CONDA_PREFIX/lib` to your `cmake` command. ``` Cannot generate a safe runtime search path for target ml_test because files in some directories may conflict with libraries in implicit directories: ``` -There are many options to configure the build process, see the [customizing build section](#libcuml-&-libcumlc++). +There are many options to configure the build process, see the [customizing build section](#custom-build-options). 3. Build `libcumlprims`: @@ -107,11 +105,7 @@ cuMLPrims CMake has the following configurable flags available: | Flag | Possible Values | Default Value | Behavior | | --- | --- | --- | --- | -| BUILD_OPG_TESTS | [ON, OFF] | OFF | Build MPI cumlcomms based C++ unit tests (in progress, refer to step 4. of the build steps). | -| BUILD_CUMLPRIMS_LIBRARY | [ON, OFF] | ON | Enable/disable building libcumprims shared library. | -| CMAKE_CXX11_ABI | [ON, OFF] | ON | Enable/disable the GLIBCXX11 ABI | -| DISABLE_OPENMP | [ON, OFF] | OFF | Set to `ON` to disable OpenMP | -| KERNEL_INFO | [ON, OFF] | OFF | Enable/disable kernel resource usage info in nvcc. | -| LINE_INFO | [ON, OFF] | OFF | Enable/disable lineinfo in nvcc. | -| NVTX | [ON, OFF] | OFF | Enable/disable nvtx markers in libcumlprims. | -| GPU_ARCHS | List of GPU architectures, semicolon-separated | 60;70;75 | List of GPU architectures that all artifacts are compiled for. | +| BUILD_OPG_TESTS | [ON, OFF] | OFF | Build MPI cumlcomms based C++ unit tests (in progress, refer to step 4. of the build steps). | +| BUILD_CUMLPRIMS_LIBRARY | [ON, OFF] | ON | Enable/disable building libcumlprims shared library. | +| DISABLE_OPENMP | [ON, OFF] | OFF | Set to `ON` to disable OpenMP | +| NVTX | [ON, OFF] | OFF | Enable/disable nvtx markers in libcumlprims.| diff --git a/VERSION b/VERSION index 3af4bda..296e352 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -25.08.00 +25.10.00 diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index cccdf51..0e82f27 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -55,4 +55,5 @@ find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}" sed_runner "s@rapidsai/devcontainers/features/cuda:[0-9.]*@rapidsai/devcontainers/features/cuda:${NEXT_SHORT_TAG_PEP440}@" "${filename}" sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}" + sed_runner "s@rapids-\${localWorkspaceFolderBasename}-[0-9.]*@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}" done diff --git a/cmake/RAPIDS.cmake b/cmake/RAPIDS.cmake index 8f04915..65c33b0 100644 --- a/cmake/RAPIDS.cmake +++ b/cmake/RAPIDS.cmake @@ -18,9 +18,9 @@ cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) # Allow users to control which version is used -if(NOT rapids-cmake-version OR NOT rapids-cmake-version MATCHES [[^([0-9][0-9])\.([0-9][0-9])$]]) +if(NOT rapids-cmake-branch OR NOT rapids-cmake-version) message( - FATAL_ERROR "The CMake variable rapids-cmake-version must be defined in the format MAJOR.MINOR." + FATAL_ERROR "The CMake variable `rapids-cmake-branch` or `rapids-cmake-version` must be defined" ) endif() @@ -33,7 +33,7 @@ endif() # Allow users to control which branch is fetched if(NOT rapids-cmake-branch) # Define a default branch if the user doesn't set one - set(rapids-cmake-branch "branch-${rapids-cmake-version}") + set(rapids-cmake-branch "release/${rapids-cmake-version}") endif() # Allow users to control the exact URL passed to FetchContent diff --git a/cmake/rapids_config.cmake b/cmake/rapids_config.cmake index 5aa8e55..5f02b05 100644 --- a/cmake/rapids_config.cmake +++ b/cmake/rapids_config.cmake @@ -25,5 +25,15 @@ else() "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}") endif() +# Use STRINGS to trim whitespace/newlines +file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/../RAPIDS_BRANCH" _rapids_branch) +if(NOT _rapids_branch) + message( + FATAL_ERROR + "Could not determine branch name to use for checking out rapids-cmake. The file \"${CMAKE_CURRENT_LIST_DIR}/../RAPIDS_BRANCH\" is missing." + ) +endif() + set(rapids-cmake-version "${RAPIDS_VERSION_MAJOR_MINOR}") +set(rapids-cmake-branch "${_rapids_branch}") include("${CMAKE_CURRENT_LIST_DIR}/RAPIDS.cmake") diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index b321b20..555e19a 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -14,13 +14,13 @@ dependencies: - cuda-nvtx-dev - cuda-version=12.9 - cxx-compiler -- gcc_linux-aarch64=13.* +- gcc_linux-aarch64=14.* - libcublas-dev - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==25.8.* -- librmm==25.8.* +- libraft-headers==25.10.* +- librmm==25.10.* - ninja - pre-commit - python>=3.10,<3.14 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 563668c..9c4bf94 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -14,13 +14,13 @@ dependencies: - cuda-nvtx-dev - cuda-version=12.9 - cxx-compiler -- gcc_linux-64=13.* +- gcc_linux-64=14.* - libcublas-dev - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==25.8.* -- librmm==25.8.* +- libraft-headers==25.10.* +- librmm==25.10.* - ninja - pre-commit - python>=3.10,<3.14 diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml new file mode 100644 index 0000000..5c74279 --- /dev/null +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -0,0 +1,28 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- conda-forge +dependencies: +- c-compiler +- clang-tools==20.1.4 +- clang==20.1.4 +- cmake>=3.30.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-version=13.0 +- cxx-compiler +- gcc_linux-aarch64=14.* +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- libraft-headers==25.10.* +- librmm==25.10.* +- ninja +- pre-commit +- python>=3.10,<3.14 +- sysroot_linux-aarch64==2.28 +name: all_cuda-130_arch-aarch64 diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml new file mode 100644 index 0000000..70f83f3 --- /dev/null +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -0,0 +1,28 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- conda-forge +dependencies: +- c-compiler +- clang-tools==20.1.4 +- clang==20.1.4 +- cmake>=3.30.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-version=13.0 +- cxx-compiler +- gcc_linux-64=14.* +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- libraft-headers==25.10.* +- librmm==25.10.* +- ninja +- pre-commit +- python>=3.10,<3.14 +- sysroot_linux-64==2.28 +name: all_cuda-130_arch-x86_64 diff --git a/conda/recipes/libcumlprims/conda_build_config.yaml b/conda/recipes/libcumlprims/conda_build_config.yaml index caf5489..1082f0d 100644 --- a/conda/recipes/libcumlprims/conda_build_config.yaml +++ b/conda/recipes/libcumlprims/conda_build_config.yaml @@ -1,8 +1,8 @@ c_compiler_version: - - 13 + - 14 cxx_compiler_version: - - 13 + - 14 cuda_compiler: - cuda-nvcc diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cb28690..1e82405 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -54,6 +54,7 @@ option(CUMLPRIMS_MG_RAFT_CLONE_ON_PIN "Explicitly clone RAFT branch when pinned option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON) option(DISABLE_OPENMP "Disable OpenMP" OFF) option(NVTX "Enable nvtx markers" OFF) +option(DISABLE_DEPRECATION_WARNINGS "Disable warnings generated from deprecated declarations." OFF) if(BUILD_TESTS) message(STATUS "CUMLPRIMS_MG: Setting BUILD_TESTS to OFF because they don't compile") @@ -279,4 +280,4 @@ rapids_export( # include(cmake/doxygen.cmake) # add_doxygen_target(IN_DOXYFILE src_prims/Doxyfile.in # OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile -# CWD ${CMAKE_CURRENT_BINARY_DIR}) \ No newline at end of file +# CWD ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index f471714..e51226c 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -24,19 +24,14 @@ list(APPEND CUMLPRIMS_MG_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-conste # list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Werror=cross-execution-space-call) # list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) -if(DISABLE_DEPRECATION_WARNING) - list(APPEND CUMLPRIMS_MG_CXX_FLAGS -Wno-deprecated-declarations) - list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations) +if(DISABLE_DEPRECATION_WARNINGS) + list(APPEND CUMLPRIMS_MG_CXX_FLAGS -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS) + list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS) endif() # make sure we produce smallest binary size -list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xfatbin=-compress-all) -if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" - AND (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION - VERSION_LESS 13.0) -) - list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xfatbin=--compress-level=3) -endif() +include(${rapids-cmake-dir}/cuda/enable_fatbin_compression.cmake) +rapids_cuda_enable_fatbin_compression(VARIABLE CUMLPRIMS_MG_CUDA_FLAGS TUNE_FOR rapids) # Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking if(CUDA_ENABLE_LINEINFO) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index c503006..698b690 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ function(find_and_configure_raft) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) - if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "branch-${CUMLPRIMS_MG_BRANCH_VERSION_raft}") + if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "${rapids-cmake-checkout-tag}") message(STATUS "CUMLPRIMS_MG: RAFT pinned tag found: ${PKG_PINNED_TAG}. Cloning raft locally.") set(CPM_DOWNLOAD_raft ON) elseif(PKG_USE_RAFT_STATIC AND (NOT CPM_raft_SOURCE)) @@ -53,13 +53,12 @@ function(find_and_configure_raft) endfunction() set(CUMLPRIMS_MG_MIN_VERSION_raft "${CUMLPRIMS_MG_VERSION_MAJOR}.${CUMLPRIMS_MG_VERSION_MINOR}.00") -set(CUMLPRIMS_MG_BRANCH_VERSION_raft "${CUMLPRIMS_MG_VERSION_MAJOR}.${CUMLPRIMS_MG_VERSION_MINOR}") # Change pinned tag here to test a commit in CI # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${CUMLPRIMS_MG_MIN_VERSION_raft} FORK rapidsai - PINNED_TAG branch-${CUMLPRIMS_MG_BRANCH_VERSION_raft} + PINNED_TAG ${rapids-cmake-checkout-tag} CLONE_ON_PIN ${CUMLPRIMS_MG_RAFT_CLONE_ON_PIN} ) diff --git a/dependencies.yaml b/dependencies.yaml index dbbee3c..b4a1eb0 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: conda matrix: - cuda: ["12.9"] + cuda: ["12.9", "13.0"] arch: [x86_64, aarch64] includes: - build_cpp @@ -30,28 +30,23 @@ dependencies: - output_types: conda packages: - c-compiler + - cuda-nvcc - cxx-compiler - - libraft-headers==25.8.* - - librmm==25.8.* + - libraft-headers==25.10.* + - librmm==25.10.* specific: - output_types: conda matrices: - - matrix: {arch: x86_64} + - matrix: + arch: x86_64 packages: + - gcc_linux-64=14.* - sysroot_linux-64==2.28 - - matrix: {arch: aarch64} + - matrix: + arch: aarch64 packages: + - gcc_linux-aarch64=14.* - sysroot_linux-aarch64==2.28 - - output_types: conda - matrices: - - matrix: {arch: x86_64, cuda: "12.*"} - packages: - - gcc_linux-64=13.* - - cuda-nvcc - - matrix: {arch: aarch64, cuda: "12.*"} - packages: - - gcc_linux-aarch64=13.* - - cuda-nvcc cuda_version: specific: - output_types: conda @@ -76,19 +71,20 @@ dependencies: cuda: "12.9" packages: - cuda-version=12.9 - cuda: - specific: - - output_types: conda - matrices: - matrix: - cuda: "12.*" + cuda: "13.0" packages: - - cuda-cudart-dev - - cuda-nvtx-dev - - libcublas-dev - - libcurand-dev - - libcusolver-dev - - libcusparse-dev + - cuda-version=13.0 + cuda: + common: + - output_types: conda + packages: + - cuda-cudart-dev + - cuda-nvtx-dev + - libcublas-dev + - libcurand-dev + - libcusolver-dev + - libcusparse-dev develop: common: - output_types: [conda, requirements]