diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 9d35e3f..4775d28 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -13,6 +13,8 @@ ENV DEFAULT_CONDA_ENV=rapids
 
 FROM ${PYTHON_PACKAGE_MANAGER}-base
 
+ARG TARGETARCH
+
 ARG CUDA
 ENV CUDAARCHS="RAPIDS"
 ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"
@@ -24,7 +26,35 @@ ENV PYTHONSAFEPATH="1"
 ENV PYTHONUNBUFFERED="1"
 ENV PYTHONDONTWRITEBYTECODE="1"
 
+ENV HISTFILE="/home/coder/.cache/._bash_history"
+
+###
+# sccache configuration
+###
+ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
 ENV SCCACHE_REGION="us-east-2"
 ENV SCCACHE_BUCKET="rapids-sccache-devs"
-ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
-ENV HISTFILE="/home/coder/.cache/._bash_history"
+# 2hr (1 minute longer than sccache-dist request timeout)
+ENV SCCACHE_IDLE_TIMEOUT=7200
+
+###
+# sccache-dist configuration
+###
+# Enable sccache-dist by default
+ENV DEVCONTAINER_UTILS_ENABLE_SCCACHE_DIST=1
+# Compile locally if max retries exceeded
+ENV SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=true
+# Retry transient errors 4 times (for a total of 5 attempts)
+ENV SCCACHE_DIST_MAX_RETRIES=4
+ENV SCCACHE_DIST_CONNECT_TIMEOUT=30
+ENV SCCACHE_DIST_CONNECTION_POOL=false
+# 1hr 59min (to accommodate debug builds)
+ENV SCCACHE_DIST_REQUEST_TIMEOUT=7140
+ENV SCCACHE_DIST_KEEPALIVE_ENABLED=true
+ENV SCCACHE_DIST_KEEPALIVE_INTERVAL=20
+ENV SCCACHE_DIST_KEEPALIVE_TIMEOUT=600
+ENV SCCACHE_DIST_URL="https://${TARGETARCH}.linux.sccache.rapids.nvidia.com"
+
+# Build as much in parallel as possible
+ENV INFER_NUM_DEVICE_ARCHITECTURES=1
+ENV MAX_DEVICE_OBJ_TO_COMPILE_IN_PARALLEL=20
diff --git a/.devcontainer/cuda12.9-conda/devcontainer.json b/.devcontainer/cuda12.9-conda/devcontainer.json
index 249d301..846f272 100644
--- a/.devcontainer/cuda12.9-conda/devcontainer.json
+++ b/.devcontainer/cuda12.9-conda/devcontainer.json
@@ -5,12 +5,19 @@
     "args": {
       "CUDA": "12.9",
       "PYTHON_PACKAGE_MANAGER": "conda",
-      "BASE": "rapidsai/devcontainers:25.08-cpp-mambaforge-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.10-cpp-mambaforge"
     }
   },
+  "runArgs": [
+    "--rm",
+    "--name",
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-conda",
+    "--ulimit",
+    "nofile=500000"
+  ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.8": {}
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
diff --git a/.devcontainer/cuda12.9-pip/devcontainer.json b/.devcontainer/cuda12.9-pip/devcontainer.json
index cbe5d21..4f3a444 100644
--- a/.devcontainer/cuda12.9-pip/devcontainer.json
+++ b/.devcontainer/cuda12.9-pip/devcontainer.json
@@ -5,19 +5,26 @@
     "args": {
       "CUDA": "12.9",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:25.08-cpp-cuda12.9-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:25.10-cpp-cuda12.9"
     }
   },
+  "runArgs": [
+    "--rm",
+    "--name",
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda12.9-pip",
+    "--ulimit",
+    "nofile=500000"
+  ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/cuda:25.8": {
+    "ghcr.io/rapidsai/devcontainers/features/cuda:25.10": {
       "version": "12.9",
       "installcuBLAS": true,
       "installcuSOLVER": true,
       "installcuRAND": true,
       "installcuSPARSE": true
     },
-    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.8": {}
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/cuda",
diff --git a/.devcontainer/cuda13.0-conda/devcontainer.json b/.devcontainer/cuda13.0-conda/devcontainer.json
new file mode 100644
index 0000000..ecdf70b
--- /dev/null
+++ b/.devcontainer/cuda13.0-conda/devcontainer.json
@@ -0,0 +1,44 @@
+{
+  "build": {
+    "context": "${localWorkspaceFolder}/.devcontainer",
+    "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+    "args": {
+      "CUDA": "13.0",
+      "PYTHON_PACKAGE_MANAGER": "conda",
+      "BASE": "rapidsai/devcontainers:25.10-cpp-mambaforge"
+    }
+  },
+  "runArgs": [
+    "--rm",
+    "--name",
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda13.0-conda",
+    "--ulimit",
+    "nofile=500000"
+  ],
+  "hostRequirements": {"gpu": "optional"},
+  "features": {
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {}
+  },
+  "overrideFeatureInstallOrder": [
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+  ],
+  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.0-envs}"],
+  "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+  "workspaceFolder": "/home/coder",
+  "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cumlprims_mg,type=bind,consistency=consistent",
+  "mounts": [
+    "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+  ],
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "ms-python.flake8",
+        "nvidia.nsight-vscode-edition"
+      ]
+    }
+  }
+}
diff --git a/.devcontainer/cuda13.0-pip/devcontainer.json b/.devcontainer/cuda13.0-pip/devcontainer.json
new file mode 100644
index 0000000..9fe5bb8
--- /dev/null
+++ b/.devcontainer/cuda13.0-pip/devcontainer.json
@@ -0,0 +1,51 @@
+{
+  "build": {
+    "context": "${localWorkspaceFolder}/.devcontainer",
+    "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+    "args": {
+      "CUDA": "13.0",
+      "PYTHON_PACKAGE_MANAGER": "pip",
+      "BASE": "rapidsai/devcontainers:25.10-cpp-cuda13.0"
+    }
+  },
+  "runArgs": [
+    "--rm",
+    "--name",
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.10-cuda13.0-pip",
+    "--ulimit",
+    "nofile=500000"
+  ],
+  "hostRequirements": {"gpu": "optional"},
+  "features": {
+    "ghcr.io/rapidsai/devcontainers/features/cuda:25.10": {
+      "version": "13.0",
+      "installcuBLAS": true,
+      "installcuSOLVER": true,
+      "installcuRAND": true,
+      "installcuSPARSE": true
+    },
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.10": {}
+  },
+  "overrideFeatureInstallOrder": [
+    "ghcr.io/rapidsai/devcontainers/features/cuda",
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+  ],
+  "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs}"],
+  "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+  "workspaceFolder": "/home/coder",
+  "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cumlprims_mg,type=bind,consistency=consistent",
+  "mounts": [
+    "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+    "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+  ],
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "ms-python.flake8",
+        "nvidia.nsight-vscode-edition"
+      ]
+    }
+  }
+}
diff --git a/.github/release.yml b/.github/release.yml
new file mode 100644
index 0000000..2c9a858
--- /dev/null
+++ b/.github/release.yml
@@ -0,0 +1,27 @@
+# GitHub Auto-Generated Release Notes Configuration for RAPIDS
+# This file configures how GitHub automatically generates release notes
+
+changelog:
+  exclude:
+    labels:
+      - ignore-for-release
+      - dependencies
+    authors:
+      - rapids-bot[bot]
+      - dependabot[bot]
+  categories:
+    - title: 🚨 Breaking Changes
+      labels:
+        - breaking
+    - title: 🐛 Bug Fixes
+      labels:
+        - bug
+    - title: 📖 Documentation
+      labels:
+        - doc
+    - title: 🚀 New Features
+      labels:
+        - feature request
+    - title: 🛠️ Improvements
+      labels:
+        - improvement
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index aac898a..7081eb7 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -34,7 +34,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.08
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -45,7 +45,7 @@ jobs:
     if: ${{ !startsWith(github.ref, 'refs/tags/') }}
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.08
+    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -55,7 +55,7 @@ jobs:
     if: ${{ startsWith(github.ref, 'refs/tags/') }}
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.08
+    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.10
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index f253400..ef976a4 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -14,7 +14,7 @@ jobs:
       - devcontainer
       - telemetry-setup
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.08
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10
   telemetry-setup:
     runs-on: ubuntu-latest
     continue-on-error: true
@@ -29,27 +29,34 @@ jobs:
   checks:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.08
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10
     with:
       ignored_pr_jobs: telemetry-summarize
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.08
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10
     with:
       build_type: pull-request
       script: ci/build_cpp.sh
   devcontainer:
     secrets: inherit
     needs: telemetry-setup
-    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.08
+    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.10
     with:
-      arch: '["amd64"]'
-      cuda: '["12.9"]'
+      arch: '["amd64", "arm64"]'
+      cuda: '["13.0"]'
+      node_type: "cpu8"
+      rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN
+      env: |
+        SCCACHE_DIST_MAX_RETRIES=inf
+        SCCACHE_SERVER_LOG=sccache=debug
+        SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false
+        SCCACHE_DIST_AUTH_TOKEN_VAR=RAPIDS_AUX_SECRET_1
       build_command: |
-        sccache -z;
-        build-all --verbose;
-        sccache -s;
+        sccache --zero-stats;
+        build-all -j0 -DDISABLE_DEPRECATION_WARNINGS=ON --verbose 2>&1 | tee telemetry-artifacts/build.log;
+        sccache --show-adv-stats | tee telemetry-artifacts/sccache-stats.txt;
   telemetry-summarize:
     # This job must use a self-hosted runner to record telemetry traces.
     runs-on: linux-amd64-cpu4
diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml
index 593fcb1..48bf37a 100644
--- a/.github/workflows/trigger-breaking-change-alert.yaml
+++ b/.github/workflows/trigger-breaking-change-alert.yaml
@@ -12,7 +12,7 @@ jobs:
   trigger-notifier:
     if: contains(github.event.pull_request.labels.*.name, 'breaking')
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.08
+    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.10
     with:
       sender_login: ${{ github.event.sender.login }}
       sender_avatar: ${{ github.event.sender.avatar_url }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bab091f..72eff35 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,24 +4,24 @@
 # To run: `pre-commit run --all-files`
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
+    rev: v6.0.0
     hooks:
       - id: check-added-large-files
       - id: debug-statements
       - id: mixed-line-ending
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v20.1.4
+    rev: v21.1.0
     hooks:
       - id: clang-format
         types_or: [c, c++, cuda]
         args: ["-fallback-style=none", "-style=file", "-i"]
   - repo: https://github.com/rapidsai/dependency-file-generator
-    rev: v1.19.0
+    rev: v1.20.0
     hooks:
       - id: rapids-dependency-file-generator
-        args: ["--clean"]
+        args: ["--clean", "--warn-all", "--strict"]
   - repo: https://github.com/rapidsai/pre-commit-hooks
-    rev: v0.6.0
+    rev: v0.7.0
     hooks:
       - id: verify-copyright
         files: |
diff --git a/RAPIDS_BRANCH b/RAPIDS_BRANCH
new file mode 100644
index 0000000..9b1c52d
--- /dev/null
+++ b/RAPIDS_BRANCH
@@ -0,0 +1 @@
+branch-25.10
diff --git a/README.md b/README.md
index 603117e..fcce340 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,20 @@
 # cuMLPrims
 
-This repository contains  C++ and CUDA code of muti-node multi-GPU (MNMG) ML mathematical primitives and some algorithms, that are used by [the main cuML project](https://github.com/rapidsai/cuml). The build system uses CMake for build configuration, and an out-of-source build is recommended.
+This repository contains C++ and CUDA code for multi-node multi-GPU (MNMG) ML mathematical primitives and some algorithms that are used by [the main cuML project](https://github.com/rapidsai/cuml). The build system uses CMake for build configuration, and an out-of-source build is recommended.
 
-As of version 0.13, the MNMG code included in cuMLPrims follows the model one-process-per-GPU (OPG), where the code uses a communication library (based on cuML's comms) and each process has one GPU assigned to it. This is in contrast to single-process-multi-GPU (SPMG) approaches, which are no longer part of the code base.
+The MNMG code included in cuMLPrims follows the one-process-per-GPU (OPG) model, where the code uses a communication library (based on cuML's comms) and each process has one GPU assigned to it. This is in contrast to single-process-multi-GPU (SPMG) approaches, which are no longer part of the code base.
 
 ## Folder Structure
 
-The folder structure mirrors closely the structure of github cuML. The folders are:
+The folder structure mirrors closely the structure of GitHub cuML. The folders are:
 
 - `ci`: Folders containing CI related scripts to run tests for each MR and create the conda packages.
-- `conda` Contains Conda recipe for `libcumlprims` Conda package in the `rapidsai` channel.
+- `conda`: Contains Conda recipe for `libcumlprims` Conda package in the `rapidsai` channel.
 - `cpp`: Contains the source code.
     - `cpp/cmake`: CMake related scripts.
     - `cpp/include`: The include folder for headers that are necessary to be installed/distributed to use the libcumlprims.so artifact by users of the library.
     - `cpp/src_prims_opg`: Contains source code for MNMG ML primitives. It also contains source code for algorithms that use the primitives that are still included in cuMLPrims as opposed to cuML.
-    - `cpp/test`: Googletest based unit tests (work in progress).
+    - `cpp/test`: Googletest based unit tests.
 
 ## Building cuMLPrims:
 
@@ -22,22 +22,20 @@ The folder structure mirrors closely the structure of github cuML. The folders a
 
 The main artifact produced by the build system is the shared library libcumlprims. Ensure the following dependencies are satisfied:
 
-1. CMake (>= 3.14.5)
-2. CUDA (>= 10.0)
-3. GCC (>= 5.4.0)
-4. NCCL (>= 2.4.6.1)
-5. UCX with CUDA support (optional)(>=1.7) - enables point-to-point messaging in the cuML communicator.
-6. zlib
+1. CMake (>= 3.30.4)
+2. CUDA (>= 12.0)
+3. GCC (>= 14)
+4. NCCL (>= 2.5)
 
-It is recommended to use conda for environment/package management. If doing so, it is recommended to use the convenience environment .yml file located in [**the cuML repository**, in `conda/environments/cuml_dev_cudax.y.yml` (replace x.y for your CUDA version)](https://github.com/rapidsai/cuml/tree/branch-0.14/conda/environments). This file contains most of the dependencies ment1ioned above (notable exceptions are gcc and zlib). To use it, for example to create an environment named cuml_dev for CUDA 10.0 and Python 3.7, you can use the follow command:
+It is recommended to use conda for environment/package management. See `conda/environments/` for available environment files.
 
 ```bash
-conda env create -n cuml_dev python=3.7 --file=conda/environments/cuml_dev_cuda10.0.yml
+conda env create --name cumlprims_dev --file conda/environments/all_cuda-130_arch-$(arch).yaml
 ```
 
 ### Using build.sh script
 
-As a convenience, a `build.sh` script is provided which can be used to execute the build commands in an automated manner.  Note that the libraries will be installed to the location set in `$INSTALL_PREFIX` if set (i.e. `export INSTALL_PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`.
+As a convenience, a `build.sh` script is provided which can be used to execute the build commands in an automated manner. Note that the libraries will be installed to the location set in `$INSTALL_PREFIX` if set (i.e. `export INSTALL_PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`.
 ```bash
 $ ./build.sh                           # build the libcuml library and tests
                                        # install them to $INSTALL_PREFIX if set, otherwise $CONDA_PREFIX
@@ -46,12 +44,12 @@ $ ./build.sh                           # build the libcuml library and tests
 Other `build.sh` options:
 
 ```bash
-$ ./build.sh clean                     # remove any prior build artifacts and configuration (start over)
+$ ./build.sh clean                          # remove any prior build artifacts and configuration (start over)
 $ ./build.sh libcumlprims -v                # build and install libcumlprims with verbose output
 $ ./build.sh libcumlprims -g                # build and install libcumlprims for debug
 $ PARALLEL_LEVEL=4 ./build.sh libcumlprims  # build and install libcumlprims limiting parallel build jobs to 4 (make -j4)
-$ ./build.sh libcuml -n                # build libcuml but do not install
-$ ./build.sh libcumlprims --allgpuarch        # build the  tests for all supported GPU architectures
+$ ./build.sh libcuml -n                     # build libcuml but do not install
+$ ./build.sh libcumlprims --allgpuarch      # build the tests for all supported GPU architectures
 
 ```
 
@@ -78,13 +76,13 @@ If using a conda environment (recommended), then cmake can be configured appropr
 $ cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX
 ```
 
-Note: The following warning message is dependent upon the version of cmake and the `CMAKE_INSTALL_PREFIX` used. If this warning is displayed, the build should still run succesfully. We are currently working to resolve this open issue. You can silence this warning by adding `-DCMAKE_IGNORE_PATH=$CONDA_PREFIX/lib` to your `cmake` command.
+Note: The following warning message is dependent upon the version of cmake and the `CMAKE_INSTALL_PREFIX` used. If this warning is displayed, the build should still run successfully. We are currently working to resolve this open issue. You can silence this warning by adding `-DCMAKE_IGNORE_PATH=$CONDA_PREFIX/lib` to your `cmake` command.
 ```
 Cannot generate a safe runtime search path for target ml_test because files
 in some directories may conflict with libraries in implicit directories:
 ```
 
-There are many options to configure the build process, see the [customizing build section](#libcuml-&-libcumlc++).
+There are many options to configure the build process, see the [customizing build section](#custom-build-options).
 
 3. Build `libcumlprims`:
 
@@ -107,11 +105,7 @@ cuMLPrims CMake has the following configurable flags available:
 
 | Flag | Possible Values | Default Value | Behavior |
 | --- | --- | --- | --- |
-| BUILD_OPG_TESTS | [ON, OFF] | OFF | Build MPI cumlcomms based C++ unit tests (in progress, refer to step 4. of the build steps).  |
-| BUILD_CUMLPRIMS_LIBRARY | [ON, OFF]  | ON  | Enable/disable building libcumprims shared library. |
-| CMAKE_CXX11_ABI | [ON, OFF]  | ON  | Enable/disable the GLIBCXX11 ABI  |
-| DISABLE_OPENMP | [ON, OFF]  | OFF  | Set to `ON` to disable OpenMP  |
-| KERNEL_INFO | [ON, OFF]  | OFF  | Enable/disable kernel resource usage info in nvcc. |
-| LINE_INFO | [ON, OFF]  | OFF  | Enable/disable lineinfo in nvcc.  |
-| NVTX | [ON, OFF]  | OFF  | Enable/disable nvtx markers in libcumlprims.  |
-| GPU_ARCHS |  List of GPU architectures, semicolon-separated | 60;70;75  | List of GPU architectures that all artifacts are compiled for.  |
+| BUILD_OPG_TESTS | [ON, OFF] | OFF | Build MPI cumlcomms based C++ unit tests (in progress, refer to step 4. of the build steps). |
+| BUILD_CUMLPRIMS_LIBRARY | [ON, OFF] | ON | Enable/disable building libcumlprims shared library. |
+| DISABLE_OPENMP | [ON, OFF] | OFF | Set to `ON` to disable OpenMP |
+| NVTX | [ON, OFF] | OFF | Enable/disable nvtx markers in libcumlprims. |
diff --git a/VERSION b/VERSION
index 3af4bda..296e352 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-25.08.00
+25.10.00
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index cccdf51..0e82f27 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -55,4 +55,5 @@ find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r
     sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
     sed_runner "s@rapidsai/devcontainers/features/cuda:[0-9.]*@rapidsai/devcontainers/features/cuda:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
     sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+    sed_runner "s@rapids-\${localWorkspaceFolderBasename}-[0-9.]*@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}"
 done
diff --git a/cmake/RAPIDS.cmake b/cmake/RAPIDS.cmake
index 8f04915..65c33b0 100644
--- a/cmake/RAPIDS.cmake
+++ b/cmake/RAPIDS.cmake
@@ -18,9 +18,9 @@
 cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 # Allow users to control which version is used
-if(NOT rapids-cmake-version OR NOT rapids-cmake-version MATCHES [[^([0-9][0-9])\.([0-9][0-9])$]])
+if(NOT (rapids-cmake-branch OR rapids-cmake-version))
   message(
-    FATAL_ERROR "The CMake variable rapids-cmake-version must be defined in the format MAJOR.MINOR."
+    FATAL_ERROR "The CMake variable `rapids-cmake-branch` or `rapids-cmake-version` must be defined"
   )
 endif()
 
@@ -33,7 +33,7 @@ endif()
 # Allow users to control which branch is fetched
 if(NOT rapids-cmake-branch)
   # Define a default branch if the user doesn't set one
-  set(rapids-cmake-branch "branch-${rapids-cmake-version}")
+  set(rapids-cmake-branch "release/${rapids-cmake-version}")
 endif()
 
 # Allow users to control the exact URL passed to FetchContent
diff --git a/cmake/rapids_config.cmake b/cmake/rapids_config.cmake
index 5aa8e55..5f02b05 100644
--- a/cmake/rapids_config.cmake
+++ b/cmake/rapids_config.cmake
@@ -25,5 +25,15 @@ else()
       "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}")
 endif()
 
+# Use STRINGS to trim whitespace/newlines
+file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/../RAPIDS_BRANCH" _rapids_branch)
+if(NOT _rapids_branch)
+  message(
+    FATAL_ERROR
+      "Could not determine branch name to use for checking out rapids-cmake. The file \"${CMAKE_CURRENT_LIST_DIR}/../RAPIDS_BRANCH\" is missing."
+  )
+endif()
+
 set(rapids-cmake-version "${RAPIDS_VERSION_MAJOR_MINOR}")
+set(rapids-cmake-branch "${_rapids_branch}")
 include("${CMAKE_CURRENT_LIST_DIR}/RAPIDS.cmake")
diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml
index b321b20..555e19a 100644
--- a/conda/environments/all_cuda-129_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-129_arch-aarch64.yaml
@@ -14,13 +14,13 @@ dependencies:
 - cuda-nvtx-dev
 - cuda-version=12.9
 - cxx-compiler
-- gcc_linux-aarch64=13.*
+- gcc_linux-aarch64=14.*
 - libcublas-dev
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
-- libraft-headers==25.8.*
-- librmm==25.8.*
+- libraft-headers==25.10.*
+- librmm==25.10.*
 - ninja
 - pre-commit
 - python>=3.10,<3.14
diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml
index 563668c..9c4bf94 100644
--- a/conda/environments/all_cuda-129_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-129_arch-x86_64.yaml
@@ -14,13 +14,13 @@ dependencies:
 - cuda-nvtx-dev
 - cuda-version=12.9
 - cxx-compiler
-- gcc_linux-64=13.*
+- gcc_linux-64=14.*
 - libcublas-dev
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
-- libraft-headers==25.8.*
-- librmm==25.8.*
+- libraft-headers==25.10.*
+- librmm==25.10.*
 - ninja
 - pre-commit
 - python>=3.10,<3.14
diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml
new file mode 100644
index 0000000..5c74279
--- /dev/null
+++ b/conda/environments/all_cuda-130_arch-aarch64.yaml
@@ -0,0 +1,28 @@
+# This file is generated by `rapids-dependency-file-generator`.
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+channels:
+- rapidsai
+- rapidsai-nightly
+- conda-forge
+dependencies:
+- c-compiler
+- clang-tools==20.1.4
+- clang==20.1.4
+- cmake>=3.30.4
+- cuda-cudart-dev
+- cuda-nvcc
+- cuda-nvtx-dev
+- cuda-version=13.0
+- cxx-compiler
+- gcc_linux-aarch64=14.*
+- libcublas-dev
+- libcurand-dev
+- libcusolver-dev
+- libcusparse-dev
+- libraft-headers==25.10.*
+- librmm==25.10.*
+- ninja
+- pre-commit
+- python>=3.10,<3.14
+- sysroot_linux-aarch64==2.28
+name: all_cuda-130_arch-aarch64
diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml
new file mode 100644
index 0000000..70f83f3
--- /dev/null
+++ b/conda/environments/all_cuda-130_arch-x86_64.yaml
@@ -0,0 +1,28 @@
+# This file is generated by `rapids-dependency-file-generator`.
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+channels:
+- rapidsai
+- rapidsai-nightly
+- conda-forge
+dependencies:
+- c-compiler
+- clang-tools==20.1.4
+- clang==20.1.4
+- cmake>=3.30.4
+- cuda-cudart-dev
+- cuda-nvcc
+- cuda-nvtx-dev
+- cuda-version=13.0
+- cxx-compiler
+- gcc_linux-64=14.*
+- libcublas-dev
+- libcurand-dev
+- libcusolver-dev
+- libcusparse-dev
+- libraft-headers==25.10.*
+- librmm==25.10.*
+- ninja
+- pre-commit
+- python>=3.10,<3.14
+- sysroot_linux-64==2.28
+name: all_cuda-130_arch-x86_64
diff --git a/conda/recipes/libcumlprims/conda_build_config.yaml b/conda/recipes/libcumlprims/conda_build_config.yaml
index caf5489..1082f0d 100644
--- a/conda/recipes/libcumlprims/conda_build_config.yaml
+++ b/conda/recipes/libcumlprims/conda_build_config.yaml
@@ -1,8 +1,8 @@
 c_compiler_version:
-  - 13
+  - 14
 
 cxx_compiler_version:
-  - 13
+  - 14
 
 cuda_compiler:
   - cuda-nvcc
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index cb28690..1e82405 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -54,6 +54,7 @@ option(CUMLPRIMS_MG_RAFT_CLONE_ON_PIN "Explicitly clone RAFT branch when pinned
 option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON)
 option(DISABLE_OPENMP "Disable OpenMP" OFF)
 option(NVTX "Enable nvtx markers" OFF)
+option(DISABLE_DEPRECATION_WARNINGS "Disable warnings generated from deprecated declarations." OFF)
 
 if(BUILD_TESTS)
   message(STATUS "CUMLPRIMS_MG: Setting BUILD_TESTS to OFF because they don't compile")
@@ -279,4 +280,4 @@ rapids_export(
 # include(cmake/doxygen.cmake)
 # add_doxygen_target(IN_DOXYFILE src_prims/Doxyfile.in
 #   OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
-#   CWD ${CMAKE_CURRENT_BINARY_DIR})
\ No newline at end of file
+#   CWD ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
index f471714..e51226c 100644
--- a/cpp/cmake/modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -24,19 +24,14 @@ list(APPEND CUMLPRIMS_MG_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-conste
 # list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Werror=cross-execution-space-call)
 # list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
 
-if(DISABLE_DEPRECATION_WARNING)
-    list(APPEND CUMLPRIMS_MG_CXX_FLAGS -Wno-deprecated-declarations)
-    list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations)
+if(DISABLE_DEPRECATION_WARNINGS)
+    list(APPEND CUMLPRIMS_MG_CXX_FLAGS -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
+    list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
 endif()
 
 # make sure we produce smallest binary size
-list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xfatbin=-compress-all)
-if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
-   AND (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION
-                                                                   VERSION_LESS 13.0)
-)
-  list(APPEND CUMLPRIMS_MG_CUDA_FLAGS -Xfatbin=--compress-level=3)
-endif()
+include(${rapids-cmake-dir}/cuda/enable_fatbin_compression.cmake)
+rapids_cuda_enable_fatbin_compression(VARIABLE CUMLPRIMS_MG_CUDA_FLAGS TUNE_FOR rapids)
 
 # Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
 if(CUDA_ENABLE_LINEINFO)
diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
index c503006..698b690 100644
--- a/cpp/cmake/thirdparty/get_raft.cmake
+++ b/cpp/cmake/thirdparty/get_raft.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ function(find_and_configure_raft)
     cmake_parse_arguments(PKG "${options}" "${oneValueArgs}"
                           "${multiValueArgs}" ${ARGN} )
 
-    if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "branch-${CUMLPRIMS_MG_BRANCH_VERSION_raft}")
+    if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "${rapids-cmake-checkout-tag}")
       message(STATUS "CUMLPRIMS_MG: RAFT pinned tag found: ${PKG_PINNED_TAG}. Cloning raft locally.")
       set(CPM_DOWNLOAD_raft ON)
     elseif(PKG_USE_RAFT_STATIC AND (NOT CPM_raft_SOURCE))
@@ -53,13 +53,12 @@ function(find_and_configure_raft)
 endfunction()
 
 set(CUMLPRIMS_MG_MIN_VERSION_raft "${CUMLPRIMS_MG_VERSION_MAJOR}.${CUMLPRIMS_MG_VERSION_MINOR}.00")
-set(CUMLPRIMS_MG_BRANCH_VERSION_raft "${CUMLPRIMS_MG_VERSION_MAJOR}.${CUMLPRIMS_MG_VERSION_MINOR}")
 
 # Change pinned tag here to test a commit in CI
 # To use a different RAFT locally, set the CMake variable
 # CPM_raft_SOURCE=/path/to/local/raft
 find_and_configure_raft(VERSION          ${CUMLPRIMS_MG_MIN_VERSION_raft}
                         FORK             rapidsai
-                        PINNED_TAG       branch-${CUMLPRIMS_MG_BRANCH_VERSION_raft}
+                        PINNED_TAG       ${rapids-cmake-checkout-tag}
                         CLONE_ON_PIN     ${CUMLPRIMS_MG_RAFT_CLONE_ON_PIN}
                         )
diff --git a/dependencies.yaml b/dependencies.yaml
index dbbee3c..b4a1eb0 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -3,7 +3,7 @@ files:
   all:
     output: conda
     matrix:
-      cuda: ["12.9"]
+      cuda: ["12.9", "13.0"]
       arch: [x86_64, aarch64]
     includes:
       - build_cpp
@@ -30,28 +30,23 @@ dependencies:
       - output_types: conda
         packages:
           - c-compiler
+          - cuda-nvcc
           - cxx-compiler
-          - libraft-headers==25.8.*
-          - librmm==25.8.*
+          - libraft-headers==25.10.*
+          - librmm==25.10.*
     specific:
       - output_types: conda
         matrices:
-          - matrix: {arch: x86_64}
+          - matrix:
+              arch: x86_64
             packages:
+              - gcc_linux-64=14.*
               - sysroot_linux-64==2.28
-          - matrix: {arch: aarch64}
+          - matrix:
+              arch: aarch64
             packages:
+              - gcc_linux-aarch64=14.*
               - sysroot_linux-aarch64==2.28
-      - output_types: conda
-        matrices:
-          - matrix: {arch: x86_64, cuda: "12.*"}
-            packages:
-              - gcc_linux-64=13.*
-              - cuda-nvcc
-          - matrix: {arch: aarch64, cuda: "12.*"}
-            packages:
-              - gcc_linux-aarch64=13.*
-              - cuda-nvcc
   cuda_version:
     specific:
       - output_types: conda
@@ -76,19 +71,20 @@ dependencies:
               cuda: "12.9"
             packages:
               - cuda-version=12.9
-  cuda:
-    specific:
-      - output_types: conda
-        matrices:
           - matrix:
-              cuda: "12.*"
+              cuda: "13.0"
             packages:
-              - cuda-cudart-dev
-              - cuda-nvtx-dev
-              - libcublas-dev
-              - libcurand-dev
-              - libcusolver-dev
-              - libcusparse-dev
+              - cuda-version=13.0
+  cuda:
+    common:
+      - output_types: conda
+        packages:
+          - cuda-cudart-dev
+          - cuda-nvtx-dev
+          - libcublas-dev
+          - libcurand-dev
+          - libcusolver-dev
+          - libcusparse-dev
   develop:
     common:
       - output_types: [conda, requirements]