rapidsai · rapids-bot · Jan 10, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026
@@ -334,7 +334,7 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@cuda-13.1.0
     with:
       arch: '["amd64", "arm64"]'
-      cuda: '["13.0"]'
+      cuda: '["13.1"]'
       node_type: "cpu8"
       rapids-aux-secret-1: GIST_REPO_READ_ORG_GITHUB_TOKEN
       env: |

@@ -192,6 +192,8 @@ if(NOT BUILD_CPU_ONLY)
   rapids_cpm_cccl(BUILD_EXPORT_SET cuvs-exports INSTALL_EXPORT_SET cuvs-exports)
   include(cmake/thirdparty/get_raft.cmake)
   include(cmake/thirdparty/get_cutlass.cmake)
+  include(${rapids-cmake-dir}/cpm/cuco.cmake)
+  rapids_cpm_cuco()
 endif()
 
 if(BUILD_TESTS OR BUILD_C_TESTS)
@@ -315,7 +317,10 @@ if(NOT BUILD_CPU_ONLY)
                CUDA_SEPARABLE_COMPILATION ON
                POSITION_INDEPENDENT_CODE ON
   )
-  target_link_libraries(cuvs-cagra-search PRIVATE cuvs::cuvs_cpp_headers)
+  target_link_libraries(
+    cuvs-cagra-search PRIVATE cuvs::cuvs_cpp_headers
+                              $<TARGET_NAME_IF_EXISTS:nvidia::cutlass::cutlass>
+  )
   target_compile_options(
     cuvs-cagra-search PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUVS_CXX_FLAGS}>"
                               "$<$<COMPILE_LANGUAGE:CUDA>:${CUVS_CUDA_FLAGS}>"
@@ -566,8 +571,12 @@ if(NOT BUILD_CPU_ONLY)
 
   target_link_libraries(
     cuvs_objs
-    PRIVATE cuvs::cuvs_cpp_headers ${CUVS_CTK_MATH_DEPENDENCIES}
-            $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX> $<TARGET_NAME_IF_EXISTS:hnswlib::hnswlib>
+    PRIVATE cuvs::cuvs_cpp_headers
+            cuco::cuco
+            nvidia::cutlass::cutlass
+            ${CUVS_CTK_MATH_DEPENDENCIES}
+            $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
+            $<TARGET_NAME_IF_EXISTS:hnswlib::hnswlib>
   )
 
   # Endian detection
@@ -637,7 +646,8 @@ if(NOT BUILD_CPU_ONLY)
              $<BUILD_LOCAL_INTERFACE:$<TARGET_NAME_IF_EXISTS:NCCL::NCCL>>
              $<BUILD_LOCAL_INTERFACE:$<TARGET_NAME_IF_EXISTS:hnswlib::hnswlib>>
              $<$<BOOL:${CUVS_NVTX}>:CUDA::nvtx3>
-      PRIVATE nvidia::cutlass::cutlass $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
+      PRIVATE $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX> $<COMPILE_ONLY:nvidia::cutlass::cutlass>
+              $<COMPILE_ONLY:cuco::cuco>
     )
 
     # ensure CUDA symbols aren't relocated to the middle of the debug build binaries
@@ -692,8 +702,8 @@ SECTIONS
              ${CUVS_CTK_MATH_DEPENDENCIES}
              $<TARGET_NAME_IF_EXISTS:NCCL::NCCL> # needs to be public for DT_NEEDED
              $<BUILD_LOCAL_INTERFACE:$<TARGET_NAME_IF_EXISTS:hnswlib::hnswlib>> # header only
-      PRIVATE nvidia::cutlass::cutlass $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
-              $<$<BOOL:${CUVS_NVTX}>:CUDA::nvtx3>
+      PRIVATE $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX> $<$<BOOL:${CUVS_NVTX}>:CUDA::nvtx3>
+              $<COMPILE_ONLY:nvidia::cutlass::cutlass> $<COMPILE_ONLY:cuco::cuco>
     )
   endif()
 

@@ -0,0 +1,31 @@
+From 661c7e679ac72926d619da46834d09f52a727f5e Mon Sep 17 00:00:00 2001
+From: Robert Maynard <rmaynard@nvidia.com>
+Date: Tue, 5 Aug 2025 15:05:57 -0400
+Subject: [PATCH] Support both CUDA 12 and 13 cccl header locations
+
+---
+ CMakeLists.txt | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 38dcca9f..4088b71f 100755
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -704,8 +704,14 @@ target_include_directories(
+   CUTLASS
+   SYSTEM INTERFACE
+   $<BUILD_INTERFACE:${CUDA_TOOLKIT_ROOT_DIR}/include>
+-  $<BUILD_INTERFACE:${CUDA_TOOLKIT_ROOT_DIR}/include/cccl>
+   )
++if(CUDA_VERSION VERSION_GREATER_EQUAL 13.0)
++  target_include_directories(
++    CUTLASS
++    SYSTEM INTERFACE
++    $<BUILD_INTERFACE:${CUDA_TOOLKIT_ROOT_DIR}/include/cccl>
++    )
++endif()
+
+ install(
+   DIRECTORY
+--
+2.39.5 (Apple Git-154)
@@ -1,13 +1,13 @@
 {
   "packages" : {
     "cutlass" : {
-      "version": "3.5.1",
+      "version": "4.1.0",
       "git_url": "https://github.com/NVIDIA/cutlass.git",
       "git_tag": "v${version}",
       "patches" : [
         {
-          "file" : "${current_json_dir}/cutlass/build-export.patch",
-          "issue" : "Fix build directory export",
+          "file" : "${current_json_dir}/cutlass/Support-both-CUDA-12-and-13-cccl-header-locations.patch",
+          "issue" : "Support CUDA 12 CTK layout[https://github.com/NVIDIA/cutlass/pull/2543]",
           "fixed_in" : ""
         }
       ]

@@ -1,6 +1,6 @@
 # =============================================================================
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 # =============================================================================
@@ -24,9 +24,7 @@ function(find_and_configure_cutlass)
       CACHE BOOL "Disable CUTLASS to build with cuBLAS library."
   )
 
-  if (CUDA_STATIC_RUNTIME)
-    set(CUDART_LIBRARY "${CUDA_cudart_static_LIBRARY}" CACHE FILEPATH "fixing cutlass cmake code" FORCE)
-  endif()
+  set(CUDART_LIBRARY "${CUDA_cudart_static_LIBRARY}" CACHE FILEPATH "fixing cutlass cmake code" FORCE)
 
   include("${rapids-cmake-dir}/cpm/package_override.cmake")
   rapids_cpm_package_override("${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../patches/cutlass_override.json")
@@ -42,6 +40,7 @@ function(find_and_configure_cutlass)
     NvidiaCutlass ${version} ${find_args}
     GLOBAL_TARGETS nvidia::cutlass::cutlass
     CPM_ARGS ${cpm_args}
+    EXCLUDE_FROM_ALL ON
     OPTIONS "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}"
   )
 

@@ -0,0 +1,19 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#pragma once
+
+#include <raft/core/error.hpp>
+
+namespace cuvs {
+
+/**
+ * @brief Exception thrown when a CUTLASS error is encountered.
+ */
+struct cutlass_error : public raft::exception {
+  explicit cutlass_error(char const* const message) : raft::exception(message) {}
+  explicit cutlass_error(std::string const& message) : raft::exception(message) {}
+};
+
+}  // namespace cuvs
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -15,10 +15,10 @@
 #define cutlass cuvs_cutlass
 #endif
 
-#include "epilogue_elementwise.cuh"     // FusedDistanceNNEpilogueElementwise
-#include "gemm.h"                       // FusedDistanceNNGemm
-#include <raft/util/cudart_utils.hpp>   // getMultiProcessorCount
-#include <raft/util/cutlass_utils.cuh>  // RAFT_CUTLASS_TRY
+#include "../../../util/cutlass_utils.hpp"  // CUVS_CUTLASS_TRY
+#include "epilogue_elementwise.cuh"         // FusedDistanceNNEpilogueElementwise
+#include "gemm.h"                           // FusedDistanceNNGemm
+#include <raft/util/cudart_utils.hpp>       // getMultiProcessorCount
 
 #include <rmm/device_uvector.hpp>
 
@@ -152,11 +152,11 @@ void cutlassFusedDistanceNN(const DataT* x,
   // Instantiate CUTLASS kernel depending on templates
   fusedDistanceNN fusedDistanceNN_op;
   // Check the problem size is supported or not
-  RAFT_CUTLASS_TRY(fusedDistanceNN_op.can_implement(arguments));
+  CUVS_CUTLASS_TRY(fusedDistanceNN_op.can_implement(arguments));
   // Initialize CUTLASS kernel with arguments and workspace pointer
-  RAFT_CUTLASS_TRY(fusedDistanceNN_op.initialize(arguments, workspace.data(), stream));
+  CUVS_CUTLASS_TRY(fusedDistanceNN_op.initialize(arguments, workspace.data(), stream));
   // Launch initialized CUTLASS kernel
-  RAFT_CUTLASS_TRY(fusedDistanceNN_op.run(stream));
+  CUVS_CUTLASS_TRY(fusedDistanceNN_op.run(stream));
 }
 
 };  // namespace detail

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -18,8 +18,8 @@
 #include "pairwise_distance_epilogue_elementwise.h"
 #include "pairwise_distance_gemm.h"
 
+#include "../../util/cutlass_utils.hpp"
 #include "distance_ops/cutlass.cuh"
-#include <raft/util/cutlass_utils.cuh>
 
 #include <rmm/device_uvector.hpp>
 
@@ -157,13 +157,13 @@ std::enable_if_t<ops::has_cutlass_op<OpT>::value> cutlassDistanceKernel(const Da
     // Instantiate CUTLASS kernel depending on templates
     cutlassDist cutlassDist_op;
     // Check the problem size is supported or not
-    RAFT_CUTLASS_TRY(cutlassDist_op.can_implement(arguments));
+    CUVS_CUTLASS_TRY(cutlassDist_op.can_implement(arguments));
 
     // Initialize CUTLASS kernel with arguments and workspace pointer
-    RAFT_CUTLASS_TRY(cutlassDist_op.initialize(arguments, workspace.data(), stream));
+    CUVS_CUTLASS_TRY(cutlassDist_op.initialize(arguments, workspace.data(), stream));
 
     // Launch initialized CUTLASS kernel
-    RAFT_CUTLASS_TRY(cutlassDist_op(stream));
+    CUVS_CUTLASS_TRY(cutlassDist_op(stream));
   }
 }
 

@@ -0,0 +1,33 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <cuvs/util/cutlass_utils.hpp>
+
+#include <cutlass/cutlass.h>
+
+/**
+ * @brief Error checking macro for CUTLASS functions.
+ *
+ * Evaluates `call` exactly once; if the result is not cutlass::Status::kSuccess,
+ * throws cuvs::cutlass_error naming the failing call and the CUTLASS status
+ * string. This macro is internal to cuvs, which is why this file differs from
+ * the header of the same name included above as <cuvs/util/cutlass_utils.hpp>.
+ * Locals carry a cuvs_cutlass_ prefix so names used inside `call` cannot bind to them.
+ */
+#define CUVS_CUTLASS_TRY(call)                                     \
+  do {                                                             \
+    cutlass::Status const cuvs_cutlass_status_ = (call);           \
+    if (cuvs_cutlass_status_ != cutlass::Status::kSuccess) {       \
+      std::string cuvs_cutlass_msg_{};                             \
+      SET_ERROR_MSG(cuvs_cutlass_msg_,                             \
+                    "CUTLASS error encountered at: ",              \
+                    "call='%s', Reason=%s",                        \
+                    #call,                                         \
+                    cutlassGetStatusString(cuvs_cutlass_status_)); \
+      throw cuvs::cutlass_error(cuvs_cutlass_msg_);                \
+    }                                                              \
+  } while (0)