rapidsai · maxwbuckley · Mar 22, 2026 · Mar 22, 2026 · bdice · Mar 22, 2026
@@ -76,6 +76,16 @@ create_logger_macros(RMM "rmm::default_logger()" include/rmm)
 include(cmake/thirdparty/get_cccl.cmake)
 include(cmake/thirdparty/get_nvtx.cmake)
 
+# Mark third-party includes as SYSTEM to suppress warnings from upstream headers
+foreach(_dep rapids_logger spdlog)
+  if(TARGET ${_dep})
+    get_target_property(_inc ${_dep} INTERFACE_INCLUDE_DIRECTORIES)
+    if(_inc)
+      set_target_properties(${_dep} PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_inc}")
+    endif()
+  endif()
+endforeach()
+
 # ##################################################################################################
 # * library targets --------------------------------------------------------------------------------
 
@@ -128,11 +138,20 @@ if(RMM_NVTX)
   target_compile_definitions(rmm PUBLIC RMM_NVTX)
 endif()
 
-set(RMM_CXX_FLAGS -Wall -Werror -Wextra -Wsign-conversion -Wno-unknown-pragmas
-                  -Wno-error=deprecated-declarations)
+set(RMM_CXX_FLAGS
+    -Wall
+    -Werror
+    -Wextra
+    -Wshadow
+    -Wnon-virtual-dtor
+    -Woverloaded-virtual
+
+    -Wsign-conversion
+    -Wno-unknown-pragmas
+    -Wno-error=deprecated-declarations)
 set(RMM_CUDA_FLAGS
     -Werror=all-warnings
-    -Xcompiler=-Wall,-Werror,-Wextra,-Wsign-conversion,-Wno-error=deprecated-declarations)
+    -Xcompiler=-Wall,-Werror,-Wextra,-Wshadow,-Wnon-virtual-dtor,-Woverloaded-virtual,-Wsign-conversion,-Wno-error=deprecated-declarations)
 target_compile_options(rmm PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RMM_CXX_FLAGS}>"
                                    "$<$<COMPILE_LANGUAGE:CUDA>:${RMM_CUDA_FLAGS}>")
 

@@ -9,9 +9,19 @@
 option(DISABLE_DEPRECATION_WARNING "Disable warnings generated from deprecated declarations." OFF)
 option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF)
 
-set(RMM_BENCHMARKS_CXX_FLAGS -Wall -Werror -Wextra -Wsign-conversion -Wno-unknown-pragmas)
-set(RMM_BENCHMARKS_CUDA_FLAGS -Werror=all-warnings
-                              -Xcompiler=-Wall,-Werror,-Wextra,-Wsign-conversion)
+set(RMM_BENCHMARKS_CXX_FLAGS
+    -Wall
+    -Werror
+    -Wextra
+    -Wshadow
+    -Wnon-virtual-dtor
+    -Woverloaded-virtual
+
+    -Wsign-conversion
+    -Wno-unknown-pragmas)
+set(RMM_BENCHMARKS_CUDA_FLAGS
+    -Werror=all-warnings
+    -Xcompiler=-Wall,-Werror,-Wextra,-Wshadow,-Wnon-virtual-dtor,-Woverloaded-virtual,-Wsign-conversion)
 
 if(PER_THREAD_DEFAULT_STREAM)
   message(STATUS "RMM: Building benchmarks with per-thread default stream")

@@ -29,7 +29,7 @@ constexpr std::size_t size_mb{1 << 20};
 struct allocation {
   void* ptr{nullptr};
   std::size_t size{0};
-  allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {}
+  allocation(void* p, std::size_t sz) : ptr{p}, size{sz} {}
   allocation() = default;
 };
 
@@ -262,13 +262,13 @@ void declare_benchmark(std::string const& name)
 }
 
 static void profile_random_allocations(MRFactoryFunc const& factory,
-                                       std::size_t num_allocations,
-                                       std::size_t max_size)
+                                       std::size_t alloc_count,
+                                       std::size_t max_alloc_size)
 {
   auto mr = factory();
 
   try {
-    uniform_random_allocations(*mr, num_allocations, max_size, max_usage);
+    uniform_random_allocations(*mr, alloc_count, max_alloc_size, max_usage);
   } catch (std::exception const& e) {
     std::cout << "Error: " << e.what() << "\n";
   }

@@ -89,7 +89,7 @@ using MRFactoryFunc = std::function<std::shared_ptr<rmm::mr::device_memory_resou
 struct allocation {
   allocation() = default;
   void* ptr{};
-  allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {}
+  allocation(void* p, std::size_t sz) : ptr{p}, size{sz} {}
   std::size_t size{};
 };
 
@@ -119,8 +119,8 @@ struct replay_benchmark {
    * set of arguments forwarded to the MR constructor.
    *
    * @param factory A factory function to create the memory resource
+   * @param simulated_size The simulated total memory size
    * @param events The set of allocation events to replay
-   * @param args Variable number of arguments forward to the constructor of MR
    */
   replay_benchmark(MRFactoryFunc factory,
                    std::size_t simulated_size,
@@ -366,14 +366,14 @@ int main(int argc, char** argv)
                             "Enable verbose printing of log events",
                             cxxopts::value<bool>()->default_value("false"));
 
-      auto args = options.parse(argc, argv);
+      auto parsed = options.parse(argc, argv);
 
-      if (args.count("file") == 0) {
+      if (parsed.count("file") == 0) {
         std::cout << options.help() << std::endl;
         exit(0);
       }
 
-      return args;
+      return parsed;
     }();
 
     auto filename = args["file"].as<std::string>();

@@ -19,8 +19,8 @@
 
 cuda_event_timer::cuda_event_timer(benchmark::State& state,
                                    bool flush_l2_cache,
-                                   rmm::cuda_stream_view stream)
-  : stream(stream), p_state(&state)
+                                   rmm::cuda_stream_view strm)
+  : stream(strm), p_state(&state)
 {
   // flush all of L2$
   if (flush_l2_cache) {

@@ -64,11 +64,11 @@ class cuda_event_timer {
    * to update.
    * @param[in] flush_l2_cache whether or not to flush the L2 cache before
    *                            every iteration.
-   * @param[in] stream The CUDA stream we are measuring time on.
+   * @param[in] strm The CUDA stream we are measuring time on.
    */
   cuda_event_timer(benchmark::State& state,
                    bool flush_l2_cache,
-                   rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+                   rmm::cuda_stream_view strm = rmm::cuda_stream_default);
 
   // The user will HAVE to provide a benchmark::State object to set
   // the timer so we disable the default c'tor.

@@ -35,29 +35,28 @@ struct event {
   event(event&&) noexcept            = default;
   event& operator=(event&&) noexcept = default;
   ~event()                           = default;
-  event(action act, std::size_t size, void const* ptr)
+  event(action a, std::size_t sz, void const* ptr)
     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
-    : act{act}, size{size}, pointer{reinterpret_cast<uintptr_t>(ptr)}
+    : act{a}, size{sz}, pointer{reinterpret_cast<uintptr_t>(ptr)}
   {
   }
 
   // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
-  event(action act, std::size_t size, uintptr_t ptr) : act{act}, size{size}, pointer{ptr} {}
+  event(action a, std::size_t sz, uintptr_t ptr) : act{a}, size{sz}, pointer{ptr} {}
 
   event(std::size_t tid,
-        action act,
-        std::size_t size,  // NOLINT(bugprone-easily-swappable-parameters)
+        action a,
+        std::size_t sz,  // NOLINT(bugprone-easily-swappable-parameters)
         uintptr_t ptr,
-        uintptr_t stream,
-        std::size_t index)
-    : act{act}, size{size}, pointer{ptr}, thread_id{tid}, stream{stream}, index{index}
+        uintptr_t strm,
+        std::size_t idx)
+    : act{a}, size{sz}, pointer{ptr}, thread_id{tid}, stream{strm}, index{idx}
   {
   }
 
-  event(
-    std::size_t tid, action act, std::size_t size, void* ptr, uintptr_t stream, std::size_t index)
+  event(std::size_t tid, action a, std::size_t sz, void* ptr, uintptr_t strm, std::size_t idx)
     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
-    : event{tid, act, size, reinterpret_cast<uintptr_t>(ptr), stream, index}
+    : event{tid, a, sz, reinterpret_cast<uintptr_t>(ptr), strm, idx}
   {
   }
 

@@ -59,11 +59,11 @@ class cuda_stream {
   /**
    * @brief Construct a new CUDA stream object
    *
-   * @param flags Stream creation flags.
+   * @param stream_flags Stream creation flags.
    *
    * @throw rmm::cuda_error if stream creation fails
    */
-  cuda_stream(cuda_stream::flags flags = cuda_stream::flags::sync_default);
+  cuda_stream(cuda_stream::flags stream_flags = cuda_stream::flags::sync_default);
 
   /**
    * @brief Returns true if the owned stream is non-null

@@ -19,7 +19,7 @@
  * @brief Assertion that logs a CRITICAL log message on failure.
  */
 #ifdef NDEBUG
-#define RMM_LOGGING_ASSERT(_expr) (void)0
+#define RMM_LOGGING_ASSERT(_expr) static_cast<void>(0)
 #elif RMM_LOG_ACTIVE_LEVEL < RMM_LOG_LEVEL_OFF
 #define RMM_LOGGING_ASSERT(_expr)                                                                 \
   do {                                                                                            \

@@ -95,12 +95,12 @@ struct hwdecompress {
   {
 #if defined(CUDA_VERSION) && CUDA_VERSION >= RMM_MIN_HWDECOMPRESS_CUDA_DRIVER_VERSION
     // Check if hardware decompression is supported (requires CUDA 12.8 driver or higher)
-    static bool is_supported = []() {
+    static bool supported = []() {
       int driver_version{};
       RMM_CUDA_TRY(cudaDriverGetVersion(&driver_version));
       return driver_version >= RMM_MIN_HWDECOMPRESS_CUDA_DRIVER_VERSION;
     }();
-    return is_supported;
+    return supported;
 #else
     return false;
 #endif

@@ -42,10 +42,10 @@ class exec_policy : public thrust_exec_policy_t {
   /**
    * @brief Construct a new execution policy object
    *
-   * @param stream The stream on which to allocate temporary memory
+   * @param strm The stream on which to allocate temporary memory
    * @param mr The resource to use for allocating temporary memory
    */
-  explicit exec_policy(cuda_stream_view stream      = cuda_stream_default,
+  explicit exec_policy(cuda_stream_view strm        = cuda_stream_default,
                        device_async_resource_ref mr = mr::get_current_device_resource_ref());
 };
 
@@ -67,10 +67,10 @@ class exec_policy_nosync : public thrust_exec_policy_nosync_t {
   /**
    * @brief Construct a new execution policy object
    *
-   * @param stream The stream on which to allocate temporary memory
+   * @param strm The stream on which to allocate temporary memory
    * @param mr The resource to use for allocating temporary memory
    */
-  explicit exec_policy_nosync(cuda_stream_view stream      = cuda_stream_default,
+  explicit exec_policy_nosync(cuda_stream_view strm        = cuda_stream_default,
                               device_async_resource_ref mr = mr::get_current_device_resource_ref());
 };
 

@@ -141,18 +141,18 @@ class arena_memory_resource final : public device_memory_resource {
 #else
     bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
 #endif
-    auto& arena = get_arena(stream);
+    auto& stream_arena = get_arena(stream);
 
     {
       std::shared_lock lock(mtx_);
-      void* pointer = arena.allocate_sync(bytes);
+      void* pointer = stream_arena.allocate_sync(bytes);
       if (pointer != nullptr) { return pointer; }
     }
 
     {
       std::unique_lock lock(mtx_);
       defragment();
-      void* pointer = arena.allocate_sync(bytes);
+      void* pointer = stream_arena.allocate_sync(bytes);
       if (pointer == nullptr) {
         if (dump_log_on_failure_) { dump_memory_log(bytes); }
         auto const msg = std::string("Maximum pool size exceeded (failed to allocate ") +
@@ -193,12 +193,12 @@ class arena_memory_resource final : public device_memory_resource {
 #else
     bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT);
 #endif
-    auto& arena = get_arena(stream);
+    auto& dealloc_arena = get_arena(stream);
 
     {
       std::shared_lock lock(mtx_);
       // If the memory being freed does not belong to the arena, the following will return false.
-      if (arena.deallocate(stream, ptr, bytes)) { return; }
+      if (dealloc_arena.deallocate(stream, ptr, bytes)) { return; }
     }
 
     {

@@ -27,10 +27,7 @@ namespace mr::detail {
  */
 struct block : public block_base {
   block() = default;
-  block(char* ptr, std::size_t size, bool is_head)
-    : block_base{ptr}, size_bytes{size}, head{is_head}
-  {
-  }
+  block(char* p, std::size_t size, bool is_head) : block_base{p}, size_bytes{size}, head{is_head} {}
 
   /**
    * @brief Returns the pointer to the memory represented by this block.

@@ -20,7 +20,7 @@ struct block_base {
   void* ptr{};  ///< Raw memory pointer
 
   block_base() = default;
-  block_base(void* ptr) : ptr{ptr} {};
+  block_base(void* p) : ptr{p} {};
 
   /// Returns the raw pointer for this block
   [[nodiscard]] inline void* pointer() const { return ptr; }

@@ -136,6 +136,8 @@ class owning_wrapper : public device_memory_resource {
   {
   }
 
+  ~owning_wrapper() override = default;
+
   /**
    * @briefreturn{A constant reference to the wrapped resource}
    */

@@ -92,8 +92,8 @@ class pinned_host_memory_resource final : public device_memory_resource {
   {
     // TODO: Use the alignment parameter as an argument to do_deallocate
     std::size_t constexpr alignment = rmm::CUDA_ALLOCATION_ALIGNMENT;
-    rmm::detail::aligned_host_deallocate(ptr, bytes, alignment, [](void* ptr) {
-      RMM_ASSERT_CUDA_SUCCESS_SAFE_SHUTDOWN(cudaFreeHost(ptr));
+    rmm::detail::aligned_host_deallocate(ptr, bytes, alignment, [](void* p) {
+      RMM_ASSERT_CUDA_SUCCESS_SAFE_SHUTDOWN(cudaFreeHost(p));
     });
   }
 

@@ -121,7 +121,7 @@ class system_memory_resource final : public device_memory_resource {
     stream.synchronize();
 
     rmm::detail::aligned_host_deallocate(
-      ptr, bytes, CUDA_ALLOCATION_ALIGNMENT, [](void* ptr) { ::operator delete(ptr); });
+      ptr, bytes, CUDA_ALLOCATION_ALIGNMENT, [](void* p) { ::operator delete(p); });
   }
 
   /**

diff --git a/cpp/src/cuda_stream.cpp b/cpp/src/cuda_stream.cpp
@@ -13,12 +13,12 @@
 
 namespace rmm {
 
-cuda_stream::cuda_stream(cuda_stream::flags flags)
-  : stream_{[flags]() {
+cuda_stream::cuda_stream(cuda_stream::flags stream_flags)
+  : stream_{[stream_flags]() {
               auto* stream = new cudaStream_t;  // NOLINT(cppcoreguidelines-owning-memory)
               // TODO: use std::to_underlying once C++23 is allowed.
               RMM_CUDA_TRY(cudaStreamCreateWithFlags(
-                stream, static_cast<std::underlying_type_t<cuda_stream::flags>>(flags)));
+                stream, static_cast<std::underlying_type_t<cuda_stream::flags>>(stream_flags)));
               return stream;
             }(),
             [](cudaStream_t* stream) {

diff --git a/cpp/src/exec_policy.cpp b/cpp/src/exec_policy.cpp
@@ -7,15 +7,14 @@
 
 namespace rmm {
 
-exec_policy::exec_policy(cuda_stream_view stream, device_async_resource_ref mr)
-  : thrust_exec_policy_t(
-      thrust::cuda::par(mr::thrust_allocator<char>(stream, mr)).on(stream.value()))
+exec_policy::exec_policy(cuda_stream_view strm, device_async_resource_ref mr)
+  : thrust_exec_policy_t(thrust::cuda::par(mr::thrust_allocator<char>(strm, mr)).on(strm.value()))
 {
 }
 
-exec_policy_nosync::exec_policy_nosync(cuda_stream_view stream, device_async_resource_ref mr)
+exec_policy_nosync::exec_policy_nosync(cuda_stream_view strm, device_async_resource_ref mr)
   : thrust_exec_policy_nosync_t(
-      thrust::cuda::par_nosync(mr::thrust_allocator<char>(stream, mr)).on(stream.value()))
+      thrust::cuda::par_nosync(mr::thrust_allocator<char>(strm, mr)).on(strm.value()))
 {
 }
 

diff --git a/cpp/src/logger.cpp b/cpp/src/logger.cpp
@@ -25,9 +25,9 @@ std::string default_pattern() { return "[%6t][%H:%M:%S:%f][%-6l] %v"; }
 rapids_logger::logger& default_logger()
 {
   static rapids_logger::logger logger_ = [] {
-    rapids_logger::logger logger_{"RMM", {default_sink()}};
-    logger_.set_pattern(default_pattern());
-    return logger_;
+    rapids_logger::logger lgr{"RMM", {default_sink()}};
+    lgr.set_pattern(default_pattern());
+    return lgr;
   }();
   return logger_;
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -136,6 +136,8 @@ class owning_wrapper : public device_memory_resource { @@
       {
       }
+      ~owning_wrapper() override = default;
       /**
        * @briefreturn{A constant reference to the wrapped resource}
        */
@@ Expand Down @@