NVIDIA · PointKernel · Jun 13, 2022 · Jun 4, 2022 · Jun 4, 2022 · Jun 7, 2022
@@ -37,8 +37,6 @@ conda activate cuda
 
 gpuci_logger "Check versions"
 python --version
-$CC --version
-$CXX --version
 
 gpuci_logger "Check conda environment"
 conda info

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2017-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@
 
 namespace cuco {
 
+using hash_value_type = uint32_t;
+
 namespace detail {
 
 // MurmurHash3_32 implementation from

@@ -21,6 +21,8 @@
 
 #include <cuda/std/atomic>
 
+#include <cooperative_groups.h>
+
 namespace cuco {
 namespace detail {
 
@@ -186,13 +188,15 @@ class linear_probing_impl
    *
    * If vector-load is enabled, the return slot is always even to avoid illegal memory access.
    *
-   * @tparam CG CUDA Cooperative Groups type
+   * @tparam ProbeKey Probe key type
+   *
    * @param g the Cooperative Group for which the initial slot is needed
    * @param k The key to get the slot for
    * @return Pointer to the initial slot for `k`
    */
-  template <typename CG>
-  __device__ __forceinline__ iterator initial_slot(CG const& g, Key const k) noexcept
+  template <typename ProbeKey>
+  __device__ __forceinline__ iterator
+  initial_slot(cooperative_groups::thread_block_tile<cg_size> const& g, ProbeKey const& k) noexcept
   {
     auto const hash_value = [&]() {
       auto const tmp = hash_(k);
@@ -307,13 +311,15 @@ class double_hashing_impl
    * If vector-load is enabled, the return slot is always a multiple of (`cg_size` * `vector_width`)
    * to avoid illegal memory access.
    *
-   * @tparam CG CUDA Cooperative Groups type
+   * @tparam ProbeKey Probe key type
+   *
    * @param g the Cooperative Group for which the initial slot is needed
    * @param k The key to get the slot for
    * @return Pointer to the initial slot for `k`
    */
-  template <typename CG>
-  __device__ __forceinline__ iterator initial_slot(CG const& g, Key const k) noexcept
+  template <typename ProbeKey>
+  __device__ __forceinline__ iterator
+  initial_slot(cooperative_groups::thread_block_tile<cg_size> const& g, ProbeKey const& k) noexcept
   {
     std::size_t index;
     auto const hash_value = hash1_(k);

@@ -715,10 +715,17 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(CG g,
 }
 
 template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
-template <typename Hash, typename KeyEqual>
+template <typename ProbeKey, typename Hash, typename KeyEqual>
 __device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
-  Key const& k, Hash hash, KeyEqual key_equal) const noexcept
+  ProbeKey const& k, Hash hash, KeyEqual key_equal) const noexcept
 {
+  static_assert(std::is_invocable_r_v<bool, KeyEqual, ProbeKey, Key>,
+                "KeyEqual(ProbeKey{}, Key{}) must be a valid callable.");
+  static_assert(std::is_invocable_r_v<cuco::hash_value_type, Hash, Key>,
+                "Hash(Key{}) must be a valid callable.");
+  static_assert(std::is_invocable_r_v<cuco::hash_value_type, Hash, ProbeKey>,
+                "Hash(ProbeKey{}) must be a valid callable.");
+
   auto current_slot = initial_slot(k, hash);
 
   while (true) {
@@ -733,10 +740,20 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
 }
 
 template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
-template <typename CG, typename Hash, typename KeyEqual>
-__device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
-  CG g, Key const& k, Hash hash, KeyEqual key_equal) const noexcept
+template <typename CG, typename ProbeKey, typename Hash, typename KeyEqual>
+__device__ std::enable_if_t<std::is_invocable_v<KeyEqual, ProbeKey, Key>, bool>
+static_map<Key, Value, Scope, Allocator>::device_view::contains(CG const& g,
+                                                                ProbeKey const& k,
+                                                                Hash hash,
+                                                                KeyEqual key_equal) const noexcept
 {
+  static_assert(std::is_invocable_r_v<bool, KeyEqual, ProbeKey, Key>,
+                "KeyEqual(ProbeKey{}, Key{}) must be a valid callable.");
+  static_assert(std::is_invocable_r_v<cuco::hash_value_type, Hash, Key>,
+                "Hash(Key{}) must be a valid callable.");
+  static_assert(std::is_invocable_r_v<cuco::hash_value_type, Hash, ProbeKey>,
+                "Hash(ProbeKey{}) must be a valid callable.");
+
   auto current_slot = initial_slot(g, k, hash);
 
   while (true) {

@@ -20,6 +20,8 @@
 #include <thrust/tuple.h>
 #include <thrust/type_traits/is_contiguous_iterator.h>
 
+#include <cooperative_groups.h>
+
 namespace cuco {
 
 template <typename Key,
@@ -69,13 +71,16 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
    *
    * To be used for Cooperative Group based probing.
    *
-   * @tparam CG Cooperative Group type
+   * @tparam ProbeKey Probe key type
+   *
    * @param g the Cooperative Group for which the initial slot is needed
    * @param k The key to get the slot for
    * @return Pointer to the initial slot for `k`
    */
-  template <typename CG>
-  __device__ __forceinline__ iterator initial_slot(CG const& g, Key const& k) noexcept
+  template <typename ProbeKey>
+  __device__ __forceinline__ iterator
+  initial_slot(cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
+               ProbeKey const& k) noexcept
   {
     return probe_sequence_.initial_slot(g, k);
   }
@@ -85,13 +90,16 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
    *
    * To be used for Cooperative Group based probing.
    *
-   * @tparam CG Cooperative Group type
+   * @tparam ProbeKey Probe key type
+   *
    * @param g the Cooperative Group for which the initial slot is needed
    * @param k The key to get the slot for
    * @return Pointer to the initial slot for `k`
    */
-  template <typename CG>
-  __device__ __forceinline__ const_iterator initial_slot(CG g, Key const& k) const noexcept
+  template <typename ProbeKey>
+  __device__ __forceinline__ const_iterator
+  initial_slot(cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
+               ProbeKey const& k) const noexcept
   {
     return probe_sequence_.initial_slot(g, k);
   }
@@ -569,17 +577,19 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
    *
    * @tparam uses_vector_load Boolean flag indicating whether vector loads are used
    * @tparam CG Cooperative Group type
+   * @tparam ProbeKey Probe key type
    * @tparam KeyEqual Binary callable type
+   *
    * @param g The Cooperative Group used to perform the contains operation
    * @param k The key to search for
    * @param key_equal The binary callable used to compare two keys
    * for equality
    * @return A boolean indicating whether the key/value pair
    * containing `k` was inserted
    */
-  template <bool uses_vector_load, typename CG, typename KeyEqual>
+  template <bool uses_vector_load, typename CG, typename ProbeKey, typename KeyEqual>
   __device__ __forceinline__ std::enable_if_t<uses_vector_load, bool> contains(
-    CG g, Key const& k, KeyEqual key_equal) noexcept
+    CG const& g, ProbeKey const& k, KeyEqual key_equal) noexcept
   {
     auto current_slot = initial_slot(g, k);
 
@@ -617,17 +627,19 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
    *
    * @tparam uses_vector_load Boolean flag indicating whether vector loads are used
    * @tparam CG Cooperative Group type
+   * @tparam ProbeKey Probe key type
    * @tparam KeyEqual Binary callable type
+   *
    * @param g The Cooperative Group used to perform the contains operation
    * @param k The key to search for
    * @param key_equal The binary callable used to compare two keys
    * for equality
    * @return A boolean indicating whether the key/value pair
    * containing `k` was inserted
    */
-  template <bool uses_vector_load, typename CG, typename KeyEqual>
+  template <bool uses_vector_load, typename CG, typename ProbeKey, typename KeyEqual>
   __device__ __forceinline__ std::enable_if_t<not uses_vector_load, bool> contains(
-    CG g, Key const& k, KeyEqual key_equal) noexcept
+    CG const& g, ProbeKey const& k, KeyEqual key_equal) noexcept
   {
     auto current_slot = initial_slot(g, k);
 

@@ -108,7 +108,7 @@ template <typename Key,
           class ProbeSequence>
 template <typename InputIt, typename OutputIt, typename KeyEqual>
 void static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::contains(
-  InputIt first, InputIt last, OutputIt output_begin, cudaStream_t stream, KeyEqual key_equal) const
+  InputIt first, InputIt last, OutputIt output_begin, KeyEqual key_equal, cudaStream_t stream) const
 {
   auto const num_keys = std::distance(first, last);
   if (num_keys == 0) { return; }
@@ -536,13 +536,37 @@ template <typename Key,
           cuda::thread_scope Scope,
           typename Allocator,
           class ProbeSequence>
-template <typename KeyEqual>
+template <typename ProbeKey, typename KeyEqual>
 __device__ __forceinline__ bool
 static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view::contains(
   cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
-  Key const& k,
+  ProbeKey const& k,
   KeyEqual key_equal) noexcept
 {
+  static_assert(std::is_invocable_r_v<bool, KeyEqual, ProbeKey, Key>,
+                "KeyEqual(ProbeKey{}, Key{}) must be a valid callable.");
+
+  if constexpr (ProbeSequence::is_linear_probing) {
+    static_assert(std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher, Key>,
+                  "ProbeSequence::hasher(Key{}) must be a valid callable.");
+    static_assert(
+      std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher, ProbeKey>,
+      "ProbeSequence::hasher(ProbeKey{}) must be a valid callable.");
+  } else {
+    static_assert(
+      std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher1, Key>,
+      "ProbeSequence::hasher1(Key{}) must be a valid callable.");
+    static_assert(
+      std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher2, Key>,
+      "ProbeSequence::hasher2(Key{}) must be a valid callable.");
+    static_assert(
+      std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher1, ProbeKey>,
+      "ProbeSequence::hasher1(ProbeKey{}) must be a valid callable.");
+    static_assert(
+      std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher2, ProbeKey>,
+      "ProbeSequence::hasher2(ProbeKey{}) must be a valid callable.");
+  }
+
   return impl_.contains<uses_vector_load()>(g, k, key_equal);
 }
 

diff --git a/include/cuco/probe_sequences.cuh b/include/cuco/probe_sequences.cuh
@@ -35,9 +35,12 @@ namespace cuco {
 template <uint32_t CGSize, typename Hash>
 class linear_probing : public detail::probe_sequence_base<CGSize> {
  public:
+  static constexpr bool is_linear_probing = true;
+
   using probe_sequence_base_type = detail::probe_sequence_base<CGSize>;
   using probe_sequence_base_type::cg_size;
   using probe_sequence_base_type::vector_width;
+  using hasher = Hash;
 
   template <typename Key, typename Value, cuda::thread_scope Scope>
   using impl = detail::linear_probing_impl<Key, Value, Scope, vector_width(), CGSize, Hash>;
@@ -61,9 +64,13 @@ class linear_probing : public detail::probe_sequence_base<CGSize> {
 template <uint32_t CGSize, typename Hash1, typename Hash2>
 class double_hashing : public detail::probe_sequence_base<CGSize> {
  public:
+  static constexpr bool is_linear_probing = false;
+
   using probe_sequence_base_type = detail::probe_sequence_base<CGSize>;
   using probe_sequence_base_type::cg_size;
   using probe_sequence_base_type::vector_width;
+  using hasher1 = Hash1;
+  using hasher2 = Hash2;
 
   template <typename Key, typename Value, cuda::thread_scope Scope>
   using impl = detail::double_hashing_impl<Key, Value, Scope, vector_width(), CGSize, Hash1, Hash2>;