Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ conda activate cuda

gpuci_logger "Check versions"
python --version
$CC --version
$CXX --version

gpuci_logger "Check conda environment"
conda info
Expand Down
4 changes: 3 additions & 1 deletion include/cuco/detail/hash_functions.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION.
* Copyright (c) 2017-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,8 @@

namespace cuco {

using hash_value_type = uint32_t;

namespace detail {

// MurmurHash3_32 implementation from
Expand Down
18 changes: 12 additions & 6 deletions include/cuco/detail/probe_sequence_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include <cuda/std/atomic>

#include <cooperative_groups.h>

namespace cuco {
namespace detail {

Expand Down Expand Up @@ -186,13 +188,15 @@ class linear_probing_impl
*
* If vector-load is enabled, the return slot is always even to avoid illegal memory access.
*
* @tparam CG CUDA Cooperative Groups type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<cg_size> const& g, ProbeKey const& k) noexcept
{
auto const hash_value = [&]() {
auto const tmp = hash_(k);
Expand Down Expand Up @@ -307,13 +311,15 @@ class double_hashing_impl
* If vector-load is enabled, the return slot is always a multiple of (`cg_size` * `vector_width`)
* to avoid illegal memory access.
*
* @tparam CG CUDA Cooperative Groups type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<cg_size> const& g, ProbeKey const& k) noexcept
{
std::size_t index;
auto const hash_value = hash1_(k);
Expand Down
27 changes: 22 additions & 5 deletions include/cuco/detail/static_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -715,10 +715,17 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(CG g,
}

template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
template <typename Hash, typename KeyEqual>
template <typename ProbeKey, typename Hash, typename KeyEqual>
__device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
Key const& k, Hash hash, KeyEqual key_equal) const noexcept
ProbeKey const& k, Hash hash, KeyEqual key_equal) const noexcept
{
static_assert(std::is_invocable_r_v<bool, KeyEqual, ProbeKey, Key>,
"KeyEqual(ProbeKey{}, Key{}) must be a valid callable.");
Comment thread
PointKernel marked this conversation as resolved.
Outdated
static_assert(std::is_invocable_r_v<cuco::hash_value_type, Hash, Key>,
"Hash(Key{}) must be a valid callable.");
static_assert(std::is_invocable_r_v<cuco::hash_value_type, Hash, ProbeKey>,
"Hash(ProbeKey{}) must be a valid callable.");

auto current_slot = initial_slot(k, hash);

while (true) {
Expand All @@ -733,10 +740,20 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
}

template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
template <typename CG, typename Hash, typename KeyEqual>
__device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
CG g, Key const& k, Hash hash, KeyEqual key_equal) const noexcept
template <typename CG, typename ProbeKey, typename Hash, typename KeyEqual>
__device__ std::enable_if_t<std::is_invocable_v<KeyEqual, ProbeKey, Key>, bool>
static_map<Key, Value, Scope, Allocator>::device_view::contains(CG const& g,
ProbeKey const& k,
Hash hash,
KeyEqual key_equal) const noexcept
{
static_assert(std::is_invocable_r_v<bool, KeyEqual, ProbeKey, Key>,
"KeyEqual(ProbeKey{}, Key{}) must be a valid callable.");
Comment thread
PointKernel marked this conversation as resolved.
Outdated
static_assert(std::is_invocable_r_v<cuco::hash_value_type, Hash, Key>,
"Hash(Key{}) must be a valid callable.");
static_assert(std::is_invocable_r_v<cuco::hash_value_type, Hash, ProbeKey>,
"Hash(ProbeKey{}) must be a valid callable.");

auto current_slot = initial_slot(g, k, hash);

while (true) {
Expand Down
32 changes: 22 additions & 10 deletions include/cuco/detail/static_multimap/device_view_impl.inl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <thrust/tuple.h>
#include <thrust/type_traits/is_contiguous_iterator.h>

#include <cooperative_groups.h>

namespace cuco {

template <typename Key,
Expand Down Expand Up @@ -69,13 +71,16 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
*
* To be used for Cooperative Group based probing.
*
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const& k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
Comment thread
PointKernel marked this conversation as resolved.
ProbeKey const& k) noexcept
{
return probe_sequence_.initial_slot(g, k);
}
Expand All @@ -85,13 +90,16 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
*
* To be used for Cooperative Group based probing.
*
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ const_iterator initial_slot(CG g, Key const& k) const noexcept
template <typename ProbeKey>
__device__ __forceinline__ const_iterator
initial_slot(cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k) const noexcept
{
return probe_sequence_.initial_slot(g, k);
}
Expand Down Expand Up @@ -569,17 +577,19 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
*
* @tparam uses_vector_load Boolean flag indicating whether vector loads are used
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
* @tparam KeyEqual Binary callable type
*
* @param g The Cooperative Group used to perform the contains operation
* @param k The key to search for
* @param key_equal The binary callable used to compare two keys
* for equality
* @return A boolean indicating whether the key/value pair
* containing `k` was inserted
*/
template <bool uses_vector_load, typename CG, typename KeyEqual>
template <bool uses_vector_load, typename CG, typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ std::enable_if_t<uses_vector_load, bool> contains(
CG g, Key const& k, KeyEqual key_equal) noexcept
CG const& g, ProbeKey const& k, KeyEqual key_equal) noexcept
{
auto current_slot = initial_slot(g, k);

Expand Down Expand Up @@ -617,17 +627,19 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
*
* @tparam uses_vector_load Boolean flag indicating whether vector loads are used
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type that is convertible to the map's `key_type`
* @tparam KeyEqual Binary callable type
*
* @param g The Cooperative Group used to perform the contains operation
* @param k The key to search for
* @param key_equal The binary callable used to compare two keys
* for equality
* @return A boolean indicating whether the key/value pair
* containing `k` was inserted
*/
template <bool uses_vector_load, typename CG, typename KeyEqual>
template <bool uses_vector_load, typename CG, typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ std::enable_if_t<not uses_vector_load, bool> contains(
CG g, Key const& k, KeyEqual key_equal) noexcept
CG const& g, ProbeKey const& k, KeyEqual key_equal) noexcept
{
auto current_slot = initial_slot(g, k);

Expand Down
30 changes: 27 additions & 3 deletions include/cuco/detail/static_multimap/static_multimap.inl
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ template <typename Key,
class ProbeSequence>
template <typename InputIt, typename OutputIt, typename KeyEqual>
void static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::contains(
InputIt first, InputIt last, OutputIt output_begin, cudaStream_t stream, KeyEqual key_equal) const
InputIt first, InputIt last, OutputIt output_begin, KeyEqual key_equal, cudaStream_t stream) const
{
auto const num_keys = std::distance(first, last);
if (num_keys == 0) { return; }
Expand Down Expand Up @@ -536,13 +536,37 @@ template <typename Key,
cuda::thread_scope Scope,
typename Allocator,
class ProbeSequence>
template <typename KeyEqual>
template <typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ bool
static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view::contains(
cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
Key const& k,
ProbeKey const& k,
KeyEqual key_equal) noexcept
{
static_assert(std::is_invocable_r_v<bool, KeyEqual, ProbeKey, Key>,

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tbh, all these static_asserts are overkill. If someone passes in an invalid callable, it will still fail to compile, just with a different error message. This feels like a ton of complexity to maintain.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I pointed to is_invocable just for the purposes of documentation. I didn't intend that we need to static_assert all of the requirements of the function.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've removed those static_assert as requested and updated corresponding docs.

I agree that so many asserts are painful to maintain now but the situation will be improved after the grand refactor where we only need one set of those static asserts in the base class as opposed to putting them in every involved function for all map types.

"KeyEqual(ProbeKey{}, Key{}) must be a valid callable.");
Comment thread
PointKernel marked this conversation as resolved.
Outdated

if constexpr (ProbeSequence::is_linear_probing) {
static_assert(std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher, Key>,
"ProbeSequence::hasher(Key{}) must be a valid callable.");
static_assert(
std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher, ProbeKey>,
"ProbeSequence::hasher(ProbeKey{}) must be a valid callable.");
} else {
static_assert(
std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher1, Key>,
"ProbeSequence::hasher1(Key{}) must be a valid callable.");
static_assert(
std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher2, Key>,
"ProbeSequence::hasher2(Key{}) must be a valid callable.");
static_assert(
std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher1, ProbeKey>,
"ProbeSequence::hasher1(ProbeKey{}) must be a valid callable.");
static_assert(
std::is_invocable_r_v<cuco::hash_value_type, typename ProbeSequence::hasher2, ProbeKey>,
"ProbeSequence::hasher2(ProbeKey{}) must be a valid callable.");
}

return impl_.contains<uses_vector_load()>(g, k, key_equal);
}

Expand Down
7 changes: 7 additions & 0 deletions include/cuco/probe_sequences.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,12 @@ namespace cuco {
template <uint32_t CGSize, typename Hash>
class linear_probing : public detail::probe_sequence_base<CGSize> {
public:
static constexpr bool is_linear_probing = true;

using probe_sequence_base_type = detail::probe_sequence_base<CGSize>;
using probe_sequence_base_type::cg_size;
using probe_sequence_base_type::vector_width;
using hasher = Hash;

template <typename Key, typename Value, cuda::thread_scope Scope>
using impl = detail::linear_probing_impl<Key, Value, Scope, vector_width(), CGSize, Hash>;
Expand All @@ -61,9 +64,13 @@ class linear_probing : public detail::probe_sequence_base<CGSize> {
template <uint32_t CGSize, typename Hash1, typename Hash2>
class double_hashing : public detail::probe_sequence_base<CGSize> {
public:
static constexpr bool is_linear_probing = false;

using probe_sequence_base_type = detail::probe_sequence_base<CGSize>;
using probe_sequence_base_type::cg_size;
using probe_sequence_base_type::vector_width;
using hasher1 = Hash1;
using hasher2 = Hash2;

template <typename Key, typename Value, cuda::thread_scope Scope>
using impl = detail::double_hashing_impl<Key, Value, Scope, vector_width(), CGSize, Hash1, Hash2>;
Expand Down
Loading