Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ conda activate cuda

gpuci_logger "Check versions"
python --version
$CC --version
$CXX --version

gpuci_logger "Check conda environment"
conda info
Expand Down
4 changes: 3 additions & 1 deletion include/cuco/detail/hash_functions.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION.
* Copyright (c) 2017-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,8 @@

namespace cuco {

using hash_value_type = uint32_t;

namespace detail {

// MurmurHash3_32 implementation from
Expand Down
18 changes: 12 additions & 6 deletions include/cuco/detail/probe_sequence_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include <cuda/std/atomic>

#include <cooperative_groups.h>

namespace cuco {
namespace detail {

Expand Down Expand Up @@ -186,13 +188,15 @@ class linear_probing_impl
*
* If vector-load is enabled, the return slot is always even to avoid illegal memory access.
*
* @tparam CG CUDA Cooperative Groups type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<cg_size> const& g, ProbeKey const& k) noexcept
{
auto const hash_value = [&]() {
auto const tmp = hash_(k);
Expand Down Expand Up @@ -307,13 +311,15 @@ class double_hashing_impl
* If vector-load is enabled, the return slot is always a multiple of (`cg_size` * `vector_width`)
* to avoid illegal memory access.
*
* @tparam CG CUDA Cooperative Groups type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<cg_size> const& g, ProbeKey const& k) noexcept
{
std::size_t index;
auto const hash_value = hash1_(k);
Expand Down
13 changes: 8 additions & 5 deletions include/cuco/detail/static_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -715,9 +715,9 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(CG g,
}

template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
template <typename Hash, typename KeyEqual>
template <typename ProbeKey, typename Hash, typename KeyEqual>
__device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
Key const& k, Hash hash, KeyEqual key_equal) const noexcept
ProbeKey const& k, Hash hash, KeyEqual key_equal) const noexcept
{
auto current_slot = initial_slot(k, hash);

Expand All @@ -733,9 +733,12 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
}

template <typename Key, typename Value, cuda::thread_scope Scope, typename Allocator>
template <typename CG, typename Hash, typename KeyEqual>
__device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
CG g, Key const& k, Hash hash, KeyEqual key_equal) const noexcept
template <typename CG, typename ProbeKey, typename Hash, typename KeyEqual>
__device__ std::enable_if_t<std::is_invocable_v<KeyEqual, ProbeKey, Key>, bool>
static_map<Key, Value, Scope, Allocator>::device_view::contains(CG const& g,
ProbeKey const& k,
Hash hash,
KeyEqual key_equal) const noexcept
{
auto current_slot = initial_slot(g, k, hash);

Expand Down
42 changes: 28 additions & 14 deletions include/cuco/detail/static_multimap/device_view_impl.inl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,13 +15,15 @@
*/

#include <cuco/detail/bitwise_compare.cuh>
#include <cuco/detail/static_multimap/kernels.cuh>
#include <cuco/detail/utils.cuh>

#include <thrust/tuple.h>
#include <thrust/type_traits/is_contiguous_iterator.h>

namespace cuco {
#include <cooperative_groups.h>

namespace cuco {
template <typename Key,
typename Value,
cuda::thread_scope Scope,
Expand Down Expand Up @@ -69,13 +71,16 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
*
* To be used for Cooperative Group based probing.
*
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ iterator initial_slot(CG const& g, Key const& k) noexcept
template <typename ProbeKey>
__device__ __forceinline__ iterator
initial_slot(cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k) noexcept
{
return probe_sequence_.initial_slot(g, k);
}
Expand All @@ -85,13 +90,16 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
*
* To be used for Cooperative Group based probing.
*
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
*
* @param g the Cooperative Group for which the initial slot is needed
* @param k The key to get the slot for
* @return Pointer to the initial slot for `k`
*/
template <typename CG>
__device__ __forceinline__ const_iterator initial_slot(CG g, Key const& k) const noexcept
template <typename ProbeKey>
__device__ __forceinline__ const_iterator
initial_slot(cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k) const noexcept
{
return probe_sequence_.initial_slot(g, k);
}
Expand Down Expand Up @@ -568,18 +576,21 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
* `contains` at moderate to high load factors.
*
* @tparam uses_vector_load Boolean flag indicating whether vector loads are used
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
* @tparam KeyEqual Binary callable type
*
* @param g The Cooperative Group used to perform the contains operation
* @param k The key to search for
* @param key_equal The binary callable used to compare two keys
* for equality
* @return A boolean indicating whether the key/value pair
* containing `k` was inserted
*/
template <bool uses_vector_load, typename CG, typename KeyEqual>
template <bool uses_vector_load, typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ std::enable_if_t<uses_vector_load, bool> contains(
CG g, Key const& k, KeyEqual key_equal) noexcept
cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k,
KeyEqual key_equal) noexcept
{
auto current_slot = initial_slot(g, k);

Expand Down Expand Up @@ -616,18 +627,21 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
* `contains` at moderate to high load factors.
*
* @tparam uses_vector_load Boolean flag indicating whether vector loads are used
* @tparam CG Cooperative Group type
* @tparam ProbeKey Probe key type
* @tparam KeyEqual Binary callable type
*
* @param g The Cooperative Group used to perform the contains operation
* @param k The key to search for
* @param key_equal The binary callable used to compare two keys
* for equality
* @return A boolean indicating whether the key/value pair
* containing `k` was inserted
*/
template <bool uses_vector_load, typename CG, typename KeyEqual>
template <bool uses_vector_load, typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ std::enable_if_t<not uses_vector_load, bool> contains(
CG g, Key const& k, KeyEqual key_equal) noexcept
cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
ProbeKey const& k,
KeyEqual key_equal) noexcept
{
auto current_slot = initial_slot(g, k);

Expand Down
6 changes: 3 additions & 3 deletions include/cuco/detail/static_multimap/static_multimap.inl
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ template <typename Key,
class ProbeSequence>
template <typename InputIt, typename OutputIt, typename KeyEqual>
void static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::contains(
InputIt first, InputIt last, OutputIt output_begin, cudaStream_t stream, KeyEqual key_equal) const
InputIt first, InputIt last, OutputIt output_begin, KeyEqual key_equal, cudaStream_t stream) const
{
auto const num_keys = std::distance(first, last);
if (num_keys == 0) { return; }
Expand Down Expand Up @@ -536,11 +536,11 @@ template <typename Key,
cuda::thread_scope Scope,
typename Allocator,
class ProbeSequence>
template <typename KeyEqual>
template <typename ProbeKey, typename KeyEqual>
__device__ __forceinline__ bool
static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view::contains(
cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g,
Key const& k,
ProbeKey const& k,
KeyEqual key_equal) noexcept
{
return impl_.contains<uses_vector_load()>(g, k, key_equal);
Expand Down
Loading