Skip to content

Commit

Permalink
Fix insert_or_apply (#579)
Browse files Browse the repository at this point in the history
This PR cleans up some of the issues that occurred during the merge of #551.

1. Propagate the **key_eq** and **probing_scheme** from the **global** `ref`
to the constructor of `shared_memory_ref` in the **insert_or_apply_shmem**
kernel.
2. Disable the **init** overload of `insert_or_apply` using **SFINAE** when
`Init` is not convertible to the mapped type. Because the `cuda::stream_ref`
parameter is default-constructed, a call to the **no-init** overload could
otherwise invoke the **init** overload instead.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
srinivasyadav18 and pre-commit-ci[bot] authored Aug 16, 2024
1 parent d7f4a1a commit 6eaed1b
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 12 deletions.
4 changes: 2 additions & 2 deletions include/cuco/detail/static_map/kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void insert_or_apply_shmem(

auto shared_map = SharedMapRefType{cuco::empty_key<Key>{ref.empty_key_sentinel()},
cuco::empty_value<Value>{ref.empty_value_sentinel()},
{},
{},
ref.key_eq(),
ref.probing_scheme(),
{},
storage};
auto shared_map_ref = std::move(shared_map).with(cuco::op::insert_or_apply);
Expand Down
10 changes: 1 addition & 9 deletions include/cuco/detail/static_map/static_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -347,19 +347,11 @@ template <class Key,
class ProbingScheme,
class Allocator,
class Storage>
template <typename InputIt, typename Init, typename Op>
template <typename InputIt, typename Init, typename Op, typename>
void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::
insert_or_apply_async(
InputIt first, InputIt last, Init init, Op op, cuda::stream_ref stream) noexcept
{
using shared_map_type = cuco::static_map<Key,
T,
int32_t,
cuda::thread_scope_block,
KeyEqual,
ProbingScheme,
Allocator,
cuco::storage<1>>;
auto constexpr has_init = true;
static_map_ns::detail::dispatch_insert_or_apply<has_init, cg_size, Allocator>(
first, last, init, op, ref(op::insert_or_apply), stream);
Expand Down
6 changes: 5 additions & 1 deletion include/cuco/static_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#include <cstddef>
#include <memory>
#include <type_traits>
#include <utility>

namespace cuco {
Expand Down Expand Up @@ -564,7 +565,10 @@ class static_map {
* @param op Callable object to perform apply operation.
* @param stream CUDA stream used for insert
*/
template <typename InputIt, typename Init, typename Op>
template <typename InputIt,
typename Init,
typename Op,
typename = std::enable_if_t<std::is_convertible_v<Init, T>>>
void insert_or_apply_async(
InputIt first, InputIt last, Init init, Op op, cuda::stream_ref stream = {}) noexcept;

Expand Down

0 comments on commit 6eaed1b

Please sign in to comment.