-
Notifications
You must be signed in to change notification settings - Fork 106
Add static_multimap::pair_contains
#175
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1366cfc
e815261
ec0f19d
8e88d14
a0c178c
9e07fc9
1657014
e6a71ae
32d0b17
4618205
bfa1461
107ee9b
0462bea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -139,7 +139,7 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_ | |
| * @param current_slot The given slot to load from | ||
| */ | ||
| __device__ __forceinline__ void load_pair_array(value_type* arr, | ||
| const_iterator current_slot) noexcept | ||
| const_iterator current_slot) const noexcept | ||
| { | ||
| if constexpr (sizeof(value_type) == 4) { | ||
| auto const tmp = *reinterpret_cast<ushort4 const*>(current_slot); | ||
|
|
@@ -567,32 +567,33 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_ | |
| } | ||
|
|
||
| /** | ||
| * @brief Indicates whether the key `k` exists in the map using vector loads. | ||
| * @brief Indicates whether the probe `element` exists in the map using vector loads. | ||
| * | ||
| * If the key `k` was inserted into the map, `contains` returns | ||
| * true. Otherwise, it returns false. Uses the CUDA Cooperative Groups API to | ||
| * to leverage multiple threads to perform a single `contains` operation. This provides a | ||
| * significant boost in throughput compared to the non Cooperative Group based | ||
| * `contains` at moderate to high load factors. | ||
| * If `element` was inserted into the map, `contains` returns true. Otherwise, it returns false. | ||
| * Uses the CUDA Cooperative Groups API to leverage multiple threads to perform a single | ||
| * `contains` operation. This provides a significant boost in throughput compared to the non | ||
| * Cooperative Group based `contains` at moderate to high load factors. | ||
| * | ||
| * @tparam is_pair_contains `true` if it's a `pair_contains` implementation | ||
| * @tparam uses_vector_load Boolean flag indicating whether vector loads are used | ||
| * @tparam ProbeKey Probe key type | ||
| * @tparam KeyEqual Binary callable type | ||
| * @tparam ProbeT Probe data type | ||
| * @tparam Equal Binary callable type | ||
| * | ||
| * @param g The Cooperative Group used to perform the contains operation | ||
| * @param k The key to search for | ||
| * @param key_equal The binary callable used to compare two keys | ||
| * for equality | ||
| * @return A boolean indicating whether the key/value pair | ||
| * containing `k` was inserted | ||
| * @param element The probe element to search for | ||
| * @param equal The binary function to compare input element and slot content for equality | ||
| * @return A boolean indicating whether the key/value pair represented by `element` was inserted | ||
| */ | ||
| template <bool uses_vector_load, typename ProbeKey, typename KeyEqual> | ||
| template <bool is_pair_contains, bool uses_vector_load, typename ProbeT, typename Equal> | ||
| __device__ __forceinline__ std::enable_if_t<uses_vector_load, bool> contains( | ||
| cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g, | ||
| ProbeKey const& k, | ||
| KeyEqual key_equal) noexcept | ||
| ProbeT const& element, | ||
| Equal equal) const noexcept | ||
| { | ||
| auto current_slot = initial_slot(g, k); | ||
| auto current_slot = [&]() { | ||
| if constexpr (is_pair_contains) { return initial_slot(g, element.first); } | ||
| if constexpr (not is_pair_contains) { return initial_slot(g, element); } | ||
| }(); | ||
|
|
||
| while (true) { | ||
| value_type arr[2]; | ||
|
|
@@ -602,8 +603,22 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_ | |
| detail::bitwise_compare(arr[0].first, this->get_empty_key_sentinel()); | ||
| auto const second_slot_is_empty = | ||
| detail::bitwise_compare(arr[1].first, this->get_empty_key_sentinel()); | ||
| auto const first_equals = (not first_slot_is_empty and key_equal(arr[0].first, k)); | ||
| auto const second_equals = (not second_slot_is_empty and key_equal(arr[1].first, k)); | ||
| auto const first_equals = [&]() { | ||
| if constexpr (is_pair_contains) { | ||
| return not first_slot_is_empty and equal(arr[0], element); | ||
| } | ||
| if constexpr (not is_pair_contains) { | ||
| return not first_slot_is_empty and equal(arr[0].first, element); | ||
| } | ||
PointKernel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }(); | ||
| auto const second_equals = [&]() { | ||
| if constexpr (is_pair_contains) { | ||
| return not second_slot_is_empty and equal(arr[1], element); | ||
| } | ||
| if constexpr (not is_pair_contains) { | ||
| return not second_slot_is_empty and equal(arr[1].first, element); | ||
| } | ||
| }(); | ||
|
|
||
| // the key we were searching for was found by one of the threads, so we return true | ||
| if (g.any(first_equals or second_equals)) { return true; } | ||
|
|
@@ -618,32 +633,33 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_ | |
| } | ||
|
|
||
| /** | ||
| * @brief Indicates whether the key `k` exists in the map using scalar loads. | ||
| * @brief Indicates whether `element` exists in the map using scalar loads. | ||
| * | ||
| * If the key `k` was inserted into the map, `contains` returns | ||
| * true. Otherwise, it returns false. Uses the CUDA Cooperative Groups API to | ||
| * to leverage multiple threads to perform a single `contains` operation. This provides a | ||
| * significant boost in throughput compared to the non Cooperative Group | ||
| * `contains` at moderate to high load factors. | ||
| * If `element` was inserted into the map, `contains` returns true. Otherwise, it returns false. | ||
| * Uses the CUDA Cooperative Groups API to leverage multiple threads to perform a single | ||
| * `contains` operation. This provides a significant boost in throughput compared to the non | ||
| * Cooperative Group `contains` at moderate to high load factors. | ||
| * | ||
| * @tparam is_pair_contains `true` if it's a `pair_contains` implementation | ||
| * @tparam uses_vector_load Boolean flag indicating whether vector loads are used | ||
| * @tparam ProbeKey Probe key type | ||
| * @tparam KeyEqual Binary callable type | ||
| * @tparam ProbeT Probe data type | ||
| * @tparam Equal Binary callable type | ||
| * | ||
| * @param g The Cooperative Group used to perform the contains operation | ||
| * @param k The key to search for | ||
| * @param key_equal The binary callable used to compare two keys | ||
| * for equality | ||
| * @return A boolean indicating whether the key/value pair | ||
| * containing `k` was inserted | ||
| * @param element The probe element to search for | ||
| * @param equal The binary function to compare input element and slot content for equality | ||
| * @return A boolean indicating whether the key/value pair represented by `element` was inserted | ||
| */ | ||
| template <bool uses_vector_load, typename ProbeKey, typename KeyEqual> | ||
| template <bool is_pair_contains, bool uses_vector_load, typename ProbeT, typename Equal> | ||
| __device__ __forceinline__ std::enable_if_t<not uses_vector_load, bool> contains( | ||
| cooperative_groups::thread_block_tile<ProbeSequence::cg_size> const& g, | ||
| ProbeKey const& k, | ||
| KeyEqual key_equal) noexcept | ||
| ProbeT const& element, | ||
| Equal equal) const noexcept | ||
| { | ||
| auto current_slot = initial_slot(g, k); | ||
| auto current_slot = [&]() { | ||
| if constexpr (is_pair_contains) { return initial_slot(g, element.first); } | ||
| if constexpr (not is_pair_contains) { return initial_slot(g, element); } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to above. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And also all other places.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now I recall, we need this to silence the compiler warnings with cuda-11.0/11.2. Reverting back to the |
||
| }(); | ||
|
|
||
| while (true) { | ||
| value_type slot_contents = *reinterpret_cast<value_type const*>(current_slot); | ||
|
|
@@ -654,7 +670,14 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_ | |
| auto const slot_is_empty = | ||
| detail::bitwise_compare(existing_key, this->get_empty_key_sentinel()); | ||
|
|
||
| auto const equals = (not slot_is_empty and key_equal(existing_key, k)); | ||
| auto const equals = [&]() { | ||
| if constexpr (is_pair_contains) { | ||
| return not slot_is_empty and equal(slot_contents, element); | ||
| } | ||
| if constexpr (not is_pair_contains) { | ||
| return not slot_is_empty and equal(existing_key, element); | ||
| } | ||
| }(); | ||
|
|
||
| // the key we were searching for was found by one of the threads, so we return true | ||
| if (g.any(equals)) { return true; } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.