From 4a8bbeb2cfd06856c8ac8720a40de49decb73fa2 Mon Sep 17 00:00:00 2001 From: Stephan Dollberg Date: Wed, 6 Mar 2024 12:40:29 +0000 Subject: [PATCH 1/2] segment_map: Also segment the bucket array In segmented mode we only applied the segmenting to the values array but not the bucket array. As a result, the pattern of a deallocation followed by an increased allocation when resizing the hash map continues to exist. Further, in environments where the max allocation size is limited because of fragmentation issues, this can lead to problems. To avoid both of these issues, this patch makes the bucket array use the same data structure as the values array, i.e.: a `std::vector` when linear and `segmented_vector` when segmented (or the passed data structure if specified). This extra indirection does add some overhead in the segmented case. Looking at the quick benchmarks we see: Before: ``` | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmarking |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:------------- | 8,912,995.09 | 112.20 | 0.1% | 225,712,537.08 | 26,628,198.00 | 8.476 | 25,133,812.23 | 0.1% | 1.15 | `ankerl::unordered_dense::map segmented_vector iterate while adding then removing` | 65,440,597.50 | 15.28 | 0.1% | 496,971,523.50 | 195,721,929.00 | 2.539 | 64,749,156.50 | 11.2% | 1.44 | `ankerl::unordered_dense::map segmented_vector random insert erase` | 63,254,162.50 | 15.81 | 0.1% | 540,753,642.50 | 188,790,381.00 | 2.864 | 101,168,500.00 | 6.3% | 1.39 | `ankerl::unordered_dense::map segmented_vector 50% probability to find` | 9,777,270.50 | 102.28 | 0.2% | 281,149,360.00 | 28,833,467.00 | 9.751 | 25,968,567.75 | 0.1% | 1.19 | `ankerl::unordered_dense::map segmented_vector iterate while adding then removing` | 220,368,952.00 | 4.54 | 0.2% |2,707,978,150.00 | 659,198,358.00 | 4.108 | 347,649,399.00 | 3.8% | 2.43 | 
`ankerl::unordered_dense::map segmented_vector random insert erase` | 156,887,435.00 | 6.37 | 0.1% |2,166,844,490.00 | 464,728,290.00 | 4.663 | 266,835,027.00 | 2.5% | 1.73 | `ankerl::unordered_dense::map segmented_vector 50% probability to find` ``` After: ``` | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmarking |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:------------- | 8,921,748.31 | 112.09 | 0.1% | 226,313,644.69 | 26,684,106.00 | 8.481 | 25,174,702.92 | 0.1% | 1.18 | `ankerl::unordered_dense::map segmented_vector iterate while adding then removing` | 75,578,500.00 | 13.23 | 0.1% | 597,036,791.50 | 226,059,912.00 | 2.641 | 64,865,689.00 | 11.3% | 1.14 | `ankerl::unordered_dense::map segmented_vector random insert erase` | 74,928,542.00 | 13.35 | 0.1% | 677,557,943.00 | 223,726,152.00 | 3.029 | 91,606,575.00 | 7.0% | 1.13 | `ankerl::unordered_dense::map segmented_vector 50% probability to find` | 10,079,993.00 | 99.21 | 0.4% | 293,716,069.73 | 29,697,236.40 | 9.890 | 25,980,823.83 | 0.1% | 1.20 | `ankerl::unordered_dense::map segmented_vector iterate while adding then removing` | 220,081,085.00 | 4.54 | 0.1% |2,721,992,469.00 | 658,245,042.00 | 4.135 | 345,686,575.00 | 3.8% | 2.42 | `ankerl::unordered_dense::map segmented_vector random insert erase` | 158,126,693.00 | 6.32 | 0.1% |2,191,768,626.00 | 468,710,736.00 | 4.676 | 267,938,632.00 | 2.5% | 1.74 | `ankerl::unordered_dense::map segmented_vector 50% probability to find` ``` If we think this is not unconditionally acceptable then we could possibly add another template parameter (or make IsSegmented an enum) to decide which parts are supposed to be segmented. 
Fixes https://github.com/martinus/unordered_dense/issues/94 --- include/ankerl/unordered_dense.h | 81 ++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/include/ankerl/unordered_dense.h b/include/ankerl/unordered_dense.h index 2aaacd6..7425387 100644 --- a/include/ankerl/unordered_dense.h +++ b/include/ankerl/unordered_dense.h @@ -810,7 +810,8 @@ class table : public std::conditional_t, base_table_type_map, bas private: using bucket_alloc = typename std::allocator_traits::template rebind_alloc; - using bucket_alloc_traits = std::allocator_traits; + using underlying_bucket_type = + std::conditional_t, std::vector>; static constexpr uint8_t initial_shifts = 64 - 2; // 2^(64-m_shift) number of buckets static constexpr float default_max_load_factor = 0.8F; @@ -839,9 +840,7 @@ class table : public std::conditional_t, base_table_type_map, bas static_assert(std::is_trivially_copyable_v, "assert we can just memset / memcpy"); value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes. - using bucket_pointer = typename std::allocator_traits::pointer; - bucket_pointer m_buckets{}; - size_t m_num_buckets = 0; + underlying_bucket_type m_buckets{}; size_t m_max_bucket_capacity = 0; float m_max_load_factor = default_max_load_factor; Hash m_hash{}; @@ -849,14 +848,18 @@ class table : public std::conditional_t, base_table_type_map, bas uint8_t m_shifts = initial_shifts; [[nodiscard]] auto next(value_idx_type bucket_idx) const -> value_idx_type { - return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1U == m_num_buckets) + return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1U == bucket_count()) ? 
0 : static_cast(bucket_idx + 1U); } // Helper to access bucket through pointer types - [[nodiscard]] static constexpr auto at(bucket_pointer bucket_ptr, size_t offset) -> Bucket& { - return *(bucket_ptr + static_cast::difference_type>(offset)); + [[nodiscard]] static constexpr auto at(underlying_bucket_type& bucket, size_t offset) -> Bucket& { + return bucket[offset]; + } + + [[nodiscard]] static constexpr auto at(const underlying_bucket_type& bucket, size_t offset) -> const Bucket& { + return bucket[offset]; } // use the dist_inc and dist_dec functions so that uint16_t types work without warning @@ -946,7 +949,13 @@ class table : public std::conditional_t, base_table_type_map, bas } else { m_shifts = other.m_shifts; allocate_buckets_from_shift(); - std::memcpy(m_buckets, other.m_buckets, sizeof(Bucket) * bucket_count()); + if constexpr (IsSegmented) { + for (auto i = 0UL; i < bucket_count(); ++i) { + at(m_buckets, i) = at(other.m_buckets, i); + } + } else { + std::memcpy(m_buckets.data(), other.m_buckets.data(), sizeof(Bucket) * bucket_count()); + } } } @@ -958,30 +967,36 @@ class table : public std::conditional_t, base_table_type_map, bas } void deallocate_buckets() { - auto ba = bucket_alloc(m_values.get_allocator()); - if (nullptr != m_buckets) { - bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count()); - m_buckets = nullptr; - } - m_num_buckets = 0; + m_buckets.clear(); + m_buckets.shrink_to_fit(); m_max_bucket_capacity = 0; } void allocate_buckets_from_shift() { - auto ba = bucket_alloc(m_values.get_allocator()); - m_num_buckets = calc_num_buckets(m_shifts); - m_buckets = bucket_alloc_traits::allocate(ba, m_num_buckets); - if (m_num_buckets == max_bucket_count()) { + auto num_buckets = calc_num_buckets(m_shifts); + if constexpr (IsSegmented) { + m_buckets.reserve(num_buckets); + for (size_t i = m_buckets.size(); i < num_buckets; ++i) { + m_buckets.emplace_back(); + } + } else { + m_buckets.resize(num_buckets); + } + if (num_buckets == 
max_bucket_count()) { // reached the maximum, make sure we can use each bucket m_max_bucket_capacity = max_bucket_count(); } else { - m_max_bucket_capacity = static_cast(static_cast(m_num_buckets) * max_load_factor()); + m_max_bucket_capacity = static_cast(static_cast(num_buckets) * max_load_factor()); } } void clear_buckets() { - if (m_buckets != nullptr) { - std::memset(&*m_buckets, 0, sizeof(Bucket) * bucket_count()); + if constexpr (IsSegmented) { + for (auto&& e : m_buckets) { + std::memset(&e, 0, sizeof(e)); + } + } else { + std::memset(m_buckets.data(), 0, sizeof(Bucket) * bucket_count()); } } @@ -1004,7 +1019,9 @@ class table : public std::conditional_t, base_table_type_map, bas on_error_bucket_overflow(); } --m_shifts; - deallocate_buckets(); + if constexpr (!IsSegmented) { + deallocate_buckets(); + } allocate_buckets_from_shift(); clear_and_fill_buckets_from_values(); } @@ -1178,6 +1195,7 @@ class table : public std::conditional_t, base_table_type_map, bas KeyEqual const& equal = KeyEqual(), allocator_type const& alloc_or_container = allocator_type()) : m_values(alloc_or_container) + , m_buckets(alloc_or_container) , m_hash(hash) , m_equal(equal) { if (0 != bucket_count) { @@ -1253,12 +1271,7 @@ class table : public std::conditional_t, base_table_type_map, bas table(std::initializer_list init, size_type bucket_count, Hash const& hash, allocator_type const& alloc) : table(init, bucket_count, hash, KeyEqual(), alloc) {} - ~table() { - if (nullptr != m_buckets) { - auto ba = bucket_alloc(m_values.get_allocator()); - bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count()); - } - } + ~table() {} auto operator=(table const& other) -> table& { if (&other != this) { @@ -1283,8 +1296,8 @@ class table : public std::conditional_t, base_table_type_map, bas // we can only reuse m_buckets when both maps have the same allocator! 
if (get_allocator() == other.get_allocator()) { - m_buckets = std::exchange(other.m_buckets, nullptr); - m_num_buckets = std::exchange(other.m_num_buckets, 0); + m_buckets = std::move(other.m_buckets); + other.m_buckets.clear(); m_max_bucket_capacity = std::exchange(other.m_max_bucket_capacity, 0); m_shifts = std::exchange(other.m_shifts, initial_shifts); m_max_load_factor = std::exchange(other.m_max_load_factor, default_max_load_factor); @@ -1421,7 +1434,7 @@ class table : public std::conditional_t, base_table_type_map, bas on_error_too_many_elements(); } auto shifts = calc_shifts_for_size(container.size()); - if (0 == m_num_buckets || shifts < m_shifts || container.get_allocator() != m_values.get_allocator()) { + if (0 == bucket_count() || shifts < m_shifts || container.get_allocator() != m_values.get_allocator()) { m_shifts = shifts; deallocate_buckets(); allocate_buckets_from_shift(); @@ -1821,7 +1834,7 @@ class table : public std::conditional_t, base_table_type_map, bas // bucket interface /////////////////////////////////////////////////////// auto bucket_count() const noexcept -> size_t { // NOLINT(modernize-use-nodiscard) - return m_num_buckets; + return m_buckets.size(); } static constexpr auto max_bucket_count() noexcept -> size_t { // NOLINT(modernize-use-nodiscard) @@ -1840,7 +1853,7 @@ class table : public std::conditional_t, base_table_type_map, bas void max_load_factor(float ml) { m_max_load_factor = ml; - if (m_num_buckets != max_bucket_count()) { + if (bucket_count() != max_bucket_count()) { m_max_bucket_capacity = static_cast(static_cast(bucket_count()) * max_load_factor()); } } @@ -1864,7 +1877,7 @@ class table : public std::conditional_t, base_table_type_map, bas m_values.reserve(capa); } auto shifts = calc_shifts_for_size((std::max)(capa, size())); - if (0 == m_num_buckets || shifts < m_shifts) { + if (0 == bucket_count() || shifts < m_shifts) { m_shifts = shifts; deallocate_buckets(); allocate_buckets_from_shift(); From 
c7fe207770ec4aecd7654d39b2e54111e7f80221 Mon Sep 17 00:00:00 2001 From: Stephan Dollberg Date: Thu, 21 Mar 2024 13:46:17 +0000 Subject: [PATCH 2/2] segment_map: Allow also specifying a custom container for the bucket array The previous patch allowed using the bucket array in segmented mode. This patch extends the series to also allow using a custom container for the bucket array. We only allow specifying a custom container; a possible custom allocator is derived directly by rebinding the value allocator. This guarantees that they can be converted to each other. --- include/ankerl/unordered_dense.h | 95 +++++++++++++++++++++++--------- test/app/doctest.h | 19 ++++--- test/unit/custom_container.cpp | 4 +- 3 files changed, 83 insertions(+), 35 deletions(-) diff --git a/include/ankerl/unordered_dense.h b/include/ankerl/unordered_dense.h index 7425387..bbc4918 100644 --- a/include/ankerl/unordered_dense.h +++ b/include/ankerl/unordered_dense.h @@ -436,6 +436,7 @@ ANKERL_UNORDERED_DENSE_PACK(struct big { namespace detail { struct nonesuch {}; +struct default_container_t {}; template class Op, class... 
Args> struct detector { @@ -796,6 +797,7 @@ template class table : public std::conditional_t, base_table_type_map, base_table_type_set> { using underlying_value_type = typename std::conditional_t, std::pair, Key>; @@ -810,9 +812,13 @@ class table : public std::conditional_t, base_table_type_map, bas private: using bucket_alloc = typename std::allocator_traits::template rebind_alloc; - using underlying_bucket_type = + using default_bucket_container_type = std::conditional_t, std::vector>; + using bucket_container_type = std::conditional_t, + default_bucket_container_type, + BucketContainer>; + static constexpr uint8_t initial_shifts = 64 - 2; // 2^(64-m_shift) number of buckets static constexpr float default_max_load_factor = 0.8F; @@ -840,7 +846,7 @@ class table : public std::conditional_t, base_table_type_map, bas static_assert(std::is_trivially_copyable_v, "assert we can just memset / memcpy"); value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes. 
- underlying_bucket_type m_buckets{}; + bucket_container_type m_buckets{}; size_t m_max_bucket_capacity = 0; float m_max_load_factor = default_max_load_factor; Hash m_hash{}; @@ -854,11 +860,11 @@ class table : public std::conditional_t, base_table_type_map, bas } // Helper to access bucket through pointer types - [[nodiscard]] static constexpr auto at(underlying_bucket_type& bucket, size_t offset) -> Bucket& { + [[nodiscard]] static constexpr auto at(bucket_container_type& bucket, size_t offset) -> Bucket& { return bucket[offset]; } - [[nodiscard]] static constexpr auto at(const underlying_bucket_type& bucket, size_t offset) -> const Bucket& { + [[nodiscard]] static constexpr auto at(const bucket_container_type& bucket, size_t offset) -> const Bucket& { return bucket[offset]; } @@ -949,7 +955,7 @@ class table : public std::conditional_t, base_table_type_map, bas } else { m_shifts = other.m_shifts; allocate_buckets_from_shift(); - if constexpr (IsSegmented) { + if constexpr (IsSegmented || !std::is_same_v) { for (auto i = 0UL; i < bucket_count(); ++i) { at(m_buckets, i) = at(other.m_buckets, i); } @@ -974,8 +980,10 @@ class table : public std::conditional_t, base_table_type_map, bas void allocate_buckets_from_shift() { auto num_buckets = calc_num_buckets(m_shifts); - if constexpr (IsSegmented) { - m_buckets.reserve(num_buckets); + if constexpr (IsSegmented || !std::is_same_v) { + if constexpr (has_reserve) { + m_buckets.reserve(num_buckets); + } for (size_t i = m_buckets.size(); i < num_buckets; ++i) { m_buckets.emplace_back(); } @@ -991,7 +999,7 @@ class table : public std::conditional_t, base_table_type_map, bas } void clear_buckets() { - if constexpr (IsSegmented) { + if constexpr (IsSegmented || !std::is_same_v) { for (auto&& e : m_buckets) { std::memset(&e, 0, sizeof(e)); } @@ -1019,7 +1027,7 @@ class table : public std::conditional_t, base_table_type_map, bas on_error_bucket_overflow(); } --m_shifts; - if constexpr (!IsSegmented) { + if constexpr 
(!IsSegmented || std::is_same_v) { deallocate_buckets(); } allocate_buckets_from_shift(); @@ -1938,30 +1946,34 @@ ANKERL_UNORDERED_DENSE_EXPORT template , class KeyEqual = std::equal_to, class AllocatorOrContainer = std::allocator>, - class Bucket = bucket_type::standard> -using map = detail::table; + class Bucket = bucket_type::standard, + class BucketContainer = detail::default_container_t> +using map = detail::table; ANKERL_UNORDERED_DENSE_EXPORT template , class KeyEqual = std::equal_to, class AllocatorOrContainer = std::allocator>, - class Bucket = bucket_type::standard> -using segmented_map = detail::table; + class Bucket = bucket_type::standard, + class BucketContainer = detail::default_container_t> +using segmented_map = detail::table; ANKERL_UNORDERED_DENSE_EXPORT template , class KeyEqual = std::equal_to, class AllocatorOrContainer = std::allocator, - class Bucket = bucket_type::standard> -using set = detail::table; + class Bucket = bucket_type::standard, + class BucketContainer = detail::default_container_t> +using set = detail::table; ANKERL_UNORDERED_DENSE_EXPORT template , class KeyEqual = std::equal_to, class AllocatorOrContainer = std::allocator, - class Bucket = bucket_type::standard> -using segmented_set = detail::table; + class Bucket = bucket_type::standard, + class BucketContainer = detail::default_container_t> +using segmented_set = detail::table; # if defined(ANKERL_UNORDERED_DENSE_PMR) @@ -1972,29 +1984,54 @@ ANKERL_UNORDERED_DENSE_EXPORT template , class KeyEqual = std::equal_to, class Bucket = bucket_type::standard> -using map = - detail::table>, Bucket, false>; +using map = detail::table>, + Bucket, + detail::default_container_t, + false>; ANKERL_UNORDERED_DENSE_EXPORT template , class KeyEqual = std::equal_to, class Bucket = bucket_type::standard> -using segmented_map = - detail::table>, Bucket, true>; +using segmented_map = detail::table>, + Bucket, + detail::default_container_t, + true>; ANKERL_UNORDERED_DENSE_EXPORT template , class 
KeyEqual = std::equal_to, class Bucket = bucket_type::standard> -using set = detail::table, Bucket, false>; +using set = detail::table, + Bucket, + detail::default_container_t, + false>; ANKERL_UNORDERED_DENSE_EXPORT template , class KeyEqual = std::equal_to, class Bucket = bucket_type::standard> -using segmented_set = - detail::table, Bucket, true>; +using segmented_set = detail::table, + Bucket, + detail::default_container_t, + true>; } // namespace pmr @@ -2019,11 +2056,15 @@ ANKERL_UNORDERED_DENSE_EXPORT template // NOLINTNEXTLINE(cert-dcl58-cpp) -auto erase_if(ankerl::unordered_dense::detail::table& map, - Pred pred) -> size_t { - using map_t = ankerl::unordered_dense::detail::table; +auto erase_if( + ankerl::unordered_dense::detail::table& + map, + Pred pred) -> size_t { + using map_t = ankerl::unordered_dense::detail:: + table; // going back to front because erase() invalidates the end iterator auto const old_size = map.size(); diff --git a/test/app/doctest.h b/test/app/doctest.h index 446f716..2e16581 100644 --- a/test/app/doctest.h +++ b/test/app/doctest.h @@ -29,9 +29,12 @@ template , class KeyEqual = std::equal_to, class AllocatorOrContainer = std::deque>, - class Bucket = ankerl::unordered_dense::bucket_type::standard> -class deque_map : public ankerl::unordered_dense::detail::table { - using base_t = ankerl::unordered_dense::detail::table; + class Bucket = ankerl::unordered_dense::bucket_type::standard, + class BucketContainer = std::deque> +class deque_map : public ankerl::unordered_dense::detail:: + table { + using base_t = + ankerl::unordered_dense::detail::table; using base_t::base_t; }; @@ -39,10 +42,12 @@ template , class KeyEqual = std::equal_to, class AllocatorOrContainer = std::deque, - class Bucket = ankerl::unordered_dense::bucket_type::standard> -class deque_set - : public ankerl::unordered_dense::detail::table { - using base_t = ankerl::unordered_dense::detail::table; + class Bucket = ankerl::unordered_dense::bucket_type::standard, + class 
BucketContainer = std::deque> +class deque_set : public ankerl::unordered_dense::detail:: + table { + using base_t = ankerl::unordered_dense::detail:: + table; using base_t::base_t; }; diff --git a/test/unit/custom_container.cpp b/test/unit/custom_container.cpp index 84848ff..7ee79ef 100644 --- a/test/unit/custom_container.cpp +++ b/test/unit/custom_container.cpp @@ -15,7 +15,9 @@ TEST_CASE_MAP("custom_container", std::string, ankerl::unordered_dense::hash, std::equal_to, - std::deque>) { + std::deque>, + ankerl::unordered_dense::bucket_type::standard, + std::deque) { auto map = map_t(); for (int i = 0; i < 10; ++i) {