Skip to content

Commit 4a8bbeb

Browse files
segment_map: Also segment the bucket array
In segmented mode we only applied the segmenting to the values array but not to the bucket array. As a result, the pattern of a deallocation followed by a larger allocation when resizing the hash map continues to exist. Further, in environments where the maximum allocation size is limited because of fragmentation issues, this can lead to problems. To avoid both of these issues, this patch makes the bucket array use the same data structure as the values array, i.e. a `std::vector` when linear and a `segmented_vector` when segmented (or the passed data structure if specified). This extra indirection does add some overhead in the segmented case. Looking at the quick benchmarks we see: Before: ``` | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmarking |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:------------- | 8,912,995.09 | 112.20 | 0.1% | 225,712,537.08 | 26,628,198.00 | 8.476 | 25,133,812.23 | 0.1% | 1.15 | `ankerl::unordered_dense::map<uint64_t, size_t> segmented_vector iterate while adding then removing` | 65,440,597.50 | 15.28 | 0.1% | 496,971,523.50 | 195,721,929.00 | 2.539 | 64,749,156.50 | 11.2% | 1.44 | `ankerl::unordered_dense::map<uint64_t, size_t> segmented_vector random insert erase` | 63,254,162.50 | 15.81 | 0.1% | 540,753,642.50 | 188,790,381.00 | 2.864 | 101,168,500.00 | 6.3% | 1.39 | `ankerl::unordered_dense::map<uint64_t, size_t> segmented_vector 50% probability to find` | 9,777,270.50 | 102.28 | 0.2% | 281,149,360.00 | 28,833,467.00 | 9.751 | 25,968,567.75 | 0.1% | 1.19 | `ankerl::unordered_dense::map<std::string, size_t> segmented_vector iterate while adding then removing` | 220,368,952.00 | 4.54 | 0.2% |2,707,978,150.00 | 659,198,358.00 | 4.108 | 347,649,399.00 | 3.8% | 2.43 | `ankerl::unordered_dense::map<std::string, size_t> segmented_vector random insert erase` | 156,887,435.00 | 6.37 | 0.1% 
|2,166,844,490.00 | 464,728,290.00 | 4.663 | 266,835,027.00 | 2.5% | 1.73 | `ankerl::unordered_dense::map<std::string, size_t> segmented_vector 50% probability to find` ``` After: ``` | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmarking |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:------------- | 8,921,748.31 | 112.09 | 0.1% | 226,313,644.69 | 26,684,106.00 | 8.481 | 25,174,702.92 | 0.1% | 1.18 | `ankerl::unordered_dense::map<uint64_t, size_t> segmented_vector iterate while adding then removing` | 75,578,500.00 | 13.23 | 0.1% | 597,036,791.50 | 226,059,912.00 | 2.641 | 64,865,689.00 | 11.3% | 1.14 | `ankerl::unordered_dense::map<uint64_t, size_t> segmented_vector random insert erase` | 74,928,542.00 | 13.35 | 0.1% | 677,557,943.00 | 223,726,152.00 | 3.029 | 91,606,575.00 | 7.0% | 1.13 | `ankerl::unordered_dense::map<uint64_t, size_t> segmented_vector 50% probability to find` | 10,079,993.00 | 99.21 | 0.4% | 293,716,069.73 | 29,697,236.40 | 9.890 | 25,980,823.83 | 0.1% | 1.20 | `ankerl::unordered_dense::map<std::string, size_t> segmented_vector iterate while adding then removing` | 220,081,085.00 | 4.54 | 0.1% |2,721,992,469.00 | 658,245,042.00 | 4.135 | 345,686,575.00 | 3.8% | 2.42 | `ankerl::unordered_dense::map<std::string, size_t> segmented_vector random insert erase` | 158,126,693.00 | 6.32 | 0.1% |2,191,768,626.00 | 468,710,736.00 | 4.676 | 267,938,632.00 | 2.5% | 1.74 | `ankerl::unordered_dense::map<std::string, size_t> segmented_vector 50% probability to find` ``` If we think this is not unconditionally acceptable then we could possibly add another template parameter (or make IsSegmented an enum) to decide which parts are supposed to be segmented. Fixes martinus#94
1 parent d911053 commit 4a8bbeb

File tree

1 file changed

+47
-34
lines changed

1 file changed

+47
-34
lines changed

include/ankerl/unordered_dense.h

+47-34
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,8 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
810810
private:
811811
using bucket_alloc =
812812
typename std::allocator_traits<typename value_container_type::allocator_type>::template rebind_alloc<Bucket>;
813-
using bucket_alloc_traits = std::allocator_traits<bucket_alloc>;
813+
using underlying_bucket_type =
814+
std::conditional_t<IsSegmented, segmented_vector<Bucket, bucket_alloc>, std::vector<Bucket, bucket_alloc>>;
814815

815816
static constexpr uint8_t initial_shifts = 64 - 2; // 2^(64-m_shift) number of buckets
816817
static constexpr float default_max_load_factor = 0.8F;
@@ -839,24 +840,26 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
839840
static_assert(std::is_trivially_copyable_v<Bucket>, "assert we can just memset / memcpy");
840841

841842
value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes.
842-
using bucket_pointer = typename std::allocator_traits<bucket_alloc>::pointer;
843-
bucket_pointer m_buckets{};
844-
size_t m_num_buckets = 0;
843+
underlying_bucket_type m_buckets{};
845844
size_t m_max_bucket_capacity = 0;
846845
float m_max_load_factor = default_max_load_factor;
847846
Hash m_hash{};
848847
KeyEqual m_equal{};
849848
uint8_t m_shifts = initial_shifts;
850849

851850
[[nodiscard]] auto next(value_idx_type bucket_idx) const -> value_idx_type {
852-
return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1U == m_num_buckets)
851+
return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1U == bucket_count())
853852
? 0
854853
: static_cast<value_idx_type>(bucket_idx + 1U);
855854
}
856855

857856
// Helper to access bucket through pointer types
858-
[[nodiscard]] static constexpr auto at(bucket_pointer bucket_ptr, size_t offset) -> Bucket& {
859-
return *(bucket_ptr + static_cast<typename std::allocator_traits<bucket_alloc>::difference_type>(offset));
857+
[[nodiscard]] static constexpr auto at(underlying_bucket_type& bucket, size_t offset) -> Bucket& {
858+
return bucket[offset];
859+
}
860+
861+
[[nodiscard]] static constexpr auto at(const underlying_bucket_type& bucket, size_t offset) -> const Bucket& {
862+
return bucket[offset];
860863
}
861864

862865
// use the dist_inc and dist_dec functions so that uint16_t types work without warning
@@ -946,7 +949,13 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
946949
} else {
947950
m_shifts = other.m_shifts;
948951
allocate_buckets_from_shift();
949-
std::memcpy(m_buckets, other.m_buckets, sizeof(Bucket) * bucket_count());
952+
if constexpr (IsSegmented) {
953+
for (auto i = 0UL; i < bucket_count(); ++i) {
954+
at(m_buckets, i) = at(other.m_buckets, i);
955+
}
956+
} else {
957+
std::memcpy(m_buckets.data(), other.m_buckets.data(), sizeof(Bucket) * bucket_count());
958+
}
950959
}
951960
}
952961

@@ -958,30 +967,36 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
958967
}
959968

960969
void deallocate_buckets() {
961-
auto ba = bucket_alloc(m_values.get_allocator());
962-
if (nullptr != m_buckets) {
963-
bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count());
964-
m_buckets = nullptr;
965-
}
966-
m_num_buckets = 0;
970+
m_buckets.clear();
971+
m_buckets.shrink_to_fit();
967972
m_max_bucket_capacity = 0;
968973
}
969974

970975
void allocate_buckets_from_shift() {
971-
auto ba = bucket_alloc(m_values.get_allocator());
972-
m_num_buckets = calc_num_buckets(m_shifts);
973-
m_buckets = bucket_alloc_traits::allocate(ba, m_num_buckets);
974-
if (m_num_buckets == max_bucket_count()) {
976+
auto num_buckets = calc_num_buckets(m_shifts);
977+
if constexpr (IsSegmented) {
978+
m_buckets.reserve(num_buckets);
979+
for (size_t i = m_buckets.size(); i < num_buckets; ++i) {
980+
m_buckets.emplace_back();
981+
}
982+
} else {
983+
m_buckets.resize(num_buckets);
984+
}
985+
if (num_buckets == max_bucket_count()) {
975986
// reached the maximum, make sure we can use each bucket
976987
m_max_bucket_capacity = max_bucket_count();
977988
} else {
978-
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(m_num_buckets) * max_load_factor());
989+
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(num_buckets) * max_load_factor());
979990
}
980991
}
981992

982993
void clear_buckets() {
983-
if (m_buckets != nullptr) {
984-
std::memset(&*m_buckets, 0, sizeof(Bucket) * bucket_count());
994+
if constexpr (IsSegmented) {
995+
for (auto&& e : m_buckets) {
996+
std::memset(&e, 0, sizeof(e));
997+
}
998+
} else {
999+
std::memset(m_buckets.data(), 0, sizeof(Bucket) * bucket_count());
9851000
}
9861001
}
9871002

@@ -1004,7 +1019,9 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
10041019
on_error_bucket_overflow();
10051020
}
10061021
--m_shifts;
1007-
deallocate_buckets();
1022+
if constexpr (!IsSegmented) {
1023+
deallocate_buckets();
1024+
}
10081025
allocate_buckets_from_shift();
10091026
clear_and_fill_buckets_from_values();
10101027
}
@@ -1178,6 +1195,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
11781195
KeyEqual const& equal = KeyEqual(),
11791196
allocator_type const& alloc_or_container = allocator_type())
11801197
: m_values(alloc_or_container)
1198+
, m_buckets(alloc_or_container)
11811199
, m_hash(hash)
11821200
, m_equal(equal) {
11831201
if (0 != bucket_count) {
@@ -1253,12 +1271,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
12531271
table(std::initializer_list<value_type> init, size_type bucket_count, Hash const& hash, allocator_type const& alloc)
12541272
: table(init, bucket_count, hash, KeyEqual(), alloc) {}
12551273

1256-
~table() {
1257-
if (nullptr != m_buckets) {
1258-
auto ba = bucket_alloc(m_values.get_allocator());
1259-
bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count());
1260-
}
1261-
}
1274+
~table() {}
12621275

12631276
auto operator=(table const& other) -> table& {
12641277
if (&other != this) {
@@ -1283,8 +1296,8 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
12831296

12841297
// we can only reuse m_buckets when both maps have the same allocator!
12851298
if (get_allocator() == other.get_allocator()) {
1286-
m_buckets = std::exchange(other.m_buckets, nullptr);
1287-
m_num_buckets = std::exchange(other.m_num_buckets, 0);
1299+
m_buckets = std::move(other.m_buckets);
1300+
other.m_buckets.clear();
12881301
m_max_bucket_capacity = std::exchange(other.m_max_bucket_capacity, 0);
12891302
m_shifts = std::exchange(other.m_shifts, initial_shifts);
12901303
m_max_load_factor = std::exchange(other.m_max_load_factor, default_max_load_factor);
@@ -1421,7 +1434,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
14211434
on_error_too_many_elements();
14221435
}
14231436
auto shifts = calc_shifts_for_size(container.size());
1424-
if (0 == m_num_buckets || shifts < m_shifts || container.get_allocator() != m_values.get_allocator()) {
1437+
if (0 == bucket_count() || shifts < m_shifts || container.get_allocator() != m_values.get_allocator()) {
14251438
m_shifts = shifts;
14261439
deallocate_buckets();
14271440
allocate_buckets_from_shift();
@@ -1821,7 +1834,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
18211834
// bucket interface ///////////////////////////////////////////////////////
18221835

18231836
auto bucket_count() const noexcept -> size_t { // NOLINT(modernize-use-nodiscard)
1824-
return m_num_buckets;
1837+
return m_buckets.size();
18251838
}
18261839

18271840
static constexpr auto max_bucket_count() noexcept -> size_t { // NOLINT(modernize-use-nodiscard)
@@ -1840,7 +1853,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
18401853

18411854
void max_load_factor(float ml) {
18421855
m_max_load_factor = ml;
1843-
if (m_num_buckets != max_bucket_count()) {
1856+
if (bucket_count() != max_bucket_count()) {
18441857
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(bucket_count()) * max_load_factor());
18451858
}
18461859
}
@@ -1864,7 +1877,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
18641877
m_values.reserve(capa);
18651878
}
18661879
auto shifts = calc_shifts_for_size((std::max)(capa, size()));
1867-
if (0 == m_num_buckets || shifts < m_shifts) {
1880+
if (0 == bucket_count() || shifts < m_shifts) {
18681881
m_shifts = shifts;
18691882
deallocate_buckets();
18701883
allocate_buckets_from_shift();

0 commit comments

Comments
 (0)