Skip to content

Commit c22a4d7

Browse files
mmaslankaprvrockwotj
authored andcommitted
c/leader_balancer: replaced muted group index with roaring bitmap
The muted group index was using a `absl::flat_has_set` the use of flat data structure lead to oversized allocation when the number of muted/skipped raft group was large. Replaced a hash set with roaring bitmap. The roaring bitmap is efficient and compressed leading to better performance and smaller allocation. Fixes: redpanda-data#12005 Signed-off-by: Michal Maslanka <[email protected]>
1 parent a89f241 commit c22a4d7

11 files changed

+44
-35
lines changed

src/v/cluster/scheduling/leader_balancer.cc

+4-4
Original file line numberDiff line numberDiff line change
@@ -590,11 +590,11 @@ absl::flat_hash_set<model::node_id> leader_balancer::muted_nodes() const {
590590
return nodes;
591591
}
592592

593-
absl::flat_hash_set<raft::group_id> leader_balancer::muted_groups() const {
594-
absl::flat_hash_set<raft::group_id> res;
595-
res.reserve(_muted.size());
593+
leader_balancer_types::muted_groups_t leader_balancer::muted_groups() const {
594+
leader_balancer_types::muted_groups_t res;
595+
596596
for (const auto& e : _muted) {
597-
res.insert(e.first);
597+
res.add(static_cast<uint64_t>(e.first));
598598
}
599599
return res;
600600
}

src/v/cluster/scheduling/leader_balancer.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ class leader_balancer {
106106
leader_balancer_types::group_id_to_topic_revision_t
107107
build_group_id_to_topic_rev() const;
108108
index_type build_index();
109-
absl::flat_hash_set<raft::group_id> muted_groups() const;
109+
leader_balancer_types::muted_groups_t muted_groups() const;
110110
absl::flat_hash_set<model::node_id> muted_nodes() const;
111111

112112
ss::future<bool> do_transfer(reassignment);

src/v/cluster/scheduling/leader_balancer_constraints.cc

+4-2
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ even_topic_distributon_constraint::recommended_reassignment() {
115115
const auto& replicas = g_info.replicas;
116116

117117
// Don't try moving any groups that are currently muted.
118-
if (mi().muted_groups().contains(group)) {
118+
if (mi().muted_groups().contains(
119+
static_cast<uint64_t>(group))) {
119120
continue;
120121
}
121122

@@ -269,7 +270,8 @@ even_shard_load_constraint::recommended_reassignment() {
269270
// Consider each group from high load core, and record the
270271
// reassignment involving the lowest load "to" core.
271272
for (const auto& group : from->second) {
272-
if (mi().muted_groups().contains(group.first)) {
273+
if (mi().muted_groups().contains(
274+
static_cast<uint64_t>(group.first))) {
273275
continue;
274276
}
275277

src/v/cluster/scheduling/leader_balancer_constraints.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ class muted_index final : public index {
6868
public:
6969
muted_index(
7070
absl::flat_hash_set<model::node_id> muted_nodes,
71-
absl::flat_hash_set<raft::group_id> muted_groups)
71+
leader_balancer_types::muted_groups_t muted_groups)
7272
: _muted_nodes(std::move(muted_nodes))
7373
, _muted_groups(std::move(muted_groups)) {}
7474

@@ -80,25 +80,25 @@ class muted_index final : public index {
8080
return _muted_nodes;
8181
}
8282

83-
const absl::flat_hash_set<raft::group_id>& muted_groups() const {
83+
const leader_balancer_types::muted_groups_t& muted_groups() const {
8484
return _muted_groups;
8585
}
8686

87-
absl::flat_hash_set<raft::group_id>& muted_groups() {
87+
leader_balancer_types::muted_groups_t& muted_groups() {
8888
return _muted_groups;
8989
}
9090

9191
void update_index(const reassignment&) override {
9292
// nothing to do here.
9393
}
9494

95-
void update_muted_groups(absl::flat_hash_set<raft::group_id> new_groups) {
95+
void update_muted_groups(leader_balancer_types::muted_groups_t new_groups) {
9696
_muted_groups = std::move(new_groups);
9797
}
9898

9999
private:
100100
absl::flat_hash_set<model::node_id> _muted_nodes;
101-
absl::flat_hash_set<raft::group_id> _muted_groups;
101+
leader_balancer_types::muted_groups_t _muted_groups;
102102
};
103103

104104
/*

src/v/cluster/scheduling/leader_balancer_greedy.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class greedy_balanced_shards final : public leader_balancer_strategy {
3333
explicit greedy_balanced_shards(
3434
index_type cores, absl::flat_hash_set<model::node_id> muted_nodes)
3535
: _mi(std::make_unique<leader_balancer_types::muted_index>(
36-
std::move(muted_nodes), absl::flat_hash_set<raft::group_id>{}))
36+
std::move(muted_nodes), leader_balancer_types::muted_groups_t{}))
3737
, _even_shard_load_c(
3838
leader_balancer_types::shard_index{std::move(cores)}, *_mi) {}
3939

@@ -55,7 +55,7 @@ class greedy_balanced_shards final : public leader_balancer_strategy {
5555
* muted node should not be touched in case the mute is temporary.
5656
*/
5757
std::optional<reassignment>
58-
find_movement(const absl::flat_hash_set<raft::group_id>& skip) final {
58+
find_movement(const leader_balancer_types::muted_groups_t& skip) final {
5959
_mi->update_muted_groups(skip);
6060
return _even_shard_load_c.recommended_reassignment();
6161
}

src/v/cluster/scheduling/leader_balancer_random.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#include <cmath>
2727
#include <cstddef>
28+
#include <cstdint>
2829
#include <deque>
2930
#include <functional>
3031
#include <memory>
@@ -119,7 +120,7 @@ class random_hill_climbing_strategy final : public leader_balancer_strategy {
119120
* Find a group reassignment that reduces total error.
120121
*/
121122
std::optional<reassignment>
122-
find_movement(const absl::flat_hash_set<raft::group_id>& skip) override {
123+
find_movement(const leader_balancer_types::muted_groups_t& skip) override {
123124
for (;;) {
124125
auto reassignment_opt = _reassignments.generate_reassignment();
125126

@@ -129,7 +130,7 @@ class random_hill_climbing_strategy final : public leader_balancer_strategy {
129130

130131
auto reassignment = *reassignment_opt;
131132
if (
132-
skip.contains(reassignment.group)
133+
skip.contains(static_cast<uint64_t>(reassignment.group))
133134
|| _mi->muted_nodes().contains(reassignment.from.node_id)
134135
|| _mi->muted_nodes().contains(reassignment.to.node_id)) {
135136
continue;

src/v/cluster/scheduling/leader_balancer_strategy.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class leader_balancer_strategy {
5858
* Find a group reassignment that reduces total error.
5959
*/
6060
virtual std::optional<reassignment>
61-
find_movement(const absl::flat_hash_set<raft::group_id>& skip) = 0;
61+
find_movement(const leader_balancer_types::muted_groups_t& skip) = 0;
6262

6363
virtual void apply_movement(const reassignment& reassignment) = 0;
6464

src/v/cluster/scheduling/leader_balancer_types.h

+3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <absl/container/btree_map.h>
1818
#include <absl/container/flat_hash_map.h>
1919
#include <absl/container/flat_hash_set.h>
20+
#include <absl/container/node_hash_map.h>
21+
#include <roaring/roaring64map.hh>
2022

2123
namespace cluster::leader_balancer_types {
2224

@@ -43,6 +45,7 @@ using index_type = absl::node_hash_map<
4345
using group_id_to_topic_revision_t
4446
= absl::btree_map<raft::group_id, model::revision_id>;
4547

48+
using muted_groups_t = roaring::Roaring64Map;
4649
/*
4750
* Leaders per shard.
4851
*/

src/v/cluster/tests/leader_balancer_constraints_test.cc

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* the Business Source License, use of this software will be governed
99
* by the Apache License, Version 2.0
1010
*/
11+
#include <cstdint>
1112
#define BOOST_TEST_MODULE leader_balancer_constraints
1213

1314
#include "cluster/scheduling/leader_balancer_constraints.h"
@@ -510,7 +511,7 @@ BOOST_AUTO_TEST_CASE(topic_skew_error) {
510511
auto rhc = lbt::random_hill_climbing_strategy(
511512
shard_index.shards(), g_id_to_t_id, lbt::muted_index{{}, {}});
512513

513-
absl::flat_hash_set<raft::group_id> muted_groups{};
514+
cluster::leader_balancer_types::muted_groups_t muted_groups{};
514515

515516
auto pre_topic_error = even_topic_con.error();
516517
auto pre_shard_error = even_shard_con.error();
@@ -524,7 +525,7 @@ BOOST_AUTO_TEST_CASE(topic_skew_error) {
524525
rhc.apply_movement(*movement_opt);
525526
even_shard_con.update_index(*movement_opt);
526527
even_topic_con.update_index(*movement_opt);
527-
muted_groups.insert(movement_opt->group);
528+
muted_groups.add(static_cast<uint64_t>(movement_opt->group));
528529

529530
auto new_error = rhc.error();
530531
BOOST_REQUIRE(new_error <= current_error);

src/v/cluster/tests/leader_balancer_constraints_utils.h

+9-7
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,11 @@ static bool no_movement(
150150
const absl::flat_hash_set<int>& muted = {},
151151
const absl::flat_hash_set<raft::group_id>& skip = {}) {
152152
auto [shard_index, muted_index] = from_spec(spec, muted);
153-
muted_index.update_muted_groups(skip);
153+
cluster::leader_balancer_types::muted_groups_t skip_typed;
154+
for (auto& s : skip) {
155+
skip_typed.add(static_cast<uint64_t>(s));
156+
}
157+
muted_index.update_muted_groups(skip_typed);
154158
auto balancer = lbt::even_shard_load_constraint(shard_index, muted_index);
155159

156160
return !balancer.recommended_reassignment();
@@ -175,12 +179,10 @@ static ss::sstring expect_movement(
175179
const absl::flat_hash_set<int>& skip = {}) {
176180
auto [shard_index, mute_index] = from_spec(spec, muted);
177181

178-
absl::flat_hash_set<raft::group_id> skip_typed;
179-
std::transform(
180-
skip.begin(),
181-
skip.end(),
182-
std::inserter(skip_typed, skip_typed.begin()),
183-
[](auto groupid) { return raft::group_id{groupid}; });
182+
cluster::leader_balancer_types::muted_groups_t skip_typed;
183+
for (auto& s : skip) {
184+
skip_typed.add(static_cast<uint64_t>(s));
185+
}
184186

185187
mute_index.update_muted_groups(std::move(skip_typed));
186188
auto balancer = lbt::even_shard_load_constraint(shard_index, mute_index);

src/v/cluster/tests/leader_balancer_test.cc

+9-9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
* the Business Source License, use of this software will be governed
99
* by the Apache License, Version 2.0
1010
*/
11+
#include "cluster/scheduling/leader_balancer_types.h"
12+
13+
#include <cstdint>
1114
#define BOOST_TEST_MODULE leader_balancer
1215

1316
#include "absl/container/flat_hash_map.h"
@@ -152,7 +155,7 @@ static auto from_spec(
152155
bool no_movement(
153156
const cluster_spec& spec,
154157
const absl::flat_hash_set<int>& muted = {},
155-
const absl::flat_hash_set<raft::group_id>& skip = {}) {
158+
const cluster::leader_balancer_types::muted_groups_t& skip = {}) {
156159
auto [_, balancer] = from_spec(spec, muted);
157160
return !balancer.find_movement(skip);
158161
}
@@ -206,12 +209,10 @@ ss::sstring expect_movement(
206209
const absl::flat_hash_set<int>& skip = {}) {
207210
auto [index, balancer] = from_spec(spec, muted);
208211

209-
absl::flat_hash_set<raft::group_id> skip_typed;
210-
std::transform(
211-
skip.begin(),
212-
skip.end(),
213-
std::inserter(skip_typed, skip_typed.begin()),
214-
[](auto groupid) { return raft::group_id{groupid}; });
212+
cluster::leader_balancer_types::muted_groups_t skip_typed;
213+
for (auto s : skip) {
214+
skip_typed.add(static_cast<uint64_t>(s));
215+
}
215216

216217
auto reassignment = balancer.find_movement(skip_typed);
217218

@@ -357,7 +358,6 @@ BOOST_AUTO_TEST_CASE(greedy_skip) {
357358
expect_movement(spec, re(5, 1, 2), {}, {1, 2, 3, 4}), "");
358359

359360
// mute node 0 and skip all groups on node 1, no movement
360-
absl::flat_hash_set<raft::group_id> skip{
361-
raft::group_id(5), raft::group_id(6)};
361+
cluster::leader_balancer_types::muted_groups_t skip{5, 6};
362362
BOOST_REQUIRE(no_movement(spec, {0}, skip));
363363
}

0 commit comments

Comments
 (0)