Skip to content

Commit 4f7bda2

Browse files
Merge pull request #11065 from rabbitmq/add-remove-member-improvements
Better handle QQ delete member operation when member already removed
2 parents e158a86 + 02b6274 commit 4f7bda2

File tree

2 files changed

+34
-4
lines changed

2 files changed

+34
-4
lines changed

deps/rabbit/src/rabbit_quorum_queue.erl

+7-4
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@
128128
-define(START_CLUSTER_RPC_TIMEOUT, 60_000). %% needs to be longer than START_CLUSTER_TIMEOUT
129129
-define(TICK_TIMEOUT, 5000). %% the ra server tick time
130130
-define(DELETE_TIMEOUT, 5000).
131-
-define(ADD_MEMBER_TIMEOUT, 5000).
131+
-define(MEMBER_CHANGE_TIMEOUT, 20_000).
132132
-define(SNAPSHOT_INTERVAL, 8192). %% the ra default is 4096
133133
-define(UNLIMITED_PREFETCH_COUNT, 2000). %% something large for ra
134134

@@ -1201,7 +1201,7 @@ add_member(Q, Node) ->
12011201
add_member(Q, Node, promotable).
12021202

12031203
add_member(Q, Node, Membership) ->
1204-
add_member(Q, Node, Membership, ?ADD_MEMBER_TIMEOUT).
1204+
add_member(Q, Node, Membership, ?MEMBER_CHANGE_TIMEOUT).
12051205

12061206
add_member(VHost, Name, Node, Timeout) when is_binary(VHost) ->
12071207
%% NOTE needed to pass mixed cluster tests.
@@ -1278,8 +1278,11 @@ delete_member(Q, Node) when ?amqqueue_is_quorum(Q) ->
12781278
%% deleting the last member is not allowed
12791279
{error, last_node};
12801280
Members ->
1281-
case ra:remove_member(Members, ServerId) of
1282-
{ok, _, _Leader} ->
1281+
case ra:remove_member(Members, ServerId, ?MEMBER_CHANGE_TIMEOUT) of
1282+
Res when element(1, Res) == ok orelse
1283+
Res == {error, not_member} ->
1284+
%% if not a member we can still proceed with updating the
1285+
%% mnesia record and clean up server if still running
12831286
Fun = fun(Q1) ->
12841287
update_type_state(
12851288
Q1,

deps/rabbit/test/quorum_queue_SUITE.erl

+27
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ groups() ->
5959
delete_member_queue_not_found,
6060
delete_member,
6161
delete_member_not_a_member,
62+
delete_member_member_already_deleted,
6263
node_removal_is_quorum_critical]
6364
++ memory_tests()},
6465
{cluster_size_3, [], [
@@ -1954,6 +1955,32 @@ delete_member_not_a_member(Config) ->
19541955
rpc:call(Server, rabbit_quorum_queue, delete_member,
19551956
[<<"/">>, QQ, Server])).
19561957

1958+
delete_member_member_already_deleted(Config) ->
1959+
[Server, Server2] = Servers = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
1960+
NServers = length(Servers),
1961+
Ch = rabbit_ct_client_helpers:open_channel(Config, Server),
1962+
QQ = ?config(queue_name, Config),
1963+
RaName = ra_name(QQ),
1964+
?assertEqual({'queue.declare_ok', QQ, 0, 0},
1965+
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
1966+
?awaitMatch(NServers, count_online_nodes(Server, <<"/">>, QQ), ?DEFAULT_AWAIT),
1967+
ServerId = {RaName, Server},
1968+
ServerId2 = {RaName, Server2},
1969+
%% use are APU directory to simulate situation where the ra:remove_server/2
1970+
%% call timed out but later succeeded
1971+
?assertMatch(ok,
1972+
rpc:call(Server2, ra, leave_and_terminate,
1973+
[quorum_queues, ServerId, ServerId2])),
1974+
1975+
%% idempotent by design
1976+
?assertEqual(ok,
1977+
rpc:call(Server, rabbit_quorum_queue, delete_member,
1978+
[<<"/">>, QQ, Server2])),
1979+
{ok, Q} = rpc:call(Server, rabbit_amqqueue, lookup, [QQ, <<"/">>]),
1980+
#{nodes := Nodes} = amqqueue:get_type_state(Q),
1981+
?assertEqual(1, length(Nodes)),
1982+
ok.
1983+
19571984
delete_member_during_node_down(Config) ->
19581985
[Server, DownServer, Remove] = Servers = rabbit_ct_broker_helpers:get_node_configs(
19591986
Config, nodename),

0 commit comments

Comments
 (0)