diff options
author | Michael Klishin <mklishin@pivotal.io> | 2020-02-25 19:06:34 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-02-25 19:06:34 +0300 |
commit | 5ce141c9f5e50aaa61880ee54aff7f07287d2707 (patch) | |
tree | c03ad96a8372b353244f18c76e49ca3b7c05f6b8 | |
parent | 1cc662cf0d02d34f6f7b71b046f3859b47464e2a (diff) | |
parent | 5e99e875363060f759e1391b8f8fa102fd7edcb5 (diff) | |
download | rabbitmq-server-git-5ce141c9f5e50aaa61880ee54aff7f07287d2707.tar.gz |
Merge pull request #2255 from rabbitmq/qq-remove-member-fix
Split QQ remove member into two operations
-rw-r--r-- | src/rabbit_quorum_queue.erl | 51 | ||||
-rw-r--r-- | test/quorum_queue_SUITE.erl | 19 |
2 files changed, 65 insertions, 5 deletions
```diff
diff --git a/src/rabbit_quorum_queue.erl b/src/rabbit_quorum_queue.erl
index 7e9974b27a..972716a396 100644
--- a/src/rabbit_quorum_queue.erl
+++ b/src/rabbit_quorum_queue.erl
@@ -46,6 +46,9 @@
          filter_quorum_critical/1, filter_quorum_critical/2,
          all_replica_states/0]).
 -export([is_policy_applicable/2]).
+-export([repair_amqqueue_nodes/1,
+         repair_amqqueue_nodes/2
+         ]).
 
 -include_lib("stdlib/include/qlc.hrl").
 -include("rabbit.hrl").
@@ -376,7 +379,38 @@ repair_leader_record(QName, Self) ->
         end,
     ok.
 
-
+repair_amqqueue_nodes(VHost, QueueName) ->
+    QName = #resource{virtual_host = VHost, name = QueueName, kind = queue},
+    repair_amqqueue_nodes(QName).
+
+-spec repair_amqqueue_nodes(rabbit_types:r('queue') | amqqueue:amqqueue()) ->
+    ok | repaired.
+repair_amqqueue_nodes(QName = #resource{}) ->
+    {ok, Q0} = rabbit_amqqueue:lookup(QName),
+    repair_amqqueue_nodes(Q0);
+repair_amqqueue_nodes(Q0) ->
+    QName = amqqueue:get_name(Q0),
+    Leader = amqqueue:get_pid(Q0),
+    {ok, Members, _} = ra:members(Leader),
+    RaNodes = [N || {_, N} <- Members],
+    #{nodes := Nodes} = amqqueue:get_type_state(Q0),
+    case lists:sort(RaNodes) =:= lists:sort(Nodes) of
+        true ->
+            %% up to date
+            ok;
+        false ->
+            %% update amqqueue record
+            Fun = fun (Q) ->
+                          TS0 = amqqueue:get_type_state(Q),
+                          TS = TS0#{nodes => RaNodes},
+                          amqqueue:set_type_state(Q, TS)
+                  end,
+            rabbit_misc:execute_mnesia_transaction(
+              fun() ->
+                      rabbit_amqqueue:update(QName, Fun)
+              end),
+            repaired
+    end.
 
 reductions(Name) ->
     try
@@ -899,8 +933,8 @@ delete_member(Q, Node) when ?amqqueue_is_quorum(Q) ->
             %% deleting the last member is not allowed
             {error, last_node};
         Members ->
-            case ra:leave_and_delete_server(Members, ServerId) of
-                ok ->
+            case ra:remove_member(Members, ServerId) of
+                {ok, _, _Leader} ->
                     Fun = fun(Q1) ->
                                   update_type_state(
                                     Q1,
@@ -910,8 +944,15 @@ delete_member(Q, Node) when ?amqqueue_is_quorum(Q) ->
                           end,
                     rabbit_misc:execute_mnesia_transaction(
                       fun() -> rabbit_amqqueue:update(QName, Fun) end),
-                    ok;
-                timeout ->
+                    case ra:force_delete_server(ServerId) of
+                        ok ->
+                            ok;
+                        {error, _} = Err ->
+                            Err;
+                        Err ->
+                            {error, Err}
+                    end;
+                {timeout, _} ->
                     {error, timeout};
                 E ->
                     E
diff --git a/test/quorum_queue_SUITE.erl b/test/quorum_queue_SUITE.erl
index 1c96517c85..64a2dc4205 100644
--- a/test/quorum_queue_SUITE.erl
+++ b/test/quorum_queue_SUITE.erl
@@ -70,6 +70,7 @@ groups() ->
                                             recover_from_multiple_failures,
                                             leadership_takeover,
                                             delete_declare,
+                                            delete_member_during_node_down,
                                             metrics_cleanup_on_leadership_takeover,
                                             metrics_cleanup_on_leader_crash,
                                             consume_in_minority,
@@ -1413,6 +1414,24 @@ delete_member_not_a_member(Config) ->
                  rpc:call(Server, rabbit_quorum_queue, delete_member,
                           [<<"/">>, QQ, Server])).
 
+delete_member_during_node_down(Config) ->
+    [Server, DownServer, _] = rabbit_ct_broker_helpers:get_node_configs(
+                                Config, nodename),
+
+    stop_node(Config, DownServer),
+    Ch = rabbit_ct_client_helpers:open_channel(Config, Server),
+    QQ = ?config(queue_name, Config),
+    ?assertEqual({'queue.declare_ok', QQ, 0, 0},
+                 declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
+    timer:sleep(200),
+    ?assertEqual(ok, rpc:call(Server, rabbit_quorum_queue, delete_member,
+                              [<<"/">>, QQ, Server])),
+
+    rabbit_ct_broker_helpers:start_node(Config, DownServer),
+    ?assertEqual(ok, rpc:call(Server, rabbit_quorum_queue, repair_amqqueue_nodes,
+                              [<<"/">>, QQ])),
+    ok.
+
 %% These tests check if node removal would cause any queues to lose (or not lose)
 %% their quorum. See rabbitmq/rabbitmq-cli#389 for background.
 
```