diff options
author | Jean-Sebastien Pedron <jean-sebastien@rabbitmq.com> | 2014-12-09 19:14:28 +0100 |
---|---|---|
committer | Jean-Sebastien Pedron <jean-sebastien@rabbitmq.com> | 2014-12-09 19:14:28 +0100 |
commit | a755fcfcd6bfd9435534de2dc07288144bdf85a9 (patch) | |
tree | 3bde494cd06a95b675af77ad49c9aaf75de940d9 /src/rabbit_autoheal.erl | |
parent | 1749f8c8e0c65a4c09df720ade033706c6d37468 (diff) | |
download | rabbitmq-server-a755fcfcd6bfd9435534de2dc07288144bdf85a9.tar.gz |
Autoheal: The losing leader must wait for the winner_is message
Like any other losing node, the leader must wait for the winner_is
message, instead of restarting immediately.
The previous behaviour caused transient failures in the autoheal process
if the leader was in the middle of the restart at the time the winner
checked that all losing nodes were up and running.
Diffstat (limited to 'src/rabbit_autoheal.erl')
-rw-r--r-- | src/rabbit_autoheal.erl | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl index 90458741..7089911c 100644 --- a/src/rabbit_autoheal.erl +++ b/src/rabbit_autoheal.erl @@ -54,6 +54,10 @@ %% - we are the winner and are waiting for all losing nodes to stop %% before telling them they can restart %% +%% about_to_heal +%% - we are the leader, and have already assigned the winner and losers. +%% We are part of the losers and we wait for the winner_is announcement. +%% %% {leader_waiting, OutstandingStops} %% - we are the leader, and have already assigned the winner and losers. %% We are neither but need to ignore further requests to autoheal. @@ -135,7 +139,7 @@ handle_msg({request_start, Node}, true -> Continue({become_winner, Losers}); false -> send(Winner, {become_winner, Losers}), %% [0] case lists:member(node(), Losers) of - true -> Continue({winner_is, Winner}); + true -> about_to_heal; false -> {leader_waiting, Losers} end end @@ -163,7 +167,8 @@ handle_msg({become_winner, Losers}, end; handle_msg({winner_is, Winner}, - not_healing, _Partitions) -> + State, _Partitions) + when State =:= not_healing orelse State =:= about_to_heal -> rabbit_log:warning( "Autoheal: we were selected to restart; winner is ~p~n", [Winner]), rabbit_node_monitor:run_outside_applications( |