summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon MacMullen <simon@rabbitmq.com> 2013-03-12 17:56:18 +0000
committer Simon MacMullen <simon@rabbitmq.com> 2013-03-12 17:56:18 +0000
commit 38f82dfc2e3743f101b2921750ea8d4bd679c5d2 (patch)
tree e38bd01d0d69eaba17ea1cb95a75e6241ba8a7ef
parent 5dea4c63b56e8b9a7acdf0cedbe5fa051ea5e266 (diff)
download rabbitmq-server-38f82dfc2e3743f101b2921750ea8d4bd679c5d2.tar.gz
If we have been partitioned, and we are now in the only remaining partition, we no longer care about partitions - forget them. Note that we do not attempt to deal with individual (other) partitions going away; it's only safe to forget *any* of them when we have seen the back of *all* of them.
-rw-r--r-- src/rabbit_node_monitor.erl 24
1 files changed, 21 insertions, 3 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index 3d900d26..558596ef 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -249,7 +249,8 @@ handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason},
write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}),
ok = handle_dead_rabbit(Node),
[P ! {node_down, Node} || P <- pmon:monitored(Subscribers)],
- {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}};
+ {noreply, handle_dead_rabbit_state(
+ State#state{monitors = pmon:erase({rabbit, Node}, Monitors)})};
handle_info({'DOWN', _MRef, process, Pid, _Reason},
State = #state{subscribers = Subscribers}) ->
@@ -308,9 +309,14 @@ handle_dead_rabbit(Node) ->
ok.
majority() ->
+ length(alive_nodes()) / length(rabbit_mnesia:cluster_nodes(all)) > 0.5.
+
+%% mnesia:system_info(db_nodes) (and hence
+%% rabbit_mnesia:cluster_nodes(running)) does not give reliable results
+%% when partitioned.
+alive_nodes() ->
Nodes = rabbit_mnesia:cluster_nodes(all),
- Alive = [N || N <- Nodes, pong =:= net_adm:ping(N)],
- length(Alive) / length(Nodes) > 0.5.
+ [N || N <- Nodes, pong =:= net_adm:ping(N)].
await_cluster_recovery() ->
rabbit_log:warning("Cluster minority status detected - awaiting recovery~n",
@@ -334,6 +340,18 @@ wait_for_cluster_recovery(Nodes) ->
wait_for_cluster_recovery(Nodes)
end.
+handle_dead_rabbit_state(State = #state{partitions = Partitions}) ->
+ %% If we have been partitioned, and we are now in the only remaining
+ %% partition, we no longer care about partitions - forget them. Note
+ %% that we do not attempt to deal with individual (other) partitions
+ %% going away, it's only safe to forget *any* of them when we have seen
+ %% the back of *all* of them.
+ Partitions1 = case Partitions -- (Partitions -- alive_nodes()) of
+ [] -> [];
+ _ -> Partitions
+ end,
+ State#state{partitions = Partitions1}.
+
handle_live_rabbit(Node) ->
ok = rabbit_alarm:on_node_up(Node),
ok = rabbit_mnesia:on_node_up(Node).