summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEmile Joubert <emile@rabbitmq.com>2013-03-18 16:20:24 +0000
committerEmile Joubert <emile@rabbitmq.com>2013-03-18 16:20:24 +0000
commitfbc2dd0ca546240e15682fbad7a30c9d00511eff (patch)
tree69b18c93795a925bd24f055c3df094fadf653f4d
parent33ede1dd84207c05878a31abf51e72ee88d17674 (diff)
parent01012b70dadc9f8e1bca13330caaacff04505f71 (diff)
downloadrabbitmq-server-fbc2dd0ca546240e15682fbad7a30c9d00511eff.tar.gz
Merged bug25474 and bug25486
-rw-r--r--src/rabbit_node_monitor.erl37
1 files changed, 32 insertions, 5 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index 98e26a6a..de53b7f0 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -249,7 +249,8 @@ handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason},
write_cluster_status({AllNodes, DiscNodes, del_node(Node, RunningNodes)}),
ok = handle_dead_rabbit(Node),
[P ! {node_down, Node} || P <- pmon:monitored(Subscribers)],
- {noreply, State#state{monitors = pmon:erase({rabbit, Node}, Monitors)}};
+ {noreply, handle_dead_rabbit_state(
+ State#state{monitors = pmon:erase({rabbit, Node}, Monitors)})};
handle_info({'DOWN', _MRef, process, Pid, _Reason},
State = #state{subscribers = Subscribers}) ->
@@ -257,10 +258,19 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason},
handle_info({mnesia_system_event,
{inconsistent_database, running_partitioned_network, Node}},
- State = #state{partitions = Partitions}) ->
+ State = #state{partitions = Partitions,
+ monitors = Monitors}) ->
+ %% We will not get a node_up from this node - yet we should treat it as
+ %% up (mostly).
+ State1 = case pmon:is_monitored({rabbit, Node}, Monitors) of
+ true -> State;
+ false -> State#state{
+ monitors = pmon:monitor({rabbit, Node}, Monitors)}
+ end,
+ ok = handle_live_rabbit(Node),
Partitions1 = ordsets:to_list(
ordsets:add_element(Node, ordsets:from_list(Partitions))),
- {noreply, State#state{partitions = Partitions1}};
+ {noreply, State1#state{partitions = Partitions1}};
handle_info(_Info, State) ->
{noreply, State}.
@@ -299,9 +309,14 @@ handle_dead_rabbit(Node) ->
ok.
majority() ->
+ length(alive_nodes()) / length(rabbit_mnesia:cluster_nodes(all)) > 0.5.
+
+%% mnesia:system_info(db_nodes) (and hence
+%% rabbit_mnesia:cluster_nodes(running)) does not give reliable results
+%% when partitioned.
+alive_nodes() ->
Nodes = rabbit_mnesia:cluster_nodes(all),
- Alive = [N || N <- Nodes, pong =:= net_adm:ping(N)],
- length(Alive) / length(Nodes) > 0.5.
+ [N || N <- Nodes, pong =:= net_adm:ping(N)].
await_cluster_recovery() ->
rabbit_log:warning("Cluster minority status detected - awaiting recovery~n",
@@ -325,6 +340,18 @@ wait_for_cluster_recovery(Nodes) ->
wait_for_cluster_recovery(Nodes)
end.
+handle_dead_rabbit_state(State = #state{partitions = Partitions}) ->
+ %% If we have been partitioned, and we are now in the only remaining
+ %% partition, we no longer care about partitions - forget them. Note
+ %% that we do not attempt to deal with individual (other) partitions
+ %% going away. It's only safe to forget anything about partitions when
+ %% there are no partitions.
+ Partitions1 = case Partitions -- (Partitions -- alive_nodes()) of
+ [] -> [];
+ _ -> Partitions
+ end,
+ State#state{partitions = Partitions1}.
+
handle_live_rabbit(Node) ->
ok = rabbit_alarm:on_node_up(Node),
ok = rabbit_mnesia:on_node_up(Node).