diff options
-rw-r--r-- | src/rabbit_node_monitor.erl | 38 |
1 files changed, 28 insertions, 10 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index e6069387..a4ae2a5e 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -288,11 +288,9 @@ handle_cast(notify_node_up, State = #state{guid = GUID}) -> %% When one node gets nodedown from another, it then sends %% 'check_partial_partition' to all the nodes it still thinks are %% alive. If any of those (intermediate) nodes still see the "down" -%% node as up, they inform it that this has happened (after a short -%% delay to ensure we don't detect something that would become a full -%% partition anyway as a partial one). The "down" node (in 'ignore' or -%% 'autoheal' mode) will then disconnect from the intermediate node to -%% "upgrade" to a full partition. +%% node as up, they inform it that this has happened. The original +%% node (in 'ignore' or 'autoheal' mode) will then disconnect from the +%% intermediate node to "upgrade" to a full partition. %% %% In pause_minority mode it will instead immediately pause until all %% nodes come back. This is because the contract for pause_minority is @@ -317,17 +315,34 @@ handle_cast({check_partial_partition, Node, Rep, NodeGUID, MyGUID, RepGUID}, node_guids = GUIDs}) -> case lists:member(Node, rabbit_mnesia:cluster_nodes(running)) andalso orddict:find(Node, GUIDs) =:= {ok, NodeGUID} of - true -> cast(Rep, {partial_partition, Node, node(), RepGUID}); + true -> spawn_link( %%[1] + fun () -> + case rpc:call(Node, rabbit, is_running, []) of + {badrpc, _} -> ok; + _ -> cast(Rep, {partial_partition, + Node, node(), RepGUID}) + end + end); false -> ok end, {noreply, State}; +%% [1] We checked that we haven't heard the node go down - but we +%% really should make sure we can actually communicate with +%% it. Otherwise there's a race where we falsely detect a partial +%% partition. +%% +%% Now of course the rpc:call/4 may take a long time to return if +%% connectivity with the node is actually interrupted - but that's OK, +%% we only really want to do something in a timely manner if +%% connectivity is OK. However, of course as always we must not block +%% the node monitor, so we do the check in a separate process. handle_cast({check_partial_partition, _Node, _Reporter, _NodeGUID, _GUID, _ReporterGUID}, State) -> {noreply, State}; handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID}, - State = #state{guid = MyGUID}) -> + State = #state{guid = MyGUID, partitions = Partitions}) -> FmtBase = "Partial partition detected:~n" " * We saw DOWN from ~s~n" " * We can still see ~s which can see ~s~n", @@ -345,7 +360,11 @@ handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID}, FmtBase ++ "We will therefore intentionally disconnect from ~s~n", ArgsBase ++ [Proxy]), erlang:disconnect_node(Proxy), - {noreply, State} + %% In the event of explicitly disconnecting from a node, + %% sometimes Mnesia does not log that we were partitioned + %% - so note it here. + Partitions1 = lists:usort([Proxy | Partitions]), + {noreply, State#state{partitions = Partitions1}} end; handle_cast({partial_partition, _GUID, _Reporter, _Proxy}, State) -> @@ -441,8 +460,7 @@ handle_info({mnesia_system_event, monitors = pmon:monitor({rabbit, Node}, Monitors)} end, ok = handle_live_rabbit(Node), - Partitions1 = ordsets:to_list( - ordsets:add_element(Node, ordsets:from_list(Partitions))), + Partitions1 = lists:usort([Node | Partitions]), {noreply, maybe_autoheal(State1#state{partitions = Partitions1})}; handle_info({autoheal_msg, Msg}, State = #state{autoheal = AState, |