summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/rabbit_node_monitor.erl36
1 files changed, 29 insertions, 7 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index a4ae2a5e..82a7a89b 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -342,7 +342,7 @@ handle_cast({check_partial_partition, _Node, _Reporter,
{noreply, State};
handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID},
- State = #state{guid = MyGUID, partitions = Partitions}) ->
+ State = #state{guid = MyGUID}) ->
FmtBase = "Partial partition detected:~n"
" * We saw DOWN from ~s~n"
" * We can still see ~s which can see ~s~n",
@@ -359,17 +359,22 @@ handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID},
rabbit_log:error(
FmtBase ++ "We will therefore intentionally disconnect from ~s~n",
ArgsBase ++ [Proxy]),
- erlang:disconnect_node(Proxy),
- %% In the event of explicitly disconnecting from a node,
- %% sometimes Mnesia does not log that we were partitioned
- %% - so note it here.
- Partitions1 = lists:usort([Proxy | Partitions]),
- {noreply, State#state{partitions = Partitions1}}
+ cast(Proxy, {partial_partition_disconnect, node()}),
+ disconnect(Proxy),
+ {noreply, State}
end;
handle_cast({partial_partition, _GUID, _Reporter, _Proxy}, State) ->
{noreply, State};
+%% Sometimes it appears the Erlang VM does not give us nodedown
+%% messages reliably when another node disconnects from us. Therefore
+%% we are told just before the disconnection so we can reciprocate.
+handle_cast({partial_partition_disconnect, Other}, State) ->
+ rabbit_log:error("Partial partition disconnect from ~s~n", [Other]),
+ disconnect(Other),
+ {noreply, State};
+
%% Note: when updating the status file, we can't simply write the
%% mnesia information since the message can (and will) overtake the
%% mnesia propagation.
@@ -448,6 +453,10 @@ handle_info({nodedown, Node, Info}, State = #state{guid = MyGUID,
end,
{noreply, handle_dead_node(Node, State)};
+handle_info({nodeup, Node, _Info}, State) ->
+ rabbit_log:info("node ~p up~n", [Node]),
+ {noreply, State};
+
handle_info({mnesia_system_event,
{inconsistent_database, running_partitioned_network, Node}},
State = #state{partitions = Partitions,
@@ -650,6 +659,19 @@ del_node(Node, Nodes) -> Nodes -- [Node].
cast(Node, Msg) -> gen_server:cast({?SERVER, Node}, Msg).
+%% When we call this, it's because we want to force Mnesia to detect a
+%% partition. But if we just disconnect_node/1 then Mnesia won't
+%% detect a very short partition. So we want to force a slightly
+%% longer disconnect. Unfortunately we don't have a way to blacklist
+%% individual nodes; the best we can do is turn off auto-connect
+%% altogether.
+%%
+%% Any dist_auto_connect value that was configured before the call is
+%% remembered and put back afterwards (rather than being unset
+%% unconditionally), and the restore happens in an 'after' so that an
+%% exception cannot leave auto-connect switched off. Note that this
+%% blocks the calling process for about a second. Always returns ok.
+disconnect(Node) ->
+    Previous = application:get_env(kernel, dist_auto_connect),
+    application:set_env(kernel, dist_auto_connect, never),
+    try
+        erlang:disconnect_node(Node),
+        %% Keep the link down long enough for Mnesia to notice.
+        timer:sleep(1000)
+    after
+        case Previous of
+            {ok, Value} ->
+                application:set_env(kernel, dist_auto_connect, Value);
+            undefined ->
+                application:unset_env(kernel, dist_auto_connect)
+        end
+    end,
+    ok.
+
%%--------------------------------------------------------------------
%% mnesia:system_info(db_nodes) (and hence