From 6bc445afbc6b1f63e8c89613c115accfc5c1ec07 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Fri, 21 Nov 2014 13:35:22 +0000 Subject: Be a bit more careful before declaring a partial partition. --- src/rabbit_node_monitor.erl | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index e6069387..5f453053 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -317,10 +317,29 @@ handle_cast({check_partial_partition, Node, Rep, NodeGUID, MyGUID, RepGUID}, node_guids = GUIDs}) -> case lists:member(Node, rabbit_mnesia:cluster_nodes(running)) andalso orddict:find(Node, GUIDs) =:= {ok, NodeGUID} of - true -> cast(Rep, {partial_partition, Node, node(), RepGUID}); + true -> spawn_link( %%[1] + fun () -> + case rpc:call(Node, rabbit, is_running, []) of + {badrpc, _} -> + ok; + _ -> + cast(Rep, {partial_partition, + Node, node(), RepGUID}) + end + end); false -> ok end, {noreply, State}; +%% [1] We checked that we haven't heard the node go down - but we +%% really should make sure we can actually communicate with +%% it. Otherwise there's a race where we falsely detect a partial +%% partition. +%% +%% Now of course the rpc:call/4 may take a long time to return if +%% connectivity with the node is actually interrupted - but that's OK, +%% we only really want to do something in a timely manner if +%% connectivity is OK. However, of course as always we must not block +%% the node monitor, so we do the check in a separate process. handle_cast({check_partial_partition, _Node, _Reporter, _NodeGUID, _GUID, _ReporterGUID}, State) -> -- cgit v1.2.1 From 84a87082d8e971bcfac060c173170961c8abf654 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Fri, 21 Nov 2014 13:36:09 +0000 Subject: The last changeset made it possible for Mnesia to not report a partition, so do so explicitly here. --- src/rabbit_node_monitor.erl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 5f453053..f6059629 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -346,7 +346,7 @@ handle_cast({check_partial_partition, _Node, _Reporter, {noreply, State}; handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID}, - State = #state{guid = MyGUID}) -> + State = #state{guid = MyGUID, partitions = Partitions}) -> FmtBase = "Partial partition detected:~n" " * We saw DOWN from ~s~n" " * We can still see ~s which can see ~s~n", @@ -364,7 +364,11 @@ handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID}, FmtBase ++ "We will therefore intentionally disconnect from ~s~n", ArgsBase ++ [Proxy]), erlang:disconnect_node(Proxy), - {noreply, State} + %% In the event of explicitly disconnecting from a node, + %% sometimes Mnesia does not log that we were partitioned + %% - so note it here. + Partitions1 = lists:usort([Proxy | Partitions]), + {noreply, State#state{partitions = Partitions1}} end; handle_cast({partial_partition, _GUID, _Reporter, _Proxy}, State) -> -- cgit v1.2.1 From 7acd27bf9e73f1ac529f980715d8e0fc0a52b914 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Fri, 21 Nov 2014 13:36:24 +0000 Subject: Drive-by simplification. --- src/rabbit_node_monitor.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index f6059629..052636b1 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -464,8 +464,7 @@ handle_info({mnesia_system_event, monitors = pmon:monitor({rabbit, Node}, Monitors)} end, ok = handle_live_rabbit(Node), - Partitions1 = ordsets:to_list( - ordsets:add_element(Node, ordsets:from_list(Partitions))), + Partitions1 = lists:usort([Node | Partitions]), {noreply, maybe_autoheal(State1#state{partitions = Partitions1})}; handle_info({autoheal_msg, Msg}, State = #state{autoheal = AState, -- cgit v1.2.1 From 08da35dc7460d09fdf2c3aa30f248e95ba0e210a Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Mon, 24 Nov 2014 13:24:29 +0000 Subject: Remove out of date subclause. --- src/rabbit_node_monitor.erl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 052636b1..74685d65 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -288,11 +288,9 @@ handle_cast(notify_node_up, State = #state{guid = GUID}) -> %% When one node gets nodedown from another, it then sends %% 'check_partial_partition' to all the nodes it still thinks are %% alive. If any of those (intermediate) nodes still see the "down" -%% node as up, they inform it that this has happened (after a short -%% delay to ensure we don't detect something that would become a full -%% partition anyway as a partial one). The "down" node (in 'ignore' or -%% 'autoheal' mode) will then disconnect from the intermediate node to -%% "upgrade" to a full partition. +%% node as up, they inform it that this has happened. The "down" node +%% (in 'ignore' or 'autoheal' mode) will then disconnect from the +%% intermediate node to "upgrade" to a full partition. %% %% In pause_minority mode it will instead immediately pause until all %% nodes come back. This is because the contract for pause_minority is -- cgit v1.2.1 From 0abc70cef355db8131bfe9e6bce47e785278007f Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Mon, 24 Nov 2014 13:26:58 +0000 Subject: Even closer to reality. --- src/rabbit_node_monitor.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 74685d65..6f40015d 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -288,8 +288,8 @@ handle_cast(notify_node_up, State = #state{guid = GUID}) -> %% When one node gets nodedown from another, it then sends %% 'check_partial_partition' to all the nodes it still thinks are %% alive. If any of those (intermediate) nodes still see the "down" -%% node as up, they inform it that this has happened. The "down" node -%% (in 'ignore' or 'autoheal' mode) will then disconnect from the +%% node as up, they inform it that this has happened. The original +%% node (in 'ignore' or 'autoheal' mode) will then disconnect from the %% intermediate node to "upgrade" to a full partition. %% %% In pause_minority mode it will instead immediately pause until all -- cgit v1.2.1 From 381cb31142fef840c42338ebdd92f03023ebef55 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Mon, 24 Nov 2014 14:04:55 +0000 Subject: Cosmetic --- src/rabbit_node_monitor.erl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 6f40015d..a4ae2a5e 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -318,11 +318,9 @@ handle_cast({check_partial_partition, Node, Rep, NodeGUID, MyGUID, RepGUID}, true -> spawn_link( %%[1] fun () -> case rpc:call(Node, rabbit, is_running, []) of - {badrpc, _} -> - ok; - _ -> - cast(Rep, {partial_partition, - Node, node(), RepGUID}) + {badrpc, _} -> ok; + _ -> cast(Rep, {partial_partition, + Node, node(), RepGUID}) end end); false -> ok -- cgit v1.2.1 From a7da5b79f137abc3941b40cf80789359357a4348 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Tue, 25 Nov 2014 12:33:22 +0000 Subject: Do not attempt to "fix up" the partitions field; if Mnesia does not detect the partition it will behave wrong anyway. --- src/rabbit_node_monitor.erl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index a4ae2a5e..fc82af0c 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -342,7 +342,7 @@ handle_cast({check_partial_partition, _Node, _Reporter, {noreply, State}; handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID}, - State = #state{guid = MyGUID, partitions = Partitions}) -> + State = #state{guid = MyGUID}) -> FmtBase = "Partial partition detected:~n" " * We saw DOWN from ~s~n" " * We can still see ~s which can see ~s~n", @@ -360,11 +360,7 @@ handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID}, FmtBase ++ "We will therefore intentionally disconnect from ~s~n", ArgsBase ++ [Proxy]), erlang:disconnect_node(Proxy), - %% In the event of explicitly disconnecting from a node, - %% sometimes Mnesia does not log that we were partitioned - %% - so note it here. - Partitions1 = lists:usort([Proxy | Partitions]), - {noreply, State#state{partitions = Partitions1}} + {noreply, State} end; handle_cast({partial_partition, _GUID, _Reporter, _Proxy}, State) -> -- cgit v1.2.1 From 3eac8ee3c4374b71450f2834fc1ff7ed7a09dcc0 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Tue, 25 Nov 2014 14:21:26 +0000 Subject: Mutual disconnect when partial partition detected. And don't allow a very short disconnection since Mnesia might not detect it. --- src/rabbit_node_monitor.erl | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index fc82af0c..adc197e9 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -359,13 +359,22 @@ handle_cast({partial_partition, NotReallyDown, Proxy, MyGUID}, rabbit_log:error( FmtBase ++ "We will therefore intentionally disconnect from ~s~n", ArgsBase ++ [Proxy]), - erlang:disconnect_node(Proxy), + cast(Proxy, {partial_partition_disconnect, node()}), + disconnect(Proxy), {noreply, State} end; handle_cast({partial_partition, _GUID, _Reporter, _Proxy}, State) -> {noreply, State}; +%% Sometimes it appears the Erlang VM does not give us nodedown +%% messages reliably when another node disconnects from us. Therefore +%% we are told just before the disconnection so we can reciprocate. +handle_cast({partial_partition_disconnect, Other}, State) -> + rabbit_log:error("Partial partition disconnect from ~s~n", [Other]), + disconnect(Other), + {noreply, State}; + %% Note: when updating the status file, we can't simply write the %% mnesia information since the message can (and will) overtake the %% mnesia propagation. @@ -646,6 +655,19 @@ del_node(Node, Nodes) -> Nodes -- [Node]. cast(Node, Msg) -> gen_server:cast({?SERVER, Node}, Msg). +%% When we call this, it's because we want to force Mnesia to detect a +%% partition. But if we just disconnect_node/1 then Mnesia won't +%% detect a very short partition. So we want to force a slightly +%% longer disconnect. Unfortunately we don't have a way to blacklist +%% individual nodes; the best we can do is turn off auto-connect +%% altogether. +disconnect(Node) -> + application:set_env(kernel, dist_auto_connect, never), + erlang:disconnect_node(Node), + timer:sleep(1000), + application:unset_env(kernel, dist_auto_connect), + ok. + %%-------------------------------------------------------------------- %% mnesia:system_info(db_nodes) (and hence -- cgit v1.2.1 From 15aa5301e21e72330bfe3886593f46039a90cadb Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Tue, 25 Nov 2014 14:21:48 +0000 Subject: Also add a tiny bit more logging, for symmetry. --- src/rabbit_node_monitor.erl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index adc197e9..82a7a89b 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -453,6 +453,10 @@ handle_info({nodedown, Node, Info}, State = #state{guid = MyGUID, end, {noreply, handle_dead_node(Node, State)}; +handle_info({nodeup, Node, _Info}, State) -> + rabbit_log:info("node ~p up~n", [Node]), + {noreply, State}; + handle_info({mnesia_system_event, {inconsistent_database, running_partitioned_network, Node}}, State = #state{partitions = Partitions, -- cgit v1.2.1