diff options
author | Michal Kuratczyk <mkuratczyk@vmware.com> | 2021-10-29 10:12:42 +0200 |
---|---|---|
committer | Michal Kuratczyk <mkuratczyk@vmware.com> | 2021-10-29 10:12:42 +0200 |
commit | fdde759a18dc3805ed8c755419bb9a47299907c1 (patch) | |
tree | 9ab8a986548303452e95a07907f4e6c5ef9fc949 | |
parent | f751cc1bae3beb3f6fda29fdd9b06ba8bfa34885 (diff) | |
download | rabbitmq-server-git-lh-kura/ad-on-stop.tar.gz |
Use erlang:system_info(creation) instead of GUID (branch: lh-kura/ad-on-stop)
GUIDs are generated and announced when rabbit app starts.
In some cases, that takes minutes (many queues/bindings).
During that time, we can get false positives and declare
pause_minority, because the Erlang VM already responds but the GUID
has not been updated yet. By using and checking erlang:system_info(creation),
we get a unique value immediately after an Erlang VM restart, so we
can tell whether the node was restarted.
-rw-r--r-- | deps/rabbit/src/rabbit_node_monitor.erl | 11 |
1 file changed, 7 insertions, 4 deletions
diff --git a/deps/rabbit/src/rabbit_node_monitor.erl b/deps/rabbit/src/rabbit_node_monitor.erl index 4de4e4f1ca..bc2bc64e2e 100644 --- a/deps/rabbit/src/rabbit_node_monitor.erl +++ b/deps/rabbit/src/rabbit_node_monitor.erl @@ -366,7 +366,7 @@ init([]) -> {ok, ensure_keepalive_timer(#state{monitors = Monitors, subscribers = pmon:new(), partitions = [], - guid = rabbit_guid:gen(), + guid = erlang:system_info(creation), node_guids = maps:new(), autoheal = rabbit_autoheal:init()})}. @@ -435,15 +435,18 @@ handle_cast({check_partial_partition, Node, Rep, NodeGUID, MyGUID, RepGUID}, maps:find(Node, GUIDs) =:= {ok, NodeGUID} of true -> spawn_link( %%[1] fun () -> - case rpc:call(Node, rabbit, is_running, []) of + case rpc:call(Node, erlang, system_info, [creation]) of {badrpc, _} -> ok; - _ -> + NodeGUID -> rabbit_log:warning("Received a 'DOWN' message" " from ~p but still can" " communicate with it ", [Node]), cast(Rep, {partial_partition, - Node, node(), RepGUID}) + Node, node(), RepGUID}); + _ -> + rabbit_log:warning("Looks like ~p has been restarted.", [Node]), + ok end end); false -> ok |