From 6276d667177d915aff5c5b86e895a9b38b7ea2ae Mon Sep 17 00:00:00 2001 From: Jean-Sebastien Pedron Date: Thu, 27 Nov 2014 12:13:09 +0100 Subject: Check cluster consistency before joining a cluster Before this change, the node would join the cluster and the consistency would be only checked during application restart (rabbitmqctl start_app). This could lead to crash during the join due to Erlang, Mnesia or RabbitMQ incompatibilities. Now, the join_cluster command reports the problem. Here's an example: $ rabbitmqctl join_cluster Clustering node with ... Error: {inconsistent_cluster, "OTP version mismatch: local node is R16B03-1, remote node 17.3"} --- src/rabbit_mnesia.erl | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index fa51dd70..b55d53b6 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -174,18 +174,25 @@ join_cluster(DiscoveryNode, NodeType) -> {ClusterNodes, _, _} = discover_cluster([DiscoveryNode]), case me_in_nodes(ClusterNodes) of false -> - %% reset the node. this simplifies things and it will be needed in - %% this case - we're joining a new cluster with new nodes which - %% are not in synch with the current node. I also lifts the burden - %% of reseting the node from the user. - reset_gracefully(), - - %% Join the cluster - rabbit_log:info("Clustering with ~p as ~p node~n", - [ClusterNodes, NodeType]), - ok = init_db_with_mnesia(ClusterNodes, NodeType, true, true), - rabbit_node_monitor:notify_joined_cluster(), - ok; + case check_cluster_consistency(DiscoveryNode, false) of + {ok, _} -> + %% reset the node. this simplifies things and it + %% will be needed in this case - we're joining a new + %% cluster with new nodes which are not in synch + %% with the current node. It also lifts the burden + %% of resetting the node from the user. + reset_gracefully(), + + %% Join the cluster + rabbit_log:info("Clustering with ~p as ~p node~n", + [ClusterNodes, NodeType]), + ok = init_db_with_mnesia(ClusterNodes, NodeType, + true, true), + rabbit_node_monitor:notify_joined_cluster(), + ok; + {error, Reason} -> + {error, Reason} + end; true -> rabbit_log:info("Already member of cluster: ~p~n", [ClusterNodes]), {ok, already_member} @@ -545,7 +552,7 @@ maybe_force_load() -> check_cluster_consistency() -> %% We want to find 0 or 1 consistent nodes. case lists:foldl( - fun (Node, {error, _}) -> check_cluster_consistency(Node); + fun (Node, {error, _}) -> check_cluster_consistency(Node, true); (_Node, {ok, Status}) -> {ok, Status} end, {error, not_found}, nodes_excl_me(cluster_nodes(all))) of @@ -575,17 +582,22 @@ check_cluster_consistency() -> throw(E) end. -check_cluster_consistency(Node) -> +check_cluster_consistency(Node, AlreadyFormed) -> case rpc:call(Node, rabbit_mnesia, node_info, []) of {badrpc, _Reason} -> {error, not_found}; {_OTP, _Rabbit, {error, _}} -> {error, not_found}; - {OTP, Rabbit, {ok, Status}} -> + {OTP, Rabbit, {ok, Status}} when AlreadyFormed -> case check_consistency(OTP, Rabbit, Node, Status) of {error, _} = E -> E; {ok, Res} -> {ok, Res} end; + {OTP, Rabbit, {ok, Status}} -> + case check_consistency(OTP, Rabbit) of + {error, _} = E -> E; + ok -> {ok, Status} + end; {_OTP, Rabbit, _Hash, _Status} -> %% delegate hash checking implies version mismatch version_error("Rabbit", rabbit_misc:version(), Rabbit) -- cgit v1.2.1 From f6cfb1a6195aca0af66652b918160dcce02ce34b Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Fri, 28 Nov 2014 10:41:59 +0000 Subject: Rename this to indicate what it does, not the current circumstances in which it is called. --- src/rabbit_mnesia.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index b55d53b6..80fbcd68 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -582,13 +582,13 @@ check_cluster_consistency() -> throw(E) end. -check_cluster_consistency(Node, AlreadyFormed) -> +check_cluster_consistency(Node, CheckNodesConsistency) -> case rpc:call(Node, rabbit_mnesia, node_info, []) of {badrpc, _Reason} -> {error, not_found}; {_OTP, _Rabbit, {error, _}} -> {error, not_found}; - {OTP, Rabbit, {ok, Status}} when AlreadyFormed -> + {OTP, Rabbit, {ok, Status}} when CheckNodesConsistency -> case check_consistency(OTP, Rabbit, Node, Status) of {error, _} = E -> E; {ok, Res} -> {ok, Res} -- cgit v1.2.1