diff options
author | Francesco Mazzoli <francesco@rabbitmq.com> | 2012-10-24 13:08:54 +0100 |
---|---|---|
committer | Francesco Mazzoli <francesco@rabbitmq.com> | 2012-10-24 13:08:54 +0100 |
commit | 971d806eab43fa569ea1bd09b9b94e0713d52a47 (patch) | |
tree | d9ddf1fe6a21afde520552c27b54c80485706660 | |
parent | 46b2632199b75cf16a17fd47fd530f25352ac3e3 (diff) | |
parent | a03476e7383bdf9e7045067cc35103b13aff611c (diff) | |
download | rabbitmq-server-971d806eab43fa569ea1bd09b9b94e0713d52a47.tar.gz |
merged default
-rw-r--r-- | src/rabbit_mnesia.erl | 98 |
1 files changed, 41 insertions, 57 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 04ac0904..131f74ac 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -221,7 +221,9 @@ change_cluster_node_type(Type) -> {ok, Status} -> Status; {error, _Reason} -> e(cannot_connect_to_cluster) end, - Node = case RunningNodes of + %% We might still be marked as running by a remote node since the + %% information of us going down might not have propagated yet. + Node = case RunningNodes -- [node()] of [] -> e(no_online_cluster_nodes); [Node0|_] -> Node0 end, @@ -276,18 +278,18 @@ forget_cluster_node(Node, RemoveWhenOffline) -> end. remove_node_offline_node(Node) -> - %% We want the running nodes *now*, so we don't call - %% `cluster_nodes(running)' which will just get what's in the cluster status - %% file. - case {running_nodes(cluster_nodes(all)) -- [Node], node_type()} of + %% Here `mnesia:system_info(running_db_nodes)' will RPC, but that's what we + %% want - we need to know the running nodes *now*. If the current node is a + %% RAM node it will return bogus results, but we don't care since we only do + %% this operation from disc nodes. + case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of {[], disc} -> - %% Note that while we check if the nodes was the last to - %% go down, apart from the node we're removing from, this - %% is still unsafe. Consider the situation in which A and - %% B are clustered. A goes down, and records B as the - %% running node. Then B gets clustered with C, C goes down - %% and B goes down. In this case, C is the second-to-last, - %% but we don't know that and we'll remove B from A + %% Note that while we check if the nodes was the last to go down, + %% apart from the node we're removing from, this is still unsafe. + %% Consider the situation in which A and B are clustered. A goes + %% down, and records B as the running node. Then B gets clustered + %% with C, C goes down and B goes down. In this case, C is the + %% second-to-last, but we don't know that and we'll remove B from A %% anyway, even if that will lead to bad things. case cluster_nodes(running) -- [node(), Node] of [] -> start_mnesia(), @@ -336,10 +338,30 @@ is_clustered() -> AllNodes = cluster_nodes(all), cluster_nodes(WhichNodes) -> cluster_status(WhichNodes). +cluster_status(WhichNodes) -> + {AllNodes, DiscNodes, RunningNodes} = Nodes = + case cluster_status_from_mnesia() of + {ok, Nodes0} -> + Nodes0; + {error, _Reason} -> + {AllNodes0, DiscNodes0, RunningNodes0} = + rabbit_node_monitor:read_cluster_status(), + %% The cluster status file records the status when the node is + %% online, but we know for sure that the node is offline now, so + %% we can remove it from the list of running nodes. + {AllNodes0, DiscNodes0, nodes_excl_me(RunningNodes0)} + end, + case WhichNodes of + status -> Nodes; + all -> AllNodes; + disc -> DiscNodes; + running -> RunningNodes + end. + %% This function is the actual source of information, since it gets %% the data from mnesia. Obviously it'll work only when mnesia is %% running. -mnesia_nodes() -> +cluster_status_from_mnesia() -> case mnesia:system_info(is_running) of no -> {error, mnesia_not_running}; @@ -359,41 +381,12 @@ mnesia_nodes() -> disc -> nodes_incl_me(DiscCopies); ram -> DiscCopies end, - {ok, {AllNodes, DiscNodes}}; + RunningNodes = mnesia:system_info(running_db_nodes), + {ok, {AllNodes, DiscNodes, RunningNodes}}; false -> {error, tables_not_present} end end. -cluster_status(WhichNodes) -> - %% I don't want to call `running_nodes/1' unless if necessary, since it's - %% pretty expensive. - {AllNodes1, DiscNodes1, RunningNodesThunk} = - case mnesia_nodes() of - {ok, {AllNodes, DiscNodes}} -> - {AllNodes, DiscNodes, fun() -> running_nodes(AllNodes) end}; - {error, _Reason} -> - {AllNodes, DiscNodes, RunningNodes} = - rabbit_node_monitor:read_cluster_status(), - %% The cluster status file records the status when the node is - %% online, but we know for sure that the node is offline now, so - %% we can remove it from the list of running nodes. - {AllNodes, DiscNodes, fun() -> nodes_excl_me(RunningNodes) end} - end, - case WhichNodes of - status -> {AllNodes1, DiscNodes1, RunningNodesThunk()}; - all -> AllNodes1; - disc -> DiscNodes1; - ram -> AllNodes1 -- DiscNodes1; - running -> RunningNodesThunk() - end. - -cluster_status_from_mnesia() -> - case mnesia_nodes() of - {ok, {AllNodes, DiscNodes}} -> {ok, {AllNodes, DiscNodes, - running_nodes(AllNodes)}}; - {error, _} = Err -> Err - end. - node_info() -> {erlang:system_info(otp_release), rabbit_misc:version(), cluster_status_from_mnesia()}. @@ -680,12 +673,10 @@ remove_node_if_mnesia_running(Node) -> end. leave_cluster() -> - case nodes_excl_me(cluster_nodes(all)) of - [] -> ok; - AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of - true -> ok; - false -> e(no_running_cluster_nodes) - end + AllNodes = cluster_nodes(all) -- [node()], + case not is_clustered() orelse lists:any(fun leave_cluster/1, AllNodes) of + true -> ok; + false -> e(no_running_cluster_nodes) end. leave_cluster(Node) -> @@ -726,14 +717,7 @@ change_extra_db_nodes(ClusterNodes0, CheckOtherNodes) -> Nodes end. -%% We're not using `mnesia:system_info(running_db_nodes)' directly -%% because if the node is a RAM node it won't know about other nodes %% when mnesia is stopped -running_nodes(Nodes) -> - {Replies, _BadNodes} = rpc:multicall(Nodes, - rabbit_mnesia, is_running_remote, []), - [Node || {Running, Node} <- Replies, Running]. - is_running_remote() -> {mnesia:system_info(is_running) =:= yes, node()}. check_consistency(OTP, Rabbit) -> |