summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrancesco Mazzoli <francesco@rabbitmq.com>2012-10-24 13:08:54 +0100
committerFrancesco Mazzoli <francesco@rabbitmq.com>2012-10-24 13:08:54 +0100
commit971d806eab43fa569ea1bd09b9b94e0713d52a47 (patch)
treed9ddf1fe6a21afde520552c27b54c80485706660
parent46b2632199b75cf16a17fd47fd530f25352ac3e3 (diff)
parenta03476e7383bdf9e7045067cc35103b13aff611c (diff)
downloadrabbitmq-server-971d806eab43fa569ea1bd09b9b94e0713d52a47.tar.gz
merged default
-rw-r--r--src/rabbit_mnesia.erl98
1 files changed, 41 insertions, 57 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index 04ac0904..131f74ac 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -221,7 +221,9 @@ change_cluster_node_type(Type) ->
{ok, Status} -> Status;
{error, _Reason} -> e(cannot_connect_to_cluster)
end,
- Node = case RunningNodes of
+ %% We might still be marked as running by a remote node since the
+ %% information about us going down might not have propagated yet.
+ Node = case RunningNodes -- [node()] of
[] -> e(no_online_cluster_nodes);
[Node0|_] -> Node0
end,
@@ -276,18 +278,18 @@ forget_cluster_node(Node, RemoveWhenOffline) ->
end.
remove_node_offline_node(Node) ->
- %% We want the running nodes *now*, so we don't call
- %% `cluster_nodes(running)' which will just get what's in the cluster status
- %% file.
- case {running_nodes(cluster_nodes(all)) -- [Node], node_type()} of
+ %% Here `mnesia:system_info(running_db_nodes)' will RPC, but that's what we
+ %% want - we need to know the running nodes *now*. If the current node is a
+ %% RAM node it will return bogus results, but we don't care since we only do
+ %% this operation from disc nodes.
+ case {mnesia:system_info(running_db_nodes) -- [Node], node_type()} of
{[], disc} ->
- %% Note that while we check if the nodes was the last to
- %% go down, apart from the node we're removing from, this
- %% is still unsafe. Consider the situation in which A and
- %% B are clustered. A goes down, and records B as the
- %% running node. Then B gets clustered with C, C goes down
- %% and B goes down. In this case, C is the second-to-last,
- %% but we don't know that and we'll remove B from A
+ %% Note that while we check if the node was the last to go down,
+ %% apart from the node we're removing from, this is still unsafe.
+ %% Consider the situation in which A and B are clustered. A goes
+ %% down, and records B as the running node. Then B gets clustered
+ %% with C, C goes down and B goes down. In this case, C is the
+ %% second-to-last, but we don't know that and we'll remove B from A
%% anyway, even if that will lead to bad things.
case cluster_nodes(running) -- [node(), Node] of
[] -> start_mnesia(),
@@ -336,10 +338,30 @@ is_clustered() -> AllNodes = cluster_nodes(all),
cluster_nodes(WhichNodes) -> cluster_status(WhichNodes).
+cluster_status(WhichNodes) ->
+ {AllNodes, DiscNodes, RunningNodes} = Nodes =
+ case cluster_status_from_mnesia() of
+ {ok, Nodes0} ->
+ Nodes0;
+ {error, _Reason} ->
+ {AllNodes0, DiscNodes0, RunningNodes0} =
+ rabbit_node_monitor:read_cluster_status(),
+ %% The cluster status file records the status when the node is
+ %% online, but we know for sure that the node is offline now, so
+ %% we can remove it from the list of running nodes.
+ {AllNodes0, DiscNodes0, nodes_excl_me(RunningNodes0)}
+ end,
+ case WhichNodes of
+ status -> Nodes;
+ all -> AllNodes;
+ disc -> DiscNodes;
+ running -> RunningNodes
+ end.
+
%% This function is the actual source of information, since it gets
%% the data from mnesia. Obviously it'll work only when mnesia is
%% running.
-mnesia_nodes() ->
+cluster_status_from_mnesia() ->
case mnesia:system_info(is_running) of
no ->
{error, mnesia_not_running};
@@ -359,41 +381,12 @@ mnesia_nodes() ->
disc -> nodes_incl_me(DiscCopies);
ram -> DiscCopies
end,
- {ok, {AllNodes, DiscNodes}};
+ RunningNodes = mnesia:system_info(running_db_nodes),
+ {ok, {AllNodes, DiscNodes, RunningNodes}};
false -> {error, tables_not_present}
end
end.
-cluster_status(WhichNodes) ->
- %% I don't want to call `running_nodes/1' unless if necessary, since it's
- %% pretty expensive.
- {AllNodes1, DiscNodes1, RunningNodesThunk} =
- case mnesia_nodes() of
- {ok, {AllNodes, DiscNodes}} ->
- {AllNodes, DiscNodes, fun() -> running_nodes(AllNodes) end};
- {error, _Reason} ->
- {AllNodes, DiscNodes, RunningNodes} =
- rabbit_node_monitor:read_cluster_status(),
- %% The cluster status file records the status when the node is
- %% online, but we know for sure that the node is offline now, so
- %% we can remove it from the list of running nodes.
- {AllNodes, DiscNodes, fun() -> nodes_excl_me(RunningNodes) end}
- end,
- case WhichNodes of
- status -> {AllNodes1, DiscNodes1, RunningNodesThunk()};
- all -> AllNodes1;
- disc -> DiscNodes1;
- ram -> AllNodes1 -- DiscNodes1;
- running -> RunningNodesThunk()
- end.
-
-cluster_status_from_mnesia() ->
- case mnesia_nodes() of
- {ok, {AllNodes, DiscNodes}} -> {ok, {AllNodes, DiscNodes,
- running_nodes(AllNodes)}};
- {error, _} = Err -> Err
- end.
-
node_info() ->
{erlang:system_info(otp_release), rabbit_misc:version(),
cluster_status_from_mnesia()}.
@@ -680,12 +673,10 @@ remove_node_if_mnesia_running(Node) ->
end.
leave_cluster() ->
- case nodes_excl_me(cluster_nodes(all)) of
- [] -> ok;
- AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of
- true -> ok;
- false -> e(no_running_cluster_nodes)
- end
+ AllNodes = cluster_nodes(all) -- [node()],
+ case not is_clustered() orelse lists:any(fun leave_cluster/1, AllNodes) of
+ true -> ok;
+ false -> e(no_running_cluster_nodes)
end.
leave_cluster(Node) ->
@@ -726,14 +717,7 @@ change_extra_db_nodes(ClusterNodes0, CheckOtherNodes) ->
Nodes
end.
-%% We're not using `mnesia:system_info(running_db_nodes)' directly
-%% because if the node is a RAM node it won't know about other nodes
%% when mnesia is stopped
-running_nodes(Nodes) ->
- {Replies, _BadNodes} = rpc:multicall(Nodes,
- rabbit_mnesia, is_running_remote, []),
- [Node || {Running, Node} <- Replies, Running].
-
is_running_remote() -> {mnesia:system_info(is_running) =:= yes, node()}.
check_consistency(OTP, Rabbit) ->