summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Simon MacMullen <simon@rabbitmq.com> 2013-02-27 14:43:44 +0000
committer Simon MacMullen <simon@rabbitmq.com> 2013-02-27 14:43:44 +0000
commit 9142ce00fec31cb22cb0351a4b4a257cdffbdfc9 (patch)
tree 86dfd176e27dcb35a169ee3f7e1da366cb0e2833
parent fad1b961ea4d53670381caa4b701add21ee406b4 (diff)
download rabbitmq-server-9142ce00fec31cb22cb0351a4b4a257cdffbdfc9.tar.gz
Base the whole thing off net_adm:ping/1 - because we might see other nodes come back but also be waiting (in the no-majority case, and RAM nodes). Better to detect they exist and come back than to stay stuck because they don't happen to be running Mnesia.
-rw-r--r-- src/rabbit_mnesia.erl 6
-rw-r--r-- src/rabbit_node_monitor.erl 15
2 files changed, 10 insertions, 11 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index ecb03f54..c39e898c 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -26,7 +26,6 @@
status/0,
is_clustered/0,
- majority/0,
cluster_nodes/1,
node_type/0,
dir/0,
@@ -68,7 +67,6 @@
-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} |
{'running_nodes', [node()]} |
{'partitions', [{node(), [node()]}]}]).
--spec(majority/0 :: () -> boolean()).
-spec(is_clustered/0 :: () -> boolean()).
-spec(cluster_nodes/1 :: ('all' | 'disc' | 'ram' | 'running') -> [node()]).
-spec(node_type/0 :: () -> node_type()).
@@ -340,10 +338,6 @@ status() ->
false -> []
end.
-majority() ->
- ensure_mnesia_running(),
- (length(cluster_nodes(running)) / length(cluster_nodes(all))) > 0.5.
-
mnesia_partitions(Nodes) ->
{Replies, _BadNodes} = rpc:multicall(
Nodes, rabbit_node_monitor, partitions, []),
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index 42df6e5d..249c17a4 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -281,7 +281,7 @@ handle_dead_rabbit(Node) ->
%% down - otherwise we have a race.
handle_dead_according_to_mnesia_rabbit() ->
case application:get_env(rabbit, cluster_cp_mode) of
- {ok, true} -> case rabbit_mnesia:majority() of
+ {ok, true} -> case majority() of
true -> ok;
false -> await_cluster_recovery()
end;
@@ -289,6 +289,13 @@ handle_dead_according_to_mnesia_rabbit() ->
end,
ok.
+majority() ->
+ Nodes = rabbit_mnesia:cluster_nodes(all),
+ Alive = [Status || N <- Nodes,
+ Status <- [net_adm:ping(N)],
+ Status =:= pong],
+ length(Alive) / length(Nodes) > 0.5.
+
await_cluster_recovery() ->
rabbit_log:warning("Cluster minority status detected - awaiting recovery~n",
[]),
@@ -303,11 +310,9 @@ await_cluster_recovery() ->
wait_for_cluster_recovery(Nodes) ->
[erlang:disconnect_node(Node) || Node <- Nodes],
- mnesia:start(),
- case rabbit_mnesia:majority() of
+ case majority() of
true -> rabbit:start();
- false -> mnesia:stop(),
- timer:sleep(1000),
+ false -> timer:sleep(1000),
wait_for_cluster_recovery(Nodes)
end.