diff options
author | Simon MacMullen <simon@rabbitmq.com> | 2013-02-27 14:43:44 +0000 |
---|---|---|
committer | Simon MacMullen <simon@rabbitmq.com> | 2013-02-27 14:43:44 +0000 |
commit | 9142ce00fec31cb22cb0351a4b4a257cdffbdfc9 (patch) | |
tree | 86dfd176e27dcb35a169ee3f7e1da366cb0e2833 | |
parent | fad1b961ea4d53670381caa4b701add21ee406b4 (diff) | |
download | rabbitmq-server-9142ce00fec31cb22cb0351a4b4a257cdffbdfc9.tar.gz |
Base the whole majority check on net_adm:ping/1, because other nodes may come back but themselves still be waiting (in the no-majority case, and for RAM nodes). It is better to detect that they exist and have come back than to stay stuck because they don't happen to be running Mnesia.
-rw-r--r-- | src/rabbit_mnesia.erl | 6 | ||||
-rw-r--r-- | src/rabbit_node_monitor.erl | 15 |
2 files changed, 10 insertions, 11 deletions
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index ecb03f54..c39e898c 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -26,7 +26,6 @@ status/0, is_clustered/0, - majority/0, cluster_nodes/1, node_type/0, dir/0, @@ -68,7 +67,6 @@ -spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | {'running_nodes', [node()]} | {'partitions', [{node(), [node()]}]}]). --spec(majority/0 :: () -> boolean()). -spec(is_clustered/0 :: () -> boolean()). -spec(cluster_nodes/1 :: ('all' | 'disc' | 'ram' | 'running') -> [node()]). -spec(node_type/0 :: () -> node_type()). @@ -340,10 +338,6 @@ status() -> false -> [] end. -majority() -> - ensure_mnesia_running(), - (length(cluster_nodes(running)) / length(cluster_nodes(all))) > 0.5. - mnesia_partitions(Nodes) -> {Replies, _BadNodes} = rpc:multicall( Nodes, rabbit_node_monitor, partitions, []), diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 42df6e5d..249c17a4 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -281,7 +281,7 @@ handle_dead_rabbit(Node) -> %% down - otherwise we have a race. handle_dead_according_to_mnesia_rabbit() -> case application:get_env(rabbit, cluster_cp_mode) of - {ok, true} -> case rabbit_mnesia:majority() of + {ok, true} -> case majority() of true -> ok; false -> await_cluster_recovery() end; @@ -289,6 +289,13 @@ handle_dead_according_to_mnesia_rabbit() -> end, ok. +majority() -> + Nodes = rabbit_mnesia:cluster_nodes(all), + Alive = [Status || N <- Nodes, + Status <- [net_adm:ping(N)], + Status =:= pong], + length(Alive) / length(Nodes) > 0.5. 
+ await_cluster_recovery() -> rabbit_log:warning("Cluster minority status detected - awaiting recovery~n", []), @@ -303,11 +310,9 @@ await_cluster_recovery() -> wait_for_cluster_recovery(Nodes) -> [erlang:disconnect_node(Node) || Node <- Nodes], - mnesia:start(), - case rabbit_mnesia:majority() of + case majority() of true -> rabbit:start(); - false -> mnesia:stop(), - timer:sleep(1000), + false -> timer:sleep(1000), wait_for_cluster_recovery(Nodes) end. |