diff options
author | Simon MacMullen <simon@rabbitmq.com> | 2014-10-09 16:09:14 +0100 |
---|---|---|
committer | Simon MacMullen <simon@rabbitmq.com> | 2014-10-09 16:09:14 +0100 |
commit | 502e7335316508845080722b063339e9d59a2a9f (patch) | |
tree | 1343797d726a7e31a587325dc082d2396bd8e799 | |
parent | 61e786037e71bf9940bd2309582b2026fde74f57 (diff) | |
download | rabbitmq-server-502e7335316508845080722b063339e9d59a2a9f.tar.gz |
Make rabbit_misc:is_process_alive() return false for nodes we are partitioned from; prevent prequeue:init/1 from entering an infinite loop on partition.
-rw-r--r-- | src/rabbit_misc.erl | 5 | ||||
-rw-r--r-- | src/rabbit_mnesia.erl | 4 | ||||
-rw-r--r-- | src/rabbit_prequeue.erl | 7 |
3 files changed, 10 insertions, 6 deletions
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 63f229be..dd4d5c76 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -853,9 +853,8 @@ ntoab(IP) -> %% loop in rabbit_amqqueue:on_node_down/1 and any delays we incur %% would be bad news. is_process_alive(Pid) -> - Node = node(Pid), - lists:member(Node, [node() | nodes()]) andalso - rpc:call(Node, erlang, is_process_alive, [Pid]) =:= true. + rabbit_mnesia:on_running_node(Pid) andalso + rpc:call(node(Pid), erlang, is_process_alive, [Pid]) =:= true. pget(K, P) -> proplists:get_value(K, P). pget(K, P, D) -> proplists:get_value(K, P, D). diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 2bada7ad..19fd01a1 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -27,6 +27,7 @@ status/0, is_clustered/0, + on_running_node/1, cluster_nodes/1, node_type/0, dir/0, @@ -71,6 +72,7 @@ {'running_nodes', [node()]} | {'partitions', [{node(), [node()]}]}]). -spec(is_clustered/0 :: () -> boolean()). +-spec(on_running_node/1 :: (pid()) -> boolean()). -spec(cluster_nodes/1 :: ('all' | 'disc' | 'ram' | 'running') -> [node()]). -spec(node_type/0 :: () -> node_type()). -spec(dir/0 :: () -> file:filename()). @@ -336,6 +338,8 @@ is_running() -> mnesia:system_info(is_running) =:= yes. is_clustered() -> AllNodes = cluster_nodes(all), AllNodes =/= [] andalso AllNodes =/= [node()]. +on_running_node(Pid) -> lists:member(node(Pid), cluster_nodes(running)). + cluster_nodes(WhichNodes) -> cluster_status(WhichNodes). %% This function is the actual source of information, since it gets diff --git a/src/rabbit_prequeue.erl b/src/rabbit_prequeue.erl index b1d92b89..ba430eba 100644 --- a/src/rabbit_prequeue.erl +++ b/src/rabbit_prequeue.erl @@ -64,13 +64,14 @@ init(Q, slave) -> rabbit_mirror_queue_slave:init(Q); init(#amqqueue{name = QueueName}, restart) -> {ok, Q = #amqqueue{pid = QPid, slave_pids = SPids}} = rabbit_amqqueue:lookup(QueueName), - Local = node(QPid) =:= node(), + LocalOrMasterDown = node(QPid) =:= node() + orelse not rabbit_mnesia:on_running_node(QPid), Slaves = [SPid || SPid <- SPids, rabbit_misc:is_process_alive(SPid)], case rabbit_misc:is_process_alive(QPid) of - true -> false = Local, %% assertion + true -> false = LocalOrMasterDown, %% assertion rabbit_mirror_queue_slave:go(self(), async), rabbit_mirror_queue_slave:init(Q); %% [1] - false -> case Local andalso Slaves =:= [] of + false -> case LocalOrMasterDown andalso, Slaves =:= [] of true -> crash_restart(Q); %% [2] false -> timer:sleep(25), init(Q, restart) %% [3] |