From 536bf6cb36806f9163507d96b0e669bc7fdf9d44 Mon Sep 17 00:00:00 2001 From: Emile Joubert Date: Thu, 22 Aug 2013 15:44:39 +0100 Subject: Solve startup conflict between nodes by restarting slave runtime --- src/rabbit_mirror_queue_slave.erl | 47 ++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 54ee7129..ec6e500f 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -137,28 +137,39 @@ init(Q = #amqqueue { name = QName }) -> {stop, {duplicate_live_master, Node}}; existing -> gm:leave(GM), + ignore; + master_recovery_detected -> + init:restart(), ignore end. init_it(Self, GM, Node, QName) -> - [Q = #amqqueue { pid = QPid, slave_pids = SPids, gm_pids = GMPids }] = - mnesia:read({rabbit_queue, QName}), - case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of - [] -> add_slave(Q, Self, GM), - {new, QPid, GMPids}; - [QPid] -> case rabbit_misc:is_process_alive(QPid) of - true -> duplicate_live_master; - false -> {stale, QPid} - end; - [SPid] -> case rabbit_misc:is_process_alive(SPid) of - true -> existing; - false -> Q1 = Q#amqqueue { - slave_pids = SPids -- [SPid], - gm_pids = [T || T = {_, S} <- GMPids, - S =/= SPid] }, - add_slave(Q1, Self, GM), - {new, QPid, GMPids} - end + case mnesia:read({rabbit_queue, QName}) of + [Q = #amqqueue { pid = QPid, slave_pids = SPids, gm_pids = GMPids }] -> + case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of + [] -> + add_slave(Q, Self, GM), + {new, QPid, GMPids}; + [QPid] -> + case rabbit_misc:is_process_alive(QPid) of + true -> duplicate_live_master; + false -> {stale, QPid} + end; + [SPid] -> + case rabbit_misc:is_process_alive(SPid) of + true -> + existing; + false -> + Q1 = Q#amqqueue { + slave_pids = SPids -- [SPid], + gm_pids = [T || T = {_, S} <- GMPids, + S =/= SPid] }, + add_slave(Q1, Self, GM), + {new, QPid, GMPids} + end + end; + [] -> + master_recovery_detected end. %% Add to the end, so they are in descending order of age, see -- cgit v1.2.1 From 348be40e2718bbccb1ec98dfb1b9598ca34af784 Mon Sep 17 00:00:00 2001 From: Emile Joubert Date: Fri, 23 Aug 2013 11:06:53 +0100 Subject: Log reason for restart --- src/rabbit_mirror_queue_slave.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index ec6e500f..953abcd2 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -80,7 +80,7 @@ set_maximum_since_use(QPid, Age) -> info(QPid) -> gen_server2:call(QPid, info, infinity). -init(Q = #amqqueue { name = QName }) -> +init(Q = #amqqueue { name = QName, pid = MPid }) -> %% We join the GM group before we add ourselves to the amqqueue %% record. As a result: %% 1. We can receive msgs from GM that correspond to messages we will @@ -139,6 +139,8 @@ init(Q = #amqqueue { name = QName }) -> gm:leave(GM), ignore; master_recovery_detected -> + rabbit_log:info("Restarting to prevent conflict with ~p on ~p~n", + [node(MPid), rabbit_misc:rs(QName)]), init:restart(), ignore end. -- cgit v1.2.1 From c7915f20bf0ef022d4fbb749de40cc7177481693 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Mon, 16 Sep 2013 16:39:54 +0100 Subject: Don't restart the node, and add a comment explaining what's going on. --- src/rabbit_mirror_queue_slave.erl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 953abcd2..c92bc1f2 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -80,7 +80,7 @@ set_maximum_since_use(QPid, Age) -> info(QPid) -> gen_server2:call(QPid, info, infinity). -init(Q = #amqqueue { name = QName, pid = MPid }) -> +init(Q = #amqqueue { name = QName }) -> %% We join the GM group before we add ourselves to the amqqueue %% record. As a result: %% 1. We can receive msgs from GM that correspond to messages we will @@ -138,10 +138,11 @@ init(Q = #amqqueue { name = QName, pid = MPid }) -> existing -> gm:leave(GM), ignore; - master_recovery_detected -> - rabbit_log:info("Restarting to prevent conflict with ~p on ~p~n", - [node(MPid), rabbit_misc:rs(QName)]), - init:restart(), + master_in_recovery -> + %% The queue record vanished - we must have a master starting + %% concurrently with us. In that case we can safely decide to do + %% nothing here, and the master will start us in + %% master:init_with_existing_bq/3 ignore end. @@ -171,7 +172,7 @@ init_it(Self, GM, Node, QName) -> end end; [] -> - master_recovery_detected + master_in_recovery end. %% Add to the end, so they are in descending order of age, see -- cgit v1.2.1 From 3d9ed7cd491a087fa63048111169d10b81576843 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Mon, 16 Sep 2013 16:40:11 +0100 Subject: Cosmetic to be closer to what's on default. --- src/rabbit_mirror_queue_slave.erl | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index c92bc1f2..ec57fdc7 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -150,26 +150,22 @@ init_it(Self, GM, Node, QName) -> case mnesia:read({rabbit_queue, QName}) of [Q = #amqqueue { pid = QPid, slave_pids = SPids, gm_pids = GMPids }] -> case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of - [] -> - add_slave(Q, Self, GM), - {new, QPid, GMPids}; - [QPid] -> - case rabbit_misc:is_process_alive(QPid) of - true -> duplicate_live_master; - false -> {stale, QPid} - end; - [SPid] -> - case rabbit_misc:is_process_alive(SPid) of - true -> - existing; - false -> - Q1 = Q#amqqueue { - slave_pids = SPids -- [SPid], - gm_pids = [T || T = {_, S} <- GMPids, - S =/= SPid] }, - add_slave(Q1, Self, GM), - {new, QPid, GMPids} - end + [] -> add_slave(Q, Self, GM), + {new, QPid, GMPids}; + [QPid] -> case rabbit_misc:is_process_alive(QPid) of + true -> duplicate_live_master; + false -> {stale, QPid} + end; + [SPid] -> case rabbit_misc:is_process_alive(SPid) of + true -> existing; + false -> GMPids = [T || T = {_, S} <- GMPids, + S =/= SPid], + Q1 = Q#amqqueue{ + slave_pids = SPids -- [SPid], + gm_pids = GMPids}, + add_slave(Q1, Self, GM), + {new, QPid, GMPids} + end end; [] -> master_in_recovery -- cgit v1.2.1 From 6c338f1ba2f59e77b72105fe173d36a16f2292dd Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Mon, 23 Sep 2013 14:40:03 +0100 Subject: node_type() should check the cluster status file rather than ask Mnesia - in the early stages of RAM node startup Mnesia might tell us we are a standalone disc node. --- src/rabbit_mnesia.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 5fa29b7e..71d9a47e 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -399,7 +399,8 @@ node_info() -> delegate_beam_hash(), cluster_status_from_mnesia()}. node_type() -> - DiscNodes = cluster_nodes(disc), + {_AllNodes, DiscNodes, _RunningNodes} = + rabbit_node_monitor:read_cluster_status(), case DiscNodes =:= [] orelse me_in_nodes(DiscNodes) of true -> disc; false -> ram -- cgit v1.2.1 From a4bc9613298645b2688ad044aa923f4192bea263 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Mon, 23 Sep 2013 14:40:56 +0100 Subject: Don't go through the legacy code path when not dealing with a legacy file; don't arbitrarily rewrite the disc nodes field of this file at startup. --- src/rabbit_node_monitor.erl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 57dce7cd..0851cdbe 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -104,23 +104,18 @@ prepare_cluster_status_files() -> %% The running nodes file might contain a set or a list, in case %% of the legacy file RunningNodes2 = lists:usort(ThisNode ++ RunningNodes1), - {AllNodes1, WantDiscNode} = + {AllNodes1, DiscNodes} = case try_read_file(cluster_status_filename()) of {ok, [{AllNodes, DiscNodes0}]} -> - {AllNodes, lists:member(node(), DiscNodes0)}; + {AllNodes, DiscNodes0}; {ok, [AllNodes0]} when is_list(AllNodes0) -> - {legacy_cluster_nodes(AllNodes0), - legacy_should_be_disc_node(AllNodes0)}; + {legacy_cluster_nodes(AllNodes0), legacy_disc_nodes(AllNodes0)}; {ok, Files} -> Corrupt(Files); {error, enoent} -> {legacy_cluster_nodes([]), true} end, AllNodes2 = lists:usort(AllNodes1 ++ RunningNodes2), - DiscNodes = case WantDiscNode of - true -> ThisNode; - false -> [] - end, ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). write_cluster_status({All, Disc, Running}) -> @@ -441,8 +436,11 @@ legacy_cluster_nodes(Nodes) -> %% list otherwise) lists:usort(Nodes ++ mnesia:system_info(db_nodes)). -legacy_should_be_disc_node(DiscNodes) -> - DiscNodes == [] orelse lists:member(node(), DiscNodes). +legacy_disc_nodes(AllNodes) -> + case AllNodes == [] orelse lists:member(node(), AllNodes) of + true -> [node()]; + false -> [] + end. add_node(Node, Nodes) -> lists:usort([Node | Nodes]). -- cgit v1.2.1 From d3d10cd24da4a3a0c063cfd99bca6ddfdf38608f Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Wed, 25 Sep 2013 16:51:30 +0100 Subject: Fix stupidity --- src/rabbit_node_monitor.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 0851cdbe..10e68198 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -113,7 +113,8 @@ prepare_cluster_status_files() -> {ok, Files} -> Corrupt(Files); {error, enoent} -> - {legacy_cluster_nodes([]), true} + LegacyNodes = legacy_cluster_nodes([]), + {LegacyNodes, LegacyNodes} end, AllNodes2 = lists:usort(AllNodes1 ++ RunningNodes2), ok = write_cluster_status({AllNodes2, DiscNodes, RunningNodes2}). -- cgit v1.2.1 From bb5ef5c737a10a2661eb4eea3d3ecff7efa681df Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Fri, 27 Sep 2013 14:50:14 +0100 Subject: cosmetic(ish): remove superfluous module prefix --- src/rabbit_mnesia.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 5fa29b7e..d058ac36 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -518,7 +518,7 @@ copy_db(Destination) -> rabbit_file:recursive_copy(dir(), Destination). force_load_filename() -> - filename:join(rabbit_mnesia:dir(), "force_load"). + filename:join(dir(), "force_load"). force_load_next_boot() -> rabbit_file:write_file(force_load_filename(), <<"">>). -- cgit v1.2.1