summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Harrop <rob@rabbitmq.com>2011-06-23 09:16:40 +0100
committerRob Harrop <rob@rabbitmq.com>2011-06-23 09:16:40 +0100
commitc2698a744b47e24b8ff7fb19b8d574e73792b370 (patch)
tree91306fcebdbe8ebfbf8fb65a72c83855479c09b1
parent20ac2a0c3aef49cadfb48535e8a176ff68f913b4 (diff)
parenteb5db5fde98de1d996ac8fd09f8251c22aafd1d2 (diff)
downloadrabbitmq-server-c2698a744b47e24b8ff7fb19b8d574e73792b370.tar.gz
Merged bug23554 into default
-rw-r--r--include/rabbit.hrl2
-rw-r--r--src/rabbit_amqqueue.erl106
-rw-r--r--src/rabbit_amqqueue_process.erl51
-rw-r--r--src/rabbit_amqqueue_sup.erl6
-rw-r--r--src/rabbit_mirror_queue_coordinator.erl424
-rw-r--r--src/rabbit_mirror_queue_master.erl380
-rw-r--r--src/rabbit_mirror_queue_misc.erl135
-rw-r--r--src/rabbit_mirror_queue_slave.erl873
-rw-r--r--src/rabbit_mirror_queue_slave_sup.erl60
-rw-r--r--src/rabbit_mnesia.erl3
-rw-r--r--src/rabbit_router.erl6
-rw-r--r--src/rabbit_types.erl4
-rw-r--r--src/rabbit_upgrade_functions.erl22
13 files changed, 2034 insertions, 38 deletions
diff --git a/include/rabbit.hrl b/include/rabbit.hrl
index b65f048d..00b7e6e9 100644
--- a/include/rabbit.hrl
+++ b/include/rabbit.hrl
@@ -46,7 +46,7 @@
-record(exchange_serial, {name, next}).
-record(amqqueue, {name, durable, auto_delete, exclusive_owner = none,
- arguments, pid}).
+ arguments, pid, slave_pids, mirror_nodes}).
%% mnesia doesn't like unary records, so we add a dummy 'value' field
-record(route, {binding, value = const}).
diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl
index c8703740..bacb1d21 100644
--- a/src/rabbit_amqqueue.erl
+++ b/src/rabbit_amqqueue.erl
@@ -27,6 +27,8 @@
-export([notify_sent/2, unblock/2, flush_all/2]).
-export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]).
-export([on_node_down/1]).
+-export([store_queue/1]).
+
%% internal
-export([internal_declare/2, internal_delete/1,
@@ -191,18 +193,21 @@ find_durable_queues() ->
end).
recover_durable_queues(DurableQueues) ->
- Qs = [start_queue_process(Q) || Q <- DurableQueues],
+ Qs = [start_queue_process(node(), Q) || Q <- DurableQueues],
[QName || Q = #amqqueue{name = QName, pid = Pid} <- Qs,
gen_server2:call(Pid, {init, true}, infinity) == {new, Q}].
declare(QueueName, Durable, AutoDelete, Args, Owner) ->
ok = check_declare_arguments(QueueName, Args),
- Q = start_queue_process(#amqqueue{name = QueueName,
- durable = Durable,
- auto_delete = AutoDelete,
- arguments = Args,
- exclusive_owner = Owner,
- pid = none}),
+ {Node, MNodes} = determine_queue_nodes(Args),
+ Q = start_queue_process(Node, #amqqueue{name = QueueName,
+ durable = Durable,
+ auto_delete = AutoDelete,
+ arguments = Args,
+ exclusive_owner = Owner,
+ pid = none,
+ slave_pids = [],
+ mirror_nodes = MNodes}),
case gen_server2:call(Q#amqqueue.pid, {init, false}, infinity) of
not_found -> rabbit_misc:not_found(QueueName);
Q1 -> Q1
@@ -240,8 +245,24 @@ store_queue(Q = #amqqueue{durable = false}) ->
ok = mnesia:write(rabbit_queue, Q, write),
ok.
-start_queue_process(Q) ->
- {ok, Pid} = rabbit_amqqueue_sup:start_child([Q]),
+%% Works out where a queue declared with Args should live. Returns
+%% {Node, MirrorNodes}: Node is where the queue process is started and
+%% MirrorNodes is a list of nodes, the atom 'all', or 'undefined'.
+%%
+%% With the "nodes" policy the first listed node hosts the queue and
+%% any remaining ones become the mirror nodes; array entries that are
+%% not longstrs are skipped. With "all", or with no recognised policy,
+%% the queue lives on the local node.
+%%
+%% NOTE(review): node names are taken from the declare arguments and
+%% turned into atoms with list_to_atom/1. Atoms are never garbage
+%% collected, so confirm that only trusted clients can supply these.
+determine_queue_nodes(Args) ->
+    Lookup = fun (Key) -> rabbit_misc:table_lookup(Args, Key) end,
+    case {Lookup(<<"x-ha-policy">>), Lookup(<<"x-ha-policy-params">>)} of
+        {{_Type, <<"nodes">>}, {array, Entries}} ->
+            Names = [list_to_atom(binary_to_list(Bin)) ||
+                        {longstr, Bin} <- Entries],
+            case Names of
+                [Only]        -> {Only, undefined};
+                [Head | Tail] -> {Head, Tail}
+            end;
+        {{_Type, <<"all">>}, _Params} ->
+            {node(), all};
+        _Otherwise ->
+            {node(), undefined}
+    end.
+
+%% Starts a queue process for Q via rabbit_amqqueue_sup:start_child/2
+%% on Node and returns Q with its pid field set to the new process.
+%% Crashes with badmatch if the child cannot be started.
+start_queue_process(Node, Q) ->
+ {ok, Pid} = rabbit_amqqueue_sup:start_child(Node, [Q]),
Q#amqqueue{pid = Pid}.
add_default_binding(#amqqueue{name = QueueName}) ->
@@ -257,8 +278,13 @@ lookup(Name) ->
with(Name, F, E) ->
case lookup(Name) of
- {ok, Q} -> rabbit_misc:with_exit_handler(E, fun () -> F(Q) end);
- {error, not_found} -> E()
+ {ok, Q = #amqqueue{slave_pids = []}} ->
+ rabbit_misc:with_exit_handler(E, fun () -> F(Q) end);
+ {ok, Q} ->
+ E1 = fun () -> timer:sleep(25), with(Name, F, E) end,
+ rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end);
+ {error, not_found} ->
+ E()
end.
with(Name, F) ->
@@ -295,31 +321,58 @@ with_exclusive_access_or_die(Name, ReaderPid, F) ->
assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args},
RequiredArgs) ->
- rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName,
- [<<"x-expires">>, <<"x-message-ttl">>]).
+ rabbit_misc:assert_args_equivalence(
+ Args, RequiredArgs, QueueName,
+ [<<"x-expires">>, <<"x-message-ttl">>, <<"x-ha-policy">>]).
check_declare_arguments(QueueName, Args) ->
- [case Fun(rabbit_misc:table_lookup(Args, Key)) of
+ [case Fun(rabbit_misc:table_lookup(Args, Key), Args) of
ok -> ok;
{error, Error} -> rabbit_misc:protocol_error(
precondition_failed,
"invalid arg '~s' for ~s: ~w",
[Key, rabbit_misc:rs(QueueName), Error])
end || {Key, Fun} <-
- [{<<"x-expires">>, fun check_integer_argument/1},
- {<<"x-message-ttl">>, fun check_integer_argument/1}]],
+ [{<<"x-expires">>, fun check_integer_argument/2},
+ {<<"x-message-ttl">>, fun check_integer_argument/2},
+ {<<"x-ha-policy">>, fun check_ha_policy_argument/2}]],
ok.
-check_integer_argument(undefined) ->
+check_integer_argument(undefined, _Args) ->
ok;
-check_integer_argument({Type, Val}) when Val > 0 ->
+check_integer_argument({Type, Val}, _Args) when Val > 0 ->
case lists:member(Type, ?INTEGER_ARG_TYPES) of
true -> ok;
false -> {error, {unacceptable_type, Type}}
end;
-check_integer_argument({_Type, Val}) ->
+check_integer_argument({_Type, Val}, _Args) ->
{error, {value_zero_or_less, Val}}.
+%% Validates the value of the "x-ha-policy" queue argument. The first
+%% parameter is the result of looking that key up in Args; the second
+%% is the full argument table, needed because the "nodes" policy also
+%% requires "x-ha-policy-params" to be a non-empty array of longstrs.
+%% Returns ok or {error, Reason}.
+check_ha_policy_argument(undefined, _Args) ->
+    ok;
+check_ha_policy_argument({longstr, <<"all">>}, _Args) ->
+    ok;
+check_ha_policy_argument({longstr, <<"nodes">>}, Args) ->
+    case rabbit_misc:table_lookup(Args, <<"x-ha-policy-params">>) of
+        undefined ->
+            {error, {require, 'x-ha-policy-params'}};
+        {array, []} ->
+            {error, {require_non_empty_list_of_nodes_for_ha}};
+        {array, Entries} ->
+            %% Every entry must be tagged as a longstr.
+            NotLongstr = fun ({longstr, _Node}) -> false;
+                             (_Other)          -> true
+                         end,
+            case lists:filter(NotLongstr, Entries) of
+                [] -> ok;
+                _  -> {error, {require_node_list_as_longstrs_for_ha, Entries}}
+            end;
+        {OtherType, _} ->
+            {error, {ha_nodes_policy_params_not_array_of_longstr, OtherType}}
+    end;
+check_ha_policy_argument({longstr, UnknownPolicy}, _Args) ->
+    {error, {invalid_ha_policy, UnknownPolicy}};
+check_ha_policy_argument({BadType, _}, _Args) ->
+    {error, {unacceptable_type, BadType}}.
+
list(VHostPath) ->
mnesia:dirty_match_object(
rabbit_queue,
@@ -474,7 +527,8 @@ drop_expired(QPid) ->
on_node_down(Node) ->
rabbit_misc:execute_mnesia_tx_with_tail(
fun () -> Dels = qlc:e(qlc:q([delete_queue(QueueName) ||
- #amqqueue{name = QueueName, pid = Pid}
+ #amqqueue{name = QueueName, pid = Pid,
+ slave_pids = []}
<- mnesia:table(rabbit_queue),
node(Pid) == Node])),
rabbit_binding:process_deletions(
@@ -487,11 +541,13 @@ delete_queue(QueueName) ->
rabbit_binding:remove_transient_for_destination(QueueName).
pseudo_queue(QueueName, Pid) ->
- #amqqueue{name = QueueName,
- durable = false,
- auto_delete = false,
- arguments = [],
- pid = Pid}.
+ #amqqueue{name = QueueName,
+ durable = false,
+ auto_delete = false,
+ arguments = [],
+ pid = Pid,
+ slave_pids = [],
+ mirror_nodes = undefined}.
safe_delegate_call_ok(F, Pids) ->
case delegate:invoke(Pids, fun (Pid) ->
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 1e5ad349..c1fa048d 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -33,6 +33,8 @@
handle_info/2, handle_pre_hibernate/1, prioritise_call/3,
prioritise_cast/2, prioritise_info/2]).
+-export([init_with_backing_queue_state/7]).
+
%% Queue's state
-record(q, {q,
exclusive_consumer,
@@ -72,7 +74,8 @@
messages,
consumers,
memory,
- backing_queue_status
+ backing_queue_status,
+ slave_pids
]).
-define(CREATION_EVENT_KEYS,
@@ -81,7 +84,8 @@
durable,
auto_delete,
arguments,
- owner_pid
+ owner_pid,
+ mirror_nodes
]).
-define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]).
@@ -114,6 +118,34 @@ init(Q) ->
msg_id_to_channel = dict:new()}, hibernate,
{backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+%% Builds a #q state around an already-initialised backing queue (BQ
+%% with state BQS) instead of starting from scratch as init/1 does.
+%% The exclusive owner, if any, is monitored. RateTRef is stored as
+%% the rate timer reference and MTC as the msg_id-to-channel mapping.
+%% AckTags are handed to requeue_and_run/2 and each element of
+%% Deliveries is then passed, in order, through deliver_or_enqueue/2.
+%% Returns the resulting #q state.
+%% NOTE(review): presumably called by a mirror slave on promotion to
+%% master - confirm against rabbit_mirror_queue_slave.
+init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS,
+                              RateTRef, AckTags, Deliveries, MTC) ->
+    ?LOGDEBUG("Queue starting - ~p~n", [Q]),
+    case Owner of
+        none -> ok;
+        _    -> erlang:monitor(process, Owner)
+    end,
+    Initial = #q{q                   = Q,
+                 exclusive_consumer  = none,
+                 has_had_consumers   = false,
+                 backing_queue       = BQ,
+                 backing_queue_state = BQS,
+                 active_consumers    = queue:new(),
+                 blocked_consumers   = queue:new(),
+                 expires             = undefined,
+                 sync_timer_ref      = undefined,
+                 rate_timer_ref      = RateTRef,
+                 expiry_timer_ref    = undefined,
+                 ttl                 = undefined,
+                 stats_timer         = rabbit_event:init_stats_timer(),
+                 msg_id_to_channel   = MTC},
+    Requeued = requeue_and_run(AckTags, process_args(Initial)),
+    lists:foldl(fun deliver_or_enqueue/2, Requeued, Deliveries).
+
terminate(shutdown = R, State = #q{backing_queue = BQ}) ->
terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State);
terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) ->
@@ -225,9 +257,12 @@ next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
timed -> {ensure_sync_timer(State1), 0 }
end.
-backing_queue_module(#amqqueue{}) ->
- {ok, BQM} = application:get_env(backing_queue_module),
- BQM.
+%% Chooses the backing queue module for a queue. Queues declared with
+%% an "x-ha-policy" argument use rabbit_mirror_queue_master; all
+%% others use the module named by the backing_queue_module
+%% application environment variable.
+backing_queue_module(#amqqueue{arguments = Args}) ->
+ case rabbit_misc:table_lookup(Args, <<"x-ha-policy">>) of
+ undefined -> {ok, BQM} = application:get_env(backing_queue_module),
+ BQM;
+ _Policy -> rabbit_mirror_queue_master
+ end.
ensure_sync_timer(State = #q{sync_timer_ref = undefined}) ->
{ok, TRef} = timer:apply_after(
@@ -769,6 +804,12 @@ i(memory, _) ->
M;
i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) ->
BQ:status(BQS);
+i(slave_pids, #q{q = #amqqueue{name = Name}}) ->
+ {ok, #amqqueue{slave_pids = SPids}} = rabbit_amqqueue:lookup(Name),
+ SPids;
+i(mirror_nodes, #q{q = #amqqueue{name = Name}}) ->
+ {ok, #amqqueue{mirror_nodes = MNodes}} = rabbit_amqqueue:lookup(Name),
+ MNodes;
i(Item, _) ->
throw({bad_argument, Item}).
diff --git a/src/rabbit_amqqueue_sup.erl b/src/rabbit_amqqueue_sup.erl
index 1344956e..2c28adce 100644
--- a/src/rabbit_amqqueue_sup.erl
+++ b/src/rabbit_amqqueue_sup.erl
@@ -18,7 +18,7 @@
-behaviour(supervisor2).
--export([start_link/0, start_child/1]).
+-export([start_link/0, start_child/2]).
-export([init/1]).
@@ -29,8 +29,8 @@
start_link() ->
supervisor2:start_link({local, ?SERVER}, ?MODULE, []).
-start_child(Args) ->
- supervisor2:start_child(?SERVER, Args).
+start_child(Node, Args) ->
+ supervisor2:start_child({?SERVER, Node}, Args).
init([]) ->
{ok, {{simple_one_for_one_terminate, 10, 10},
diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl
new file mode 100644
index 00000000..2727c1d0
--- /dev/null
+++ b/src/rabbit_mirror_queue_coordinator.erl
@@ -0,0 +1,424 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_coordinator).
+
+-export([start_link/3, get_gm/1, ensure_monitoring/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+-export([joined/2, members_changed/3, handle_msg/3]).
+
+-behaviour(gen_server2).
+-behaviour(gm).
+
+-include("rabbit.hrl").
+-include("gm_specs.hrl").
+
+%% Coordinator state:
+%%   q         - the #amqqueue record given to start_link/3
+%%   gm        - pid of the gm group member this coordinator uses
+%%   monitors  - dict mapping each monitored pid to its monitor ref
+%%   death_fun - fun invoked with the pid of a monitored process when
+%%               its 'DOWN' message arrives
+-record(state, { q,
+ gm,
+ monitors,
+ death_fun
+ }).
+
+-define(ONE_SECOND, 1000).
+
+%%----------------------------------------------------------------------------
+%%
+%% Mirror Queues
+%%
+%% A queue with mirrors consists of the following:
+%%
+%%         #amqqueue{ pid, slave_pids }
+%%                      |  |
+%%           +----------+  +-------+--------------+-----------...etc...
+%%           |                     |              |
+%%           V                     V              V
+%%     amqqueue_process---+    slave-----+    slave-----+  ...etc...
+%%     | BQ = master----+ |    | BQ = vq |    | BQ = vq |
+%%     |      | BQ = vq | |    +-+-------+    +-+-------+
+%%     |      +-+-------+ |      |              |
+%%     +-++-----|---------+      |              |  (some details elided)
+%%       ||     |                |              |
+%%       ||   coordinator-+      |              |
+%%       ||   +-+---------+      |              |
+%%       ||     |                |              |
+%%       ||     gm-+ -- -- -- -- gm-+- -- -- -- gm-+- -- --...etc...
+%%       ||     +--+             +--+           +--+
+%%       ||
+%%   consumers
+%%
+%% The master is merely an implementation of bq, and thus is invoked
+%% through the normal bq interface by the amqqueue_process. The slaves
+%% meanwhile are processes in their own right (as is the
+%% coordinator). The coordinator and all slaves belong to the same gm
+%% group. Every member of a gm group receives messages sent to the gm
+%% group. Because the master is the bq of amqqueue_process, it doesn't
+%% have sole control over its mailbox, and as a result, the master
+%% itself cannot be passed messages directly (well, it could be via
+%% the amqqueue:run_backing_queue_async callback but that would induce
+%% additional unnecessary loading on the master queue process), yet it
+%% needs to react to gm events, such as the death of slaves. Thus the
+%% master creates the coordinator, and it is the coordinator that is
+%% the gm callback module and event handler for the master.
+%%
+%% Consumers are only attached to the master. Thus the master is
+%% responsible for informing all slaves when messages are fetched from
+%% the bq, when they're acked, and when they're requeued.
+%%
+%% The basic goal is to ensure that all slaves perform actions on
+%% their bqs in the same order as the master. Thus the master
+%% intercepts all events going to its bq, and suitably broadcasts
+%% these events on the gm. The slaves thus receive two streams of
+%% events: one stream is via the gm, and one stream is from channels
+%% directly. Whilst the stream via gm is guaranteed to be consistently
+%% seen by all slaves, the same is not true of the stream via
+%% channels. For example, in the event of an unexpected death of a
+%% channel during a publish, only some of the mirrors may receive that
+%% publish. As a result of this problem, the messages broadcast over
+%% the gm contain published content, and thus slaves can operate
+%% successfully on messages that they only receive via the gm. The key
+%% purpose of also sending messages directly from the channels to the
+%% slaves is that without this, in the event of the death of the
+%% master, messages could be lost until a suitable slave is promoted.
+%%
+%% However, that is not the only reason. For example, if confirms are
+%% in use, then there is no guarantee that every slave will see the
+%% delivery with the same msg_seq_no. As a result, the slaves have to
+%% wait until they've seen both the publish via gm, and the publish
+%% via the channel before they have enough information to be able to
+%% perform the publish to their own bq, and subsequently issue the
+%% confirm, if necessary. Either form of publish can arrive first, and
+%% a slave can be upgraded to the master at any point during this
+%% process. Confirms continue to be issued correctly, however.
+%%
+%% Because the slave is a full process, it impersonates parts of the
+%% amqqueue API. However, it does not need to implement all parts: for
+%% example, no ack or consumer-related message can arrive directly at
+%% a slave from a channel: it is only publishes that pass both
+%% directly to the slaves and go via gm.
+%%
+%% Slaves can be added dynamically. When this occurs, there is no
+%% attempt made to sync the current contents of the master with the
+%% new slave, thus the slave will start empty, regardless of the state
+%% of the master. Thus the slave needs to be able to detect and ignore
+%% operations which are for messages it has not received: because of
+%% the strict FIFO nature of queues in general, this is
+%% straightforward - all new publishes that the new slave receives via
+%% gm should be processed as normal, but fetches which are for
+%% messages the slave has never seen should be ignored. Similarly,
+%% acks for messages the slave never fetched should be
+%% ignored. Eventually, as the master is consumed from, the messages
+%% at the head of the queue which were there before the slave joined
+%% will disappear, and the slave will become fully synced with the
+%% state of the master. The detection of the sync-status of a slave is
+%% done entirely based on length: if the slave and the master both
+%% agree on the length of the queue after the fetch of the head of the
+%% queue, then the queues must be in sync. The only other possibility
+%% is that the slave's queue is shorter, and thus the fetch should be
+%% ignored.
+%%
+%% Because acktags are issued by the bq independently, and because
+%% there is no requirement for the master and all slaves to use the
+%% same bq, all references to msgs going over gm are by msg_id. Thus
+%% upon acking, the master must convert the acktags back to msg_ids
+%% (which happens to be what bq:ack returns), then sends the msg_ids
+%% over gm, the slaves must convert the msg_ids to acktags (a mapping
+%% the slaves themselves must maintain).
+%%
+%% When the master dies, a slave gets promoted. This will be the
+%% eldest slave, and thus the hope is that that slave is most likely
+%% to be sync'd with the master. The design of gm is that the
+%% notification of the death of the master will only appear once all
+%% messages in-flight from the master have been fully delivered to all
+%% members of the gm group. Thus at this point, the slave that gets
+%% promoted cannot broadcast different events in a different order
+%% than the master for the same msgs: there is no possibility for the
+%% same msg to be processed by the old master and the new master - if
+%% it was processed by the old master then it will have been processed
+%% by the slave before the slave was promoted, and vice versa.
+%%
+%% Upon promotion, all msgs pending acks are requeued as normal, the
+%% slave constructs state suitable for use in the master module, and
+%% then dynamically changes into an amqqueue_process with the master
+%% as the bq, and the slave's bq as the master's bq. Thus the very
+%% same process that was the slave is now a full amqqueue_process.
+%%
+%% It is important that we avoid memory leaks due to the death of
+%% senders (i.e. channels) and partial publications. A sender
+%% publishing a message may fail mid way through the publish and thus
+%% only some of the mirrors will receive the message. We need the
+%% mirrors to be able to detect this and tidy up as necessary to avoid
+%% leaks. If we just had the master monitoring all senders then we
+%% would have the possibility that a sender appears and only sends the
+%% message to a few of the slaves before dying. Those slaves would
+%% then hold on to the message, assuming they'll receive some
+%% instruction eventually from the master. Thus we have both slaves
+%% and the master monitor all senders they become aware of. But there
+%% is a race: if the slave receives a DOWN of a sender, how does it
+%% know whether or not the master is going to send it instructions
+%% regarding those messages?
+%%
+%% Whilst the master monitors senders, it can't access its mailbox
+%% directly, so it delegates monitoring to the coordinator. When the
+%% coordinator receives a DOWN message from a sender, it informs the
+%% master via a callback. This allows the master to do any tidying
+%% necessary, but more importantly allows the master to broadcast a
+%% sender_death message to all the slaves, saying the sender has
+%% died. Once the slaves receive the sender_death message, they know
+%% that they're not going to receive any more instructions from the gm
+%% regarding that sender, thus they throw away any publications from
+%% the sender pending publication instructions. However, it is
+%% possible that the coordinator receives the DOWN and communicates
+%% that to the master before the master has finished receiving and
+%% processing publishes from the sender. This turns out not to be a
+%% problem: the sender has actually died, and so will not need to
+%% receive confirms or other feedback, and should further messages be
+%% "received" from the sender, the master will ask the coordinator to
+%% set up a new monitor, and will continue to process the messages
+%% normally. Slaves may thus receive publishes via gm from previously
+%% declared "dead" senders, but again, this is fine: should the slave
+%% have just thrown out the message it had received directly from the
+%% sender (due to receiving a sender_death message via gm), it will be
+%% able to cope with the publication purely from the master via gm.
+%%
+%% When a slave receives a DOWN message for a sender, if it has not
+%% received the sender_death message from the master via gm already,
+%% then it will wait 20 seconds before broadcasting a request for
+%% confirmation from the master that the sender really has died.
+%% Should a sender have only sent a publish to slaves, this allows
+%% slaves to inform the master of the previous existence of the
+%% sender. The master will thus monitor the sender, receive the DOWN,
+%% and subsequently broadcast the sender_death message, allowing the
+%% slaves to tidy up. This process can repeat for the same sender:
+%% consider one slave receives the publication, then the DOWN, then
+%% asks for confirmation of death, then the master broadcasts the
+%% sender_death message. Only then does another slave receive the
+%% publication and thus set up its monitoring. Eventually that slave
+%% too will receive the DOWN, ask for confirmation and the master will
+%% monitor the sender again, receive another DOWN, and send out
+%% another sender_death message. Given the 20 second delay before
+%% requesting death confirmation, this is highly unlikely, but it is a
+%% possibility.
+%%
+%% When the 20 second timer expires, the slave first checks to see
+%% whether it still needs confirmation of the death before requesting
+%% it. This prevents unnecessary traffic on gm as it allows one
+%% broadcast of the sender_death message to satisfy many slaves.
+%%
+%% If we consider the promotion of a slave at this point, we have two
+%% possibilities: that of the slave that has received the DOWN and is
+%% thus waiting for confirmation from the master that the sender
+%% really is down; and that of the slave that has not received the
+%% DOWN. In the first case, in the act of promotion to master, the new
+%% master will monitor again the dead sender, and after it has
+%% finished promoting itself, it should find another DOWN waiting,
+%% which it will then broadcast. This will allow slaves to tidy up as
+%% normal. In the second case, we have the possibility that
+%% confirmation-of-sender-death request has been broadcast, but that
+%% it was broadcast before the master failed, and that the slave being
+%% promoted does not know anything about that sender, and so will not
+%% monitor it on promotion. Thus a slave that broadcasts such a
+%% request, at the point of broadcasting it, recurses, setting another
+%% 20 second timer. As before, on expiry of the timer, the slave
+%% checks to see whether it still has not received a sender_death
+%% message for the dead sender, and if not, broadcasts a death
+%% confirmation request. Thus this ensures that even when a master
+%% dies and the new slave has no knowledge of the dead sender, it will
+%% eventually receive a death confirmation request, shall monitor the
+%% dead sender, receive the DOWN and broadcast the sender_death
+%% message.
+%%
+%% The preceding commentary deals with the possibility of slaves
+%% receiving publications from senders which the master does not, and
+%% the need to prevent memory leaks in such scenarios. The inverse is
+%% also possible: a partial publication may cause only the master to
+%% receive a publication. It will then publish the message via gm. The
+%% slaves will receive it via gm, will publish it to their BQ and will
+%% set up monitoring on the sender. They will then receive the DOWN
+%% message and the master will eventually publish the corresponding
+%% sender_death message. The slave will then be able to tidy up its
+%% state as normal.
+%%
+%% We don't support transactions on mirror queues. To do so is
+%% challenging. The underlying bq is free to add the contents of the
+%% txn to the queue proper at any point after the tx.commit comes in
+%% but before the tx.commit-ok goes out. This means that it is not
+%% safe for all mirrors to simply issue the bq:tx_commit at the same
+%% time, as the addition of the txn's contents to the queue may
+%% subsequently be inconsistently interwoven with other actions on the
+%% bq. The solution to this is, in the master, wrap the PostCommitFun
+%% and do the gm:broadcast in there: at that point, you're in the bq
+%% (well, there's actually nothing to stop that function being invoked
+%% by some other process, but let's pretend for now: you could always
+%% use run_backing_queue to ensure you really are in the queue process
+%% (the _async variant would be unsafe from an ordering pov)), the
+%% gm:broadcast is safe because you don't have to worry about races
+%% with other gm:broadcast calls (same process). Thus this signal
+%% would indicate sufficiently to all the slaves that they must insert
+%% the complete contents of the txn at precisely this point in the
+%% stream of events.
+%%
+%% However, it's quite difficult for the slaves to make that happen:
+%% they would be forced to issue the bq:tx_commit at that point, but
+%% then stall processing any further instructions from gm until they
+%% receive the notification from their bq that the tx_commit has fully
+%% completed (i.e. they need to treat what is an async system as being
+%% fully synchronous). This is not too bad (apart from the
+%% vomit-inducing notion of it all): just need a queue of instructions
+%% from the GM; but then it gets rather worse when you consider what
+%% needs to happen if the master dies at this point and the slave in
+%% the middle of this tx_commit needs to be promoted.
+%%
+%% Finally, we can't possibly hope to make transactions atomic across
+%% mirror queues, and it's not even clear that that's desirable: if a
+%% slave fails whilst there's an open transaction in progress then
+%% when the channel comes to commit the txn, it will detect the
+%% failure and destroy the channel. However, the txn will have
+%% actually committed successfully in all the other mirrors (including
+%% master). To do this bit properly would require 2PC and all the
+%% baggage that goes with that.
+%%
+%% Recovery of mirrored queues is straightforward: as nodes die, the
+%% remaining nodes record this, and eventually a situation is reached
+%% in which only one node is alive, which is the master. This is the
+%% only node which, upon recovery, will resurrect a mirrored queue:
+%% nodes which die and then rejoin as a slave will start off empty as
+%% if they have no mirrored content at all. This is not surprising: to
+%% achieve anything more sophisticated would require the master and
+%% recovering slave to be able to check to see whether they agree on
+%% the last seen state of the queue: checking length alone is not
+%% sufficient in this case.
+%%
+%% For more documentation see the comments in bug 23554.
+%%
+%%----------------------------------------------------------------------------
+
+%% Starts a coordinator process linked to the caller. GM is either the
+%% pid of an existing gm group member, which init/1 links to, or
+%% 'undefined', in which case init/1 starts a new gm member named
+%% after the queue. DeathFun is called with the pid of each monitored
+%% process that goes down.
+start_link(Queue, GM, DeathFun) ->
+ gen_server2:start_link(?MODULE, [Queue, GM, DeathFun], []).
+
+%% Synchronously asks the coordinator CPid for the pid of its gm
+%% group member. Waits indefinitely for the reply.
+get_gm(CPid) ->
+ gen_server2:call(CPid, get_gm, infinity).
+
+%% Asynchronously asks the coordinator CPid to monitor every pid in
+%% Pids that it is not already monitoring.
+ensure_monitoring(CPid, Pids) ->
+ gen_server2:cast(CPid, {ensure_monitoring, Pids}).
+
+%% ---------------------------------------------------------------------------
+%% gen_server
+%% ---------------------------------------------------------------------------
+
+%% gen_server2 init callback. Obtains the gm member to use (see
+%% join_or_link_gm/2), arranges for a heartbeat to be broadcast on it
+%% every second, and starts with no monitored senders.
+init([#amqqueue { name = QueueName } = Q, GM, DeathFun]) ->
+    GM1 = join_or_link_gm(GM, QueueName),
+    {ok, _TRef} =
+        timer:apply_interval(?ONE_SECOND, gm, broadcast, [GM1, heartbeat]),
+    State = #state { q         = Q,
+                     gm        = GM1,
+                     monitors  = dict:new(),
+                     death_fun = DeathFun },
+    {ok, State, hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+%% With no gm supplied, start a new member for the queue and block
+%% until its joined/2 callback reports back to this process. With an
+%% existing gm pid, just link to it.
+join_or_link_gm(undefined, QueueName) ->
+    {ok, NewGM} = gm:start_link(QueueName, ?MODULE, [self()]),
+    receive
+        {joined, NewGM, _Members} -> ok
+    end,
+    NewGM;
+join_or_link_gm(ExistingGM, _QueueName) ->
+    true = link(ExistingGM),
+    ExistingGM.
+
+%% gen_server2 call callback. The only supported request is get_gm,
+%% which replies with the gm pid held in the state.
+handle_call(get_gm, _From, State = #state { gm = GM }) ->
+ reply(GM, State).
+
+%% gen_server2 cast callback.
+%%
+%% {gm_deaths, Deaths}: logs the dead members and removes them from
+%% the queue's record. Carries on if the result is {ok, Pid} with Pid
+%% on this node, and stops normally if the queue no longer exists.
+%% Any other result is not matched and crashes the coordinator.
+handle_cast({gm_deaths, DeadPids},
+            State = #state { q = #amqqueue { name = QName } }) ->
+    QueueStr  = rabbit_misc:rs(QName),
+    SelfStr   = rabbit_misc:pid_to_string(self()),
+    DeathStrs = [[rabbit_misc:pid_to_string(DeadPid), $ ] ||
+                    DeadPid <- DeadPids],
+    rabbit_log:info("Mirrored-queue (~s): Master ~s saw deaths of mirrors ~s~n",
+                    [QueueStr, SelfStr, DeathStrs]),
+    case rabbit_mirror_queue_misc:remove_from_queue(QName, DeadPids) of
+        {ok, Pid} when node(Pid) =:= node() -> noreply(State);
+        {error, not_found}                  -> {stop, normal, State}
+    end;
+
+%% {ensure_monitoring, Pids}: starts monitoring each pid that does not
+%% already have an entry in the monitors dict.
+handle_cast({ensure_monitoring, Pids},
+            State = #state { monitors = Monitors }) ->
+    AddMonitor =
+        fun (Pid, Acc) ->
+                case dict:find(Pid, Acc) of
+                    {ok, _MRef} -> Acc;
+                    error       -> dict:store(
+                                     Pid, erlang:monitor(process, Pid), Acc)
+                end
+        end,
+    noreply(State #state { monitors = lists:foldl(AddMonitor, Monitors, Pids) }).
+
+%% gen_server2 info callback. A 'DOWN' for a pid we are monitoring is
+%% reported through the death fun (which must return ok) and the pid
+%% is forgotten; a 'DOWN' for an unknown pid is ignored. Any other
+%% message stops the coordinator with reason {unexpected_info, Msg}.
+handle_info({'DOWN', _MRef, process, Pid, _Reason},
+            State = #state { monitors  = Monitors,
+                             death_fun = DeathFun }) ->
+    case dict:is_key(Pid, Monitors) of
+        true ->
+            ok = DeathFun(Pid),
+            noreply(State #state { monitors = dict:erase(Pid, Monitors) });
+        false ->
+            noreply(State)
+    end;
+
+handle_info(Unexpected, State) ->
+    {stop, {unexpected_info, Unexpected}, State}.
+
+%% terminate/2 serves both behaviours this module implements. The two
+%% take their arguments in opposite orders, so the clauses are told
+%% apart by shape: a #state{} record in second position, or the gm
+%% callback argument list [CPid] in first position. Neither needs any
+%% cleanup.
+terminate(_Reason, #state{}) ->
+ %% gen_server2 callback: (Reason, State)
+ ok;
+terminate([_CPid], _Reason) ->
+ %% gm callback: (Args, Reason)
+ ok.
+
+%% gen_server2 code upgrade callback; the state is kept unchanged.
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+%% ---------------------------------------------------------------------------
+%% GM
+%% ---------------------------------------------------------------------------
+
+%% gm callback. Tells the coordinator CPid that the calling process
+%% has joined the group with the given Members; init/1 blocks on this
+%% message after starting a new gm member.
+joined([CPid], Members) ->
+ CPid ! {joined, self(), Members},
+ ok.
+
+%% gm callback. Births are ignored; when the change includes deaths
+%% they are forwarded to the coordinator as a {gm_deaths, Deaths} cast.
+members_changed([_CPid], _Births, []) ->
+ ok;
+members_changed([CPid], _Births, Deaths) ->
+ ok = gen_server2:cast(CPid, {gm_deaths, Deaths}).
+
+%% gm callback for messages broadcast on the group. Heartbeats are
+%% dropped, {ensure_monitoring, Pids} requests are forwarded to the
+%% coordinator as a cast, and every other message is ignored.
+handle_msg([_CPid], _From, heartbeat) ->
+ ok;
+handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) ->
+ ok = gen_server2:cast(CPid, Msg);
+handle_msg([_CPid], _From, _Msg) ->
+ ok.
+
+%% ---------------------------------------------------------------------------
+%% Others
+%% ---------------------------------------------------------------------------
+
+%% Builds a noreply result that also asks for hibernation.
+noreply(State) ->
+ {noreply, State, hibernate}.
+
+%% Builds a reply result that also asks for hibernation.
+reply(Reply, State) ->
+ {reply, Reply, State, hibernate}.
diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl
new file mode 100644
index 00000000..463b8cfb
--- /dev/null
+++ b/src/rabbit_mirror_queue_master.erl
@@ -0,0 +1,380 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_master).
+
+-export([init/4, terminate/2, delete_and_terminate/2,
+ purge/1, publish/4, publish_delivered/5, fetch/2, ack/2,
+ tx_publish/5, tx_ack/3, tx_rollback/2, tx_commit/4,
+ requeue/3, len/1, is_empty/1, drain_confirmed/1, dropwhile/2,
+ set_ram_duration_target/2, ram_duration/1,
+ needs_timeout/1, timeout/1, handle_pre_hibernate/1,
+ status/1, invoke/3, is_duplicate/3, discard/3]).
+
+-export([start/1, stop/0]).
+
+-export([promote_backing_queue_state/6, sender_death_fun/0]).
+
+-behaviour(rabbit_backing_queue).
+
+-include("rabbit.hrl").
+
+-record(state, { gm,
+ coordinator,
+ backing_queue,
+ backing_queue_state,
+ set_delivered,
+ seen_status,
+ confirmed,
+ ack_msg_id,
+ known_senders
+ }).
+
+%% For general documentation of HA design, see
+%% rabbit_mirror_queue_coordinator
+
+%% ---------------------------------------------------------------------------
+%% Backing queue
+%% ---------------------------------------------------------------------------
+
+%% This module is never installed as the default backing queue
+%% implementation, so this is never expected to be called; exit
+%% loudly if it is.
+start(_Queues) ->
+    Reason = {not_valid_for_generic_backing_queue, ?MODULE},
+    exit(Reason).
+
+%% As for start/1: not valid for this module, so exit.
+stop() ->
+    Reason = {not_valid_for_generic_backing_queue, ?MODULE},
+    exit(Reason).
+
+%% Returns a fun to be called with the pid of a dead sender. The fun
+%% asynchronously asks the process that called sender_death_fun/0 to
+%% broadcast the death over gm and drop the pid from known_senders.
+sender_death_fun() ->
+    QPid = self(),
+    fun (DeadPid) ->
+            Forget =
+                fun (?MODULE, State = #state { gm            = GM,
+                                               known_senders = KS }) ->
+                        ok = gm:broadcast(GM, {sender_death, DeadPid}),
+                        State #state {
+                          known_senders = sets:del_element(DeadPid, KS) }
+                end,
+            rabbit_amqqueue:run_backing_queue_async(QPid, ?MODULE, Forget)
+    end.
+
+%% Backing queue init for the master of a mirrored queue: start the
+%% coordinator, request a mirror on every configured node except this
+%% one, then initialise the configured backing queue module and wrap
+%% its state in ours.
+init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover,
+     AsyncCallback, SyncCallback) ->
+    {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(
+                   Q, undefined, sender_death_fun()),
+    GM = rabbit_mirror_queue_coordinator:get_gm(CPid),
+    Configured =
+        case MNodes of
+            all       -> rabbit_mnesia:all_clustered_nodes();
+            undefined -> [];
+            _         -> [list_to_atom(binary_to_list(NodeBin))
+                          || NodeBin <- MNodes]
+        end,
+    lists:foreach(
+      fun (Node) -> rabbit_mirror_queue_misc:add_mirror(QName, Node) end,
+      Configured -- [node()]),
+    {ok, BQ} = application:get_env(backing_queue_module),
+    BQS = BQ:init(Q, Recover, AsyncCallback, SyncCallback),
+    #state { gm                  = GM,
+             coordinator         = CPid,
+             backing_queue       = BQ,
+             backing_queue_state = BQS,
+             set_delivered       = 0,
+             seen_status         = dict:new(),
+             confirmed           = [],
+             ack_msg_id          = dict:new(),
+             known_senders       = sets:new() }.
+
+%% Build master state from the pieces supplied by the caller.
+%% set_delivered starts at the current backing queue length, and the
+%% known senders list becomes a set.
+promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) ->
+    Len     = BQ:len(BQS),
+    Senders = sets:from_list(KS),
+    #state { gm                  = GM,
+             coordinator         = CPid,
+             backing_queue       = BQ,
+             backing_queue_state = BQS,
+             set_delivered       = Len,
+             seen_status         = SeenStatus,
+             confirmed           = [],
+             ack_msg_id          = dict:new(),
+             known_senders       = Senders }.
+
+%% Backing queue termination.
+%%
+%% {shutdown, dropped}: this node has been explicitly dropped as a
+%% mirror. Non-durable queues are normally tidied up on startup, but
+%% we could be added back without the node restarting, so we do a
+%% full delete_and_terminate now - locally only, nothing is broadcast.
+%%
+%% Any other reason: the queue is going down but must not be deleted
+%% (most likely a safe shutdown of this node); some other slave can
+%% take over.
+terminate({shutdown, dropped} = Reason,
+          State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQS1 = BQ:delete_and_terminate(Reason, BQS),
+    State #state { backing_queue_state = BQS1, set_delivered = 0 };
+terminate(Reason,
+          State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQS1 = BQ:terminate(Reason, BQS),
+    State #state { backing_queue_state = BQS1 }.
+
+%% Tell the group to delete and terminate, then do the same to the
+%% local backing queue.
+delete_and_terminate(Reason,
+                     State = #state { gm                  = GM,
+                                      backing_queue       = BQ,
+                                      backing_queue_state = BQS }) ->
+    ok = gm:broadcast(GM, {delete_and_terminate, Reason}),
+    BQS1 = BQ:delete_and_terminate(Reason, BQS),
+    State #state { backing_queue_state = BQS1, set_delivered = 0 }.
+
+%% Broadcast a length of zero to the group, purge the local backing
+%% queue, and return the purged count with the new state.
+purge(State = #state { gm                  = GM,
+                       backing_queue       = BQ,
+                       backing_queue_state = BQS }) ->
+    ok = gm:broadcast(GM, {set_length, 0}),
+    {Purged, BQS1} = BQ:purge(BQS),
+    State1 = State #state { backing_queue_state = BQS1, set_delivered = 0 },
+    {Purged, State1}.
+
+%% Publish: broadcast to the group first, then publish into the local
+%% backing queue, and make sure the sending channel is monitored.
+publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid,
+        State = #state { gm                  = GM,
+                         seen_status         = SS,
+                         backing_queue       = BQ,
+                         backing_queue_state = BQS }) ->
+    %% ASSERTION: we must not already know about this message
+    false = dict:is_key(MsgId, SS),
+    ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}),
+    State1 = State #state {
+               backing_queue_state = BQ:publish(Msg, MsgProps, ChPid, BQS) },
+    ensure_monitoring(ChPid, State1).
+
+%% Publish a message that is delivered straight away. Returns
+%% {AckTag, State}.
+publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps,
+                  ChPid, State = #state { gm                  = GM,
+                                          seen_status         = SS,
+                                          backing_queue       = BQ,
+                                          backing_queue_state = BQS,
+                                          ack_msg_id          = AM }) ->
+    %% ASSERTION: we must not already know about this message
+    false = dict:is_key(MsgId, SS),
+    %% confirmed_broadcast is required here: it guarantees all slaves
+    %% are forced to interpret this publish_delivered at the same
+    %% point, especially if we die and a slave is promoted.
+    Instruction = {publish, {true, AckRequired}, ChPid, MsgProps, Msg},
+    ok = gm:confirmed_broadcast(GM, Instruction),
+    {AckTag, BQS1} =
+        BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS),
+    State1 = State #state {
+               backing_queue_state = BQS1,
+               ack_msg_id          = maybe_store_acktag(AckTag, MsgId, AM) },
+    {AckTag, ensure_monitoring(ChPid, State1)}.
+
+%% Run dropwhile on the backing queue, reduce set_delivered by the
+%% number of messages dropped (never below zero), and broadcast the
+%% resulting queue length to the group.
+dropwhile(Fun, State = #state { gm                  = GM,
+                                backing_queue       = BQ,
+                                backing_queue_state = BQS,
+                                set_delivered       = SetDelivered }) ->
+    LenBefore = BQ:len(BQS),
+    BQS1      = BQ:dropwhile(Fun, BQS),
+    LenAfter  = BQ:len(BQS1),
+    Remaining = erlang:max(0, SetDelivered - (LenBefore - LenAfter)),
+    ok = gm:broadcast(GM, {set_length, LenAfter}),
+    State #state { backing_queue_state = BQS1,
+                   set_delivered       = Remaining }.
+
+%% Collect confirms from the backing queue, holding back those for
+%% messages we have not yet seen published by the channel. Returns
+%% {MsgIds, State}, with any previously stashed confirms first.
+drain_confirmed(State = #state { backing_queue       = BQ,
+                                 backing_queue_state = BQS,
+                                 seen_status         = SS,
+                                 confirmed           = Confirmed }) ->
+    {Drained, BQS1} = BQ:drain_confirmed(BQS),
+    %% We will never see 'discarded' here.
+    Sift =
+        fun (MsgId, {Acc, SSN}) ->
+                case dict:find(MsgId, SSN) of
+                    {ok, published} ->
+                        %% Published while we were a slave; we were
+                        %% promoted before the channel's publish
+                        %% arrived and it still has not arrived. Hold
+                        %% the confirm back and record it; it will be
+                        %% issued when the channel's publish shows up.
+                        {Acc, dict:store(MsgId, confirmed, SSN)};
+                    {ok, confirmed} ->
+                        %% Well, confirms are racy by definition.
+                        {[MsgId | Acc], SSN};
+                    error ->
+                        {[MsgId | Acc], SSN}
+                end
+        end,
+    {ToIssue, SS1} = lists:foldl(Sift, {[], SS}, Drained),
+    {Confirmed ++ ToIssue, State #state { backing_queue_state = BQS1,
+                                          seen_status         = SS1,
+                                          confirmed           = [] }}.
+
+%% Fetch from the backing queue. On a hit, the fetch is broadcast to
+%% the group, the message is reported as delivered if either the
+%% backing queue says so or set_delivered is still positive, and
+%% set_delivered is decremented (never below zero).
+fetch(AckRequired, State = #state { gm                  = GM,
+                                    backing_queue       = BQ,
+                                    backing_queue_state = BQS,
+                                    set_delivered       = SetDelivered,
+                                    ack_msg_id          = AM }) ->
+    {Result, BQS1} = BQ:fetch(AckRequired, BQS),
+    State1 = State #state { backing_queue_state = BQS1 },
+    case Result of
+        {#basic_message { id = MsgId } = Message, IsDelivered, AckTag,
+         Remaining} ->
+            ok = gm:broadcast(GM, {fetch, AckRequired, MsgId, Remaining}),
+            Delivered = IsDelivered orelse SetDelivered > 0,
+            State2 = State1 #state {
+                       set_delivered = erlang:max(0, SetDelivered - 1),
+                       ack_msg_id    = maybe_store_acktag(AckTag, MsgId, AM) },
+            {{Message, Delivered, AckTag, Remaining}, State2};
+        empty ->
+            {Result, State1}
+    end.
+
+%% Ack in the backing queue, forget the ack tags, and broadcast the
+%% acked message ids to the group unless there are none.
+ack(AckTags, State = #state { gm                  = GM,
+                              backing_queue       = BQ,
+                              backing_queue_state = BQS,
+                              ack_msg_id          = AM }) ->
+    {MsgIds, BQS1} = BQ:ack(AckTags, BQS),
+    AM1 = lists:foldl(fun dict:erase/2, AM, AckTags),
+    ok = case MsgIds of
+             [] -> ok;
+             _  -> gm:broadcast(GM, {ack, MsgIds})
+         end,
+    {MsgIds, State #state { backing_queue_state = BQS1,
+                            ack_msg_id          = AM1 }}.
+
+%% Transactions are not supported in mirror queues: a no-op.
+tx_publish(_Txn, _Msg, _MsgProps, _ChPid, St) -> St.
+
+%% Transactions are not supported in mirror queues: a no-op.
+tx_ack(_Txn, _AckTags, St) -> St.
+
+%% Nothing to roll back: returns an empty list and the state as is.
+tx_rollback(_Txn, St) -> {[], St}.
+
+%% Nothing is committed, but the post-commit fun is still invoked
+%% (probably required to avoid deadlocks).
+tx_commit(_Txn, PostCommitFun, _MsgPropsFun, St) ->
+    _ = PostCommitFun(),
+    {[], St}.
+
+%% Requeue in the backing queue, then broadcast the requeued message
+%% ids (with the props fun) to the group.
+requeue(AckTags, MsgPropsFun,
+        State = #state { gm                  = GM,
+                         backing_queue       = BQ,
+                         backing_queue_state = BQS }) ->
+    {MsgIds, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS),
+    ok = gm:broadcast(GM, {requeue, MsgPropsFun, MsgIds}),
+    State1 = State #state { backing_queue_state = BQS1 },
+    {MsgIds, State1}.
+
+%% Delegates to the wrapped backing queue.
+len(#state { backing_queue = Mod, backing_queue_state = Inner }) ->
+    Mod:len(Inner).
+
+%% Delegates to the wrapped backing queue.
+is_empty(#state { backing_queue = Mod, backing_queue_state = Inner }) ->
+    Mod:is_empty(Inner).
+
+%% Delegates to the wrapped backing queue and stores its new state.
+set_ram_duration_target(Target,
+                        State = #state { backing_queue       = BQ,
+                                         backing_queue_state = BQS }) ->
+    BQS1 = BQ:set_ram_duration_target(Target, BQS),
+    State #state { backing_queue_state = BQS1 }.
+
+%% Delegates to the wrapped backing queue; returns {Duration, State}.
+ram_duration(State = #state { backing_queue       = BQ,
+                              backing_queue_state = BQS }) ->
+    {Duration, BQS1} = BQ:ram_duration(BQS),
+    State1 = State #state { backing_queue_state = BQS1 },
+    {Duration, State1}.
+
+%% Delegates to the wrapped backing queue.
+needs_timeout(#state { backing_queue = Mod, backing_queue_state = Inner }) ->
+    Mod:needs_timeout(Inner).
+
+%% Delegates to the wrapped backing queue and stores its new state.
+timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    BQS1 = BQ:timeout(BQS),
+    State #state { backing_queue_state = BQS1 }.
+
+%% Delegates to the wrapped backing queue and stores its new state.
+handle_pre_hibernate(State = #state { backing_queue       = BQ,
+                                      backing_queue_state = BQS }) ->
+    BQS1 = BQ:handle_pre_hibernate(BQS),
+    State #state { backing_queue_state = BQS1 }.
+
+%% Delegates to the wrapped backing queue.
+status(#state { backing_queue = Mod, backing_queue_state = Inner }) ->
+    Mod:status(Inner).
+
+%% Funs addressed to this module are applied to our own state; funs
+%% for any other module are passed down to the wrapped backing queue.
+invoke(?MODULE, Fun, State) ->
+    Fun(?MODULE, State);
+invoke(Mod, Fun, State = #state { backing_queue       = BQ,
+                                  backing_queue_state = BQS }) ->
+    BQS1 = BQ:invoke(Mod, Fun, BQS),
+    State #state { backing_queue_state = BQS1 }.
+
+%% Decide whether a message arriving from a channel has already been
+%% handled. We may be about to receive a message that we already saw
+%% (via gm) while we were a slave, in which case a confirm may be
+%% waiting to be issued for it.
+%%
+%% Outside a transaction the answer depends on seen_status. We will
+%% never see {published, ChPid, MsgSeqNo} here.
+is_duplicate(none, Message = #basic_message { id = MsgId },
+             State = #state { seen_status         = SS,
+                              backing_queue       = BQ,
+                              backing_queue_state = BQS,
+                              confirmed           = Confirmed }) ->
+    case dict:find(MsgId, SS) of
+        {ok, published} ->
+            %% Already published while we were a slave, with no
+            %% confirmation waiting. amqqueue_process will have the
+            %% msg_id_to_channel entry needed to deal with the confirm
+            %% when it comes back (it is added immediately after
+            %% calling is_duplicate). The msg is invalid. We will not
+            %% see it again, nor be further involved in confirming it,
+            %% so erase.
+            {published, State #state { seen_status = dict:erase(MsgId, SS) }};
+        {ok, confirmed} ->
+            %% Published via gm while we were a slave and confirmed
+            %% some time later (maybe even after promotion), but
+            %% before the channel's publish reached us - so the
+            %% msg_seq_no was unknown and we could not confirm as a
+            %% slave. Confirm now. As above, amqqueue_process adds the
+            %% msg_id_to_channel entry immediately after calling
+            %% is_duplicate/2.
+            {published, State #state { seen_status = dict:erase(MsgId, SS),
+                                       confirmed   = [MsgId | Confirmed] }};
+        {ok, discarded} ->
+            %% Do not erase from SS: discard/2 is about to be called
+            %% and must be able to detect this case.
+            {discarded, State};
+        error ->
+            %% Not something we are filtering out ourselves, so the
+            %% underlying BQ is allowed to have a peek at it.
+            {Result, BQS1} = BQ:is_duplicate(none, Message, BQS),
+            {Result, State #state { backing_queue_state = BQS1 }}
+    end;
+is_duplicate(_Txn, _Msg, State) ->
+    %% In a transaction. Transactions are not supported in mirror
+    %% queues, but it's probably not a duplicate...
+    {false, State}.
+
+%% Discard a message on behalf of a channel.
+%%
+%% It's a massive error if we get told to discard something that's
+%% already been published or published-and-confirmed: that would
+%% require non FIFO access. Hence 'published' and 'confirmed' are not
+%% matched below and would crash with a case_clause.
+%%
+%% Two cases remain:
+%%  - the message is unknown to us: broadcast the discard to the
+%%    group and pass it to the backing queue;
+%%  - the message is recorded as 'discarded': nothing to broadcast or
+%%    pass down. is_duplicate/3 deliberately left the entry in place
+%%    so we could detect this case, so we remove it here; otherwise
+%%    it would stay in seen_status indefinitely.
+%%
+%% (The erase used to be done in the 'error' branch, where the key is
+%% by definition absent and dict:erase/2 has nothing to remove.)
+discard(Msg = #basic_message { id = MsgId }, ChPid,
+        State = #state { gm                  = GM,
+                         backing_queue       = BQ,
+                         backing_queue_state = BQS,
+                         seen_status         = SS }) ->
+    case dict:find(MsgId, SS) of
+        error ->
+            ok = gm:broadcast(GM, {discard, ChPid, Msg}),
+            State #state { backing_queue_state = BQ:discard(Msg, ChPid, BQS) };
+        {ok, discarded} ->
+            State #state { seen_status = dict:erase(MsgId, SS) }
+    end.
+
+%% Remember AckTag -> MsgId, unless there is no ack tag (undefined).
+maybe_store_acktag(AckTag, MsgId, AM) ->
+    case AckTag of
+        undefined -> AM;
+        _         -> dict:store(AckTag, MsgId, AM)
+    end.
+
+%% The first time we see a sender, ask the coordinator to monitor it
+%% and record it in known_senders; later calls are no-ops.
+ensure_monitoring(ChPid, State = #state { coordinator   = CPid,
+                                          known_senders = KS }) ->
+    AlreadyKnown = sets:is_element(ChPid, KS),
+    if AlreadyKnown ->
+            State;
+       true ->
+            ok = rabbit_mirror_queue_coordinator:ensure_monitoring(
+                   CPid, [ChPid]),
+            State #state { known_senders = sets:add_element(ChPid, KS) }
+    end.
diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl
new file mode 100644
index 00000000..4761f79e
--- /dev/null
+++ b/src/rabbit_mirror_queue_misc.erl
@@ -0,0 +1,135 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_misc).
+
+-export([remove_from_queue/2, on_node_up/0,
+ drop_mirror/2, drop_mirror/3, add_mirror/2, add_mirror/3]).
+
+-include("rabbit.hrl").
+
+%% Remove every pid living on the node of a dead pid from the queue's
+%% mnesia record. Returns {ok, MasterPid} (the first surviving pid)
+%% or {error, not_found} if the queue record is gone.
+%%
+%% If the dead pids include the queue pid (i.e. the master has died),
+%% the record is only rewritten by the process about to be promoted.
+%% Otherwise a slave could update the record, promoting another slave
+%% before that slave realises it has become the new master - which is
+%% bad, because the slave (now master) could then receive messages it
+%% is not ready for (for example, new consumers).
+remove_from_queue(QueueName, DeadPids) ->
+    DeadNodes = [node(DeadPid) || DeadPid <- DeadPids],
+    Remove =
+        fun () ->
+                %% Someone else could have deleted the queue before
+                %% we get here.
+                case mnesia:read({rabbit_queue, QueueName}) of
+                    [Q = #amqqueue { pid = QPid, slave_pids = SPids }] ->
+                        Alive = [Pid || Pid <- [QPid | SPids],
+                                        not lists:member(node(Pid),
+                                                         DeadNodes)],
+                        [QPid1 | SPids1] = Alive,
+                        Unchanged = QPid =:= QPid1 andalso SPids =:= SPids1,
+                        %% Either the master hasn't changed, so we're
+                        %% ok to update mnesia; or we have become the
+                        %% master.
+                        MayWrite = QPid =:= QPid1
+                            orelse node(QPid1) =:= node(),
+                        if Unchanged ->
+                                ok;
+                           MayWrite ->
+                                Q1 = Q #amqqueue { pid        = QPid1,
+                                                   slave_pids = SPids1 },
+                                ok = rabbit_amqqueue:store_queue(Q1);
+                           true ->
+                                %% Master has changed and we're not
+                                %% it: leave the record alone so the
+                                %% promoted slave can find it and
+                                %% make its promotion atomic.
+                                ok
+                        end,
+                        {ok, QPid1};
+                    [] ->
+                        {error, not_found}
+                end
+        end,
+    rabbit_misc:execute_mnesia_transaction(Remove).
+
+%% Find every queue whose mirror_nodes setting covers this node and
+%% ask for a mirror of it to be added here. Always returns ok; the
+%% individual add_mirror/2 results are ignored.
+%%
+%% A queue qualifies when mirror_nodes is 'all', or when it is a list
+%% containing this node. rabbit_mirror_queue_master:init/4 converts
+%% the list entries with list_to_atom(binary_to_list(Node)), i.e. it
+%% treats them as binaries, so testing membership with the node()
+%% atom alone never matches such a list. We therefore accept the node
+%% name in either form.
+on_node_up() ->
+    Node    = node(),
+    NodeBin = list_to_binary(atom_to_list(Node)),
+    Collect =
+        fun (#amqqueue { mirror_nodes = undefined }, QsN) ->
+                QsN;
+            (#amqqueue { name = QName, mirror_nodes = all }, QsN) ->
+                [QName | QsN];
+            (#amqqueue { name = QName, mirror_nodes = MNodes }, QsN) ->
+                case lists:member(Node, MNodes) orelse
+                    lists:member(NodeBin, MNodes) of
+                    true  -> [QName | QsN];
+                    false -> QsN
+                end
+        end,
+    Qs = rabbit_misc:execute_mnesia_transaction(
+           fun () -> mnesia:foldl(Collect, [], rabbit_queue) end),
+    lists:foreach(fun (Q) -> add_mirror(Q, Node) end, Qs),
+    ok.
+
+%% Convenience form: builds the queue resource name from the vhost
+%% and queue name, then defers to drop_mirror/2.
+drop_mirror(VHostPath, QueueName, MirrorNode) ->
+    Resource = rabbit_misc:r(VHostPath, queue, QueueName),
+    drop_mirror(Resource, MirrorNode).
+
+%% Drop the mirror of Queue living on MirrorNode by sending it an
+%% exit signal with reason {shutdown, dropped}. Refuses if the queue
+%% has no mirror on that node, or if that mirror is the master and
+%% there are no slaves.
+drop_mirror(Queue, MirrorNode) ->
+    Drop =
+        fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids }) ->
+                OnNode = [Pid || Pid <- [QPid | SPids],
+                                 node(Pid) =:= MirrorNode],
+                case OnNode of
+                    [] ->
+                        {error, {queue_not_mirrored_on_node, MirrorNode}};
+                    [QPid] when SPids =:= [] ->
+                        {error, cannot_drop_only_mirror};
+                    [Victim] ->
+                        rabbit_log:info(
+                          "Dropping queue mirror on node ~p for ~s~n",
+                          [MirrorNode, rabbit_misc:rs(Name)]),
+                        exit(Victim, {shutdown, dropped}),
+                        ok
+                end
+        end,
+    if_mirrored_queue(Queue, Drop).
+
+%% Convenience form: builds the queue resource name from the vhost
+%% and queue name, then defers to add_mirror/2.
+add_mirror(VHostPath, QueueName, MirrorNode) ->
+    Resource = rabbit_misc:r(VHostPath, queue, QueueName),
+    add_mirror(Resource, MirrorNode).
+
+%% Start a slave for Queue on MirrorNode, unless the queue already has
+%% a process there. A successful start is reported as ok; any other
+%% start result is returned unchanged.
+add_mirror(Queue, MirrorNode) ->
+    Add =
+        fun (#amqqueue { name = Name, pid = QPid, slave_pids = SPids } = Q) ->
+                OnNode = [Pid || Pid <- [QPid | SPids],
+                                 node(Pid) =:= MirrorNode],
+                case OnNode of
+                    [_] ->
+                        {error, {queue_already_mirrored_on_node, MirrorNode}};
+                    [] ->
+                        Result = rabbit_mirror_queue_slave_sup:start_child(
+                                   MirrorNode, [Q]),
+                        rabbit_log:info(
+                          "Adding mirror of queue ~s on node ~p: ~p~n",
+                          [rabbit_misc:rs(Name), MirrorNode, Result]),
+                        case Result of
+                            {ok, _Pid} -> ok;
+                            _          -> Result
+                        end
+                end
+        end,
+    if_mirrored_queue(Queue, Add).
+
+%% Apply Fun to the queue record only if the queue was declared with
+%% an x-ha-policy argument; otherwise the result is ok.
+if_mirrored_queue(Queue, Fun) ->
+    Guarded =
+        fun (#amqqueue { arguments = Args } = Q) ->
+                Policy = rabbit_misc:table_lookup(Args, <<"x-ha-policy">>),
+                case Policy of
+                    undefined -> ok;
+                    _         -> Fun(Q)
+                end
+        end,
+    rabbit_amqqueue:with(Queue, Guarded).
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl
new file mode 100644
index 00000000..55d61d41
--- /dev/null
+++ b/src/rabbit_mirror_queue_slave.erl
@@ -0,0 +1,873 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_slave).
+
+%% For general documentation of HA design, see
+%% rabbit_mirror_queue_coordinator
+%%
+%% We join the GM group before we add ourselves to the amqqueue
+%% record. As a result:
+%% 1. We can receive msgs from GM that correspond to messages we will
+%% never receive from publishers.
+%% 2. When we receive a message from publishers, we must receive a
+%% message from the GM group for it.
+%% 3. However, that instruction from the GM group can arrive either
+%% before or after the actual message. We need to be able to
+%% distinguish between GM instructions arriving early, and case (1)
+%% above.
+%%
+%% All instructions from the GM group must be processed in the order
+%% in which they're received.
+
+-export([start_link/1, set_maximum_since_use/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3, handle_pre_hibernate/1, prioritise_call/3,
+ prioritise_cast/2]).
+
+-export([joined/2, members_changed/3, handle_msg/3]).
+
+-behaviour(gen_server2).
+-behaviour(gm).
+
+-include("rabbit.hrl").
+-include("gm_specs.hrl").
+
+-define(SYNC_INTERVAL, 25). %% milliseconds
+-define(RAM_DURATION_UPDATE_INTERVAL, 5000).
+-define(DEATH_TIMEOUT, 20000). %% 20 seconds
+
+-record(state, { q,
+ gm,
+ master_pid,
+ backing_queue,
+ backing_queue_state,
+ sync_timer_ref,
+ rate_timer_ref,
+
+ sender_queues, %% :: Pid -> {Q {Msg, Bool}, Set MsgId}
+ msg_id_ack, %% :: MsgId -> AckTag
+ ack_num,
+
+ msg_id_status,
+ known_senders
+ }).
+
+%% Start a linked slave process for queue record Q.
+start_link(Q) ->
+    InitArgs = [Q],
+    gen_server2:start_link(?MODULE, InitArgs, []).
+
+%% Asynchronously pass Age on to the slave process QPid.
+set_maximum_since_use(QPid, Age) ->
+    Request = {set_maximum_since_use, Age},
+    gen_server2:cast(QPid, Request).
+
+%% Slave start-up: join the gm group and wait for the join to be
+%% reported, add ourselves to the queue's slave_pids in mnesia,
+%% monitor the master, register with the file handle cache and the
+%% memory monitor, and initialise the backing queue.
+init([#amqqueue { name = QueueName } = Q]) ->
+    process_flag(trap_exit, true), %% amqqueue_process traps exits too.
+    Self = self(),
+    {ok, GM} = gm:start_link(QueueName, ?MODULE, [Self]),
+    receive
+        {joined, GM} -> ok
+    end,
+    Node = node(),
+    Register =
+        fun () ->
+                [Q1 = #amqqueue { pid = QPid, slave_pids = MPids }] =
+                    mnesia:read({rabbit_queue, QueueName}),
+                %% ASSERTION: the queue has no process on this node yet
+                [] = [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node],
+                mnesia:write(rabbit_queue,
+                             Q1 #amqqueue { slave_pids = MPids ++ [Self] },
+                             write),
+                {ok, QPid}
+        end,
+    {ok, MPid} = rabbit_misc:execute_mnesia_transaction(Register),
+    erlang:monitor(process, MPid),
+    ok = file_handle_cache:register_callback(
+           rabbit_amqqueue, set_maximum_since_use, [Self]),
+    ok = rabbit_memory_monitor:register(
+           Self, {rabbit_amqqueue, set_ram_duration_target, [Self]}),
+    {ok, BQ} = application:get_env(backing_queue_module),
+    BQS = bq_init(BQ, Q, false),
+    State = #state { q                   = Q,
+                     gm                  = GM,
+                     master_pid          = MPid,
+                     backing_queue       = BQ,
+                     backing_queue_state = BQS,
+                     rate_timer_ref      = undefined,
+                     sync_timer_ref      = undefined,
+
+                     sender_queues       = dict:new(),
+                     msg_id_ack          = dict:new(),
+                     ack_num             = 0,
+
+                     msg_id_status       = dict:new(),
+                     known_senders       = dict:new() },
+    {ok, State, hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
+      ?DESIRED_HIBERNATE}}.
+
+%% Synchronous, "immediate" delivery mode.
+%%
+%% Replying 'false' is safe even if (a) we have not seen the msg via
+%% gm, or (b) the master dies before we receive the msg via gm. For
+%% (a): we will eventually get the msg via gm, and only the master's
+%% result to the channel matters. For (b): if the master dies and we
+%% are promoted, we have no consumers at that point, so 'false' is
+%% exactly right - but we must be careful _not_ to enqueue the
+%% message in that case.
+%%
+%% This is distinct from the case where we receive the msg via gm
+%% first, are then promoted to master, and only then receive the msg
+%% from the channel.
+handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) ->
+    gen_server2:reply(From, false), %% master may deliver it, not us
+    State1 = maybe_enqueue_message(Delivery, false, State),
+    noreply(State1);
+
+%% Synchronous, "mandatory" delivery mode.
+handle_call({deliver, Delivery = #delivery {}}, From, State) ->
+    gen_server2:reply(From, true), %% amqqueue throws away the result anyway
+    State1 = maybe_enqueue_message(Delivery, true, State),
+    noreply(State1);
+
+%% The GM has told us about deaths, which means we're not going to
+%% receive any more messages from GM.
+handle_call({gm_deaths, Deaths}, From,
+            State = #state { q          = #amqqueue { name = QueueName },
+                             gm         = GM,
+                             master_pid = MPid }) ->
+    rabbit_log:info("Mirrored-queue (~s): Slave ~s saw deaths of mirrors ~s~n",
+                    [rabbit_misc:rs(QueueName),
+                     rabbit_misc:pid_to_string(self()),
+                     [[rabbit_misc:pid_to_string(Pid), $\s] || Pid <- Deaths]]),
+    case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of
+        {error, not_found} ->
+            gen_server2:reply(From, ok),
+            {stop, normal, State};
+        {ok, NewMPid} when node(NewMPid) =:= node(MPid) ->
+            %% master hasn't changed
+            reply(ok, State);
+        {ok, NewMPid} when node(NewMPid) =:= node() ->
+            %% we've become master
+            promote_me(From, State);
+        {ok, NewMPid} ->
+            %% master has changed to not us.
+            gen_server2:reply(From, ok),
+            erlang:monitor(process, NewMPid),
+            ok = gm:broadcast(GM, heartbeat),
+            noreply(State #state { master_pid = NewMPid })
+    end;
+
+handle_call({run_backing_queue, Mod, Fun}, _From, State) ->
+    State1 = run_backing_queue(Mod, Fun, State),
+    reply(ok, State1);
+
+%% We don't support transactions in mirror queues.
+handle_call({commit, _Txn, _ChPid}, _From, State) ->
+    reply(ok, State).
+
+%% Asynchronous requests to the slave process.
+handle_cast({run_backing_queue, Mod, Fun}, State) ->
+    State1 = run_backing_queue(Mod, Fun, State),
+    noreply(State1);
+
+%% An instruction relayed from the gm group.
+handle_cast({gm, Instruction}, State) ->
+    Outcome = process_instruction(Instruction, State),
+    handle_process_result(Outcome);
+
+%% Asynchronous, non-"mandatory", non-"immediate" deliver mode.
+handle_cast({deliver, Delivery = #delivery {}}, State) ->
+    State1 = maybe_enqueue_message(Delivery, true, State),
+    noreply(State1);
+
+handle_cast({set_maximum_since_use, Age}, State) ->
+    ok = file_handle_cache:set_maximum_since_use(Age),
+    noreply(State);
+
+handle_cast({set_ram_duration_target, Duration},
+            State = #state { backing_queue       = BQ,
+                             backing_queue_state = BQS }) ->
+    State1 = State #state {
+               backing_queue_state =
+                   BQ:set_ram_duration_target(Duration, BQS) },
+    noreply(State1);
+
+%% Measure the ram duration, report it to the memory monitor and
+%% apply the desired duration it answers with.
+handle_cast(update_ram_duration,
+            State = #state { backing_queue       = BQ,
+                             backing_queue_state = BQS }) ->
+    {Measured, BQS1} = BQ:ram_duration(BQS),
+    Desired = rabbit_memory_monitor:report_ram_duration(self(), Measured),
+    BQS2 = BQ:set_ram_duration_target(Desired, BQS1),
+    noreply(State #state { rate_timer_ref      = just_measured,
+                           backing_queue_state = BQS2 });
+
+handle_cast(sync_timeout, State) ->
+    State1 = State #state { sync_timer_ref = undefined },
+    noreply(backing_queue_timeout(State1));
+
+%% We don't support transactions in mirror queues.
+handle_cast({rollback, _Txn, _ChPid}, State) ->
+    noreply(State).
+
+%% Raw messages: timeouts, monitor notifications and exit signals.
+handle_info(timeout, State) ->
+    noreply(backing_queue_timeout(State));
+
+%% The monitored master went down: announce its death to the group.
+handle_info({'DOWN', _MRef, process, MPid, _Why},
+            State = #state { gm = GM, master_pid = MPid }) ->
+    ok = gm:broadcast(GM, {process_death, MPid}),
+    noreply(State);
+
+%% Any other monitored process going down is treated as a sender.
+handle_info({'DOWN', _MRef, process, ChPid, _Why}, State) ->
+    State1 = local_sender_death(ChPid, State),
+    noreply(State1);
+
+%% We trap exits; an exit signal stops us with the same reason.
+handle_info({'EXIT', _From, Why}, State) ->
+    {stop, Why, State};
+
+handle_info(Other, State) ->
+    {stop, {unexpected_info, Other}, State}.
+
+%% A Reason of shutdown, or {shutdown, _}, does not mean the queue is
+%% being deleted: the node is just going down. Although we are only a
+%% slave, we cannot know whether we will be the only copy when coming
+%% back up, so we assume we will be and preserve what we have on
+%% disk.
+terminate(_Reason, #state { backing_queue_state = undefined }) ->
+    %% A delete_and_terminate already arrived from gm, so there is
+    %% nothing left to do here.
+    ok;
+terminate({shutdown, dropped} = Reason,
+          #state { backing_queue = BQ, backing_queue_state = BQS }) ->
+    %% See rabbit_mirror_queue_master:terminate/2
+    BQ:delete_and_terminate(Reason, BQS);
+terminate(Reason, #state { q                   = Q,
+                           gm                  = GM,
+                           backing_queue       = BQ,
+                           backing_queue_state = BQS,
+                           rate_timer_ref      = RateTRef }) ->
+    ok = gm:leave(GM),
+    QState = rabbit_amqqueue_process:init_with_backing_queue_state(
+               Q, BQ, BQS, RateTRef, [], [], dict:new()),
+    rabbit_amqqueue_process:terminate(Reason, QState);
+terminate([_SPid], _Reason) ->
+    %% gm case
+    ok.
+
+%% No state migration is performed on code upgrade.
+code_change(_FromVsn, St, _Extra) -> {ok, St}.
+
+%% Before hibernating: report the ram duration to the memory monitor,
+%% apply the desired duration it returns, give the backing queue its
+%% own pre-hibernate hook, and stop the rate timer.
+handle_pre_hibernate(State = #state { backing_queue       = BQ,
+                                      backing_queue_state = BQS }) ->
+    {Measured, BQS1} = BQ:ram_duration(BQS),
+    Desired = rabbit_memory_monitor:report_ram_duration(self(), Measured),
+    BQS2 = BQ:set_ram_duration_target(Desired, BQS1),
+    BQS3 = BQ:handle_pre_hibernate(BQS2),
+    State1 = State #state { backing_queue_state = BQS3 },
+    {hibernate, stop_rate_timer(State1)}.
+
+%% Priority of incoming calls; anything not listed gets 0.
+prioritise_call({run_backing_queue, _Mod, _Fun}, _From, _State) -> 6;
+prioritise_call({gm_deaths, _Deaths},            _From, _State) -> 5;
+prioritise_call(_Other,                          _From, _State) -> 0.
+
+%% Priority of incoming casts; anything not listed gets 0.
+prioritise_cast(update_ram_duration,                  _State) -> 8;
+prioritise_cast({set_ram_duration_target, _Duration}, _State) -> 8;
+prioritise_cast({set_maximum_since_use, _Age},        _State) -> 8;
+prioritise_cast({run_backing_queue, _Mod, _Fun},      _State) -> 6;
+prioritise_cast(sync_timeout,                         _State) -> 6;
+prioritise_cast({gm, _Msg},                           _State) -> 5;
+prioritise_cast({post_commit, _Txn, _AckTags},        _State) -> 4;
+prioritise_cast(_Other,                               _State) -> 0.
+
+%% ---------------------------------------------------------------------------
+%% GM
+%% ---------------------------------------------------------------------------
+
+%% gm callback: tell the slave process which gm pid has joined.
+joined([SlavePid], _Members) ->
+    _ = erlang:send(SlavePid, {joined, self()}),
+    ok.
+
+%% gm callback: only deaths are of interest; they are reported to the
+%% slave process via inform_deaths/2. Births are ignored.
+members_changed([SPid], _Births, Deaths) ->
+    case Deaths of
+        [] -> ok;
+        _  -> inform_deaths(SPid, Deaths)
+    end.
+
+%% gm callback: dispatch a broadcast received from the group.
+handle_msg([SPid], _From, Msg) ->
+    case Msg of
+        heartbeat ->
+            ok;
+        {ensure_monitoring, _Pid} ->
+            %% This is only of value to the master
+            ok;
+        {process_death, Pid} ->
+            inform_deaths(SPid, [Pid]);
+        _ ->
+            ok = gen_server2:cast(SPid, {gm, Msg})
+    end.
+
+%% Synchronously tell the slave process about deaths. If the slave
+%% answers {promote, CPid}, the gm callback module is switched to the
+%% coordinator. The exit handler yields {stop, normal}.
+inform_deaths(SPid, Deaths) ->
+    OnExit = fun () -> {stop, normal} end,
+    Inform =
+        fun () ->
+                Answer = gen_server2:call(SPid, {gm_deaths, Deaths}, infinity),
+                case Answer of
+                    {promote, CPid} ->
+                        {become, rabbit_mirror_queue_coordinator, [CPid]};
+                    ok ->
+                        ok
+                end
+        end,
+    rabbit_misc:with_exit_handler(OnExit, Inform).
+
+%% ---------------------------------------------------------------------------
+%% Others
+%% ---------------------------------------------------------------------------
+
+%% Initialise backing queue module BQ for Q, giving it an async and a
+%% sync callback that both run funs against the calling process. The
+%% sync callback's exit handler yields 'error'.
+bq_init(BQ, Q, Recover) ->
+    Owner = self(),
+    Async = fun (Mod, Fun) ->
+                    rabbit_amqqueue:run_backing_queue_async(Owner, Mod, Fun)
+            end,
+    Sync  = fun (Mod, Fun) ->
+                    rabbit_misc:with_exit_handler(
+                      fun () -> error end,
+                      fun () ->
+                              rabbit_amqqueue:run_backing_queue(
+                                Owner, Mod, Fun)
+                      end)
+            end,
+    BQ:init(Q, Recover, Async, Sync).
+
+%% Funs addressed to rabbit_mirror_queue_master are applied to the
+%% slave's own state (this might look a little crazy, but see the
+%% comments in confirm_sender_death/1); funs for any other module go
+%% to the backing queue.
+run_backing_queue(rabbit_mirror_queue_master, Fun, State) ->
+    Fun(?MODULE, State);
+run_backing_queue(Mod, Fun, State = #state { backing_queue       = BQ,
+                                             backing_queue_state = BQS }) ->
+    BQS1 = BQ:invoke(Mod, Fun, BQS),
+    State #state { backing_queue_state = BQS1 }.
+
+%% When does a delivery need confirming?
+%%   never       - it carries no msg_seq_no;
+%%   eventually  - a persistent message on a durable queue;
+%%   immediately - everything else.
+needs_confirming(#delivery { msg_seq_no = undefined }, _State) ->
+    never;
+needs_confirming(
+  #delivery { message = #basic_message { is_persistent = true } },
+  #state { q = #amqqueue { durable = true } }) ->
+    eventually;
+needs_confirming(_Delivery, _State) ->
+    immediately.
+
+%% Work through MsgIds against msg_id_status, collecting the sequence
+%% numbers that can now be confirmed per channel, then send one
+%% confirm per channel. We will never see 'discarded' here.
+confirm_messages(MsgIds, State = #state { msg_id_status = MS }) ->
+    Step =
+        fun (MsgId, {MSN, CMsN} = Acc) ->
+                case dict:find(MsgId, MSN) of
+                    {ok, {published, ChPid, MsgSeqNo}} ->
+                        %% Seen from both GM and channel: can confirm
+                        %% now.
+                        {dict:erase(MsgId, MSN),
+                         gb_trees_cons(ChPid, MsgSeqNo, CMsN)};
+                    {ok, {published, ChPid}} ->
+                        %% Still not seen from the channel; just
+                        %% record that it has been confirmed.
+                        {dict:store(MsgId, {confirmed, ChPid}, MSN), CMsN};
+                    {ok, {confirmed, _ChPid}} ->
+                        %% Already confirmed. Probably it was both
+                        %% sync'd to disk and then delivered and
+                        %% ack'd before we saw the publish from the
+                        %% channel. Nothing to do here.
+                        Acc;
+                    error ->
+                        %% If it needed confirming, that has already
+                        %% been done.
+                        Acc
+                end
+        end,
+    {MS1, CMs} = lists:foldl(Step, {MS, gb_trees:empty()}, MsgIds),
+    lists:foreach(
+      fun ({ChPid, MsgSeqNos}) ->
+              ok = rabbit_channel:confirm(ChPid, MsgSeqNos)
+      end, gb_trees:to_list(CMs)),
+    State #state { msg_id_status = MS1 }.
+
+%% Prepend Value to the list stored under Key, starting a new list if
+%% Key is not in the tree yet.
+gb_trees_cons(Key, Value, Tree) ->
+    Existing = case gb_trees:lookup(Key, Tree) of
+                   none           -> [];
+                   {value, Found} -> Found
+               end,
+    gb_trees:enter(Key, [Value | Existing], Tree).
+
+%% Turn the outcome of processing a gm instruction into a gen_server2
+%% return value: carry on, or stop normally.
+handle_process_result({Outcome, State}) ->
+    case Outcome of
+        ok   -> noreply(State);
+        stop -> {stop, normal, State}
+    end.
+
+%% Promote this slave to master. Starts a coordinator linked to this
+%% process, replies {promote, CPid} to From, transfers the sender
+%% monitors to the coordinator, builds the master's backing queue
+%% state and the rabbit_amqqueue_process state from the slave state,
+%% and returns a 'become' tuple for rabbit_amqqueue_process.
+promote_me(From, #state { q = Q,
+ gm = GM,
+ backing_queue = BQ,
+ backing_queue_state = BQS,
+ rate_timer_ref = RateTRef,
+ sender_queues = SQ,
+ msg_id_ack = MA,
+ msg_id_status = MS,
+ known_senders = KS }) ->
+ rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n",
+ [rabbit_misc:rs(Q #amqqueue.name),
+ rabbit_misc:pid_to_string(self())]),
+ Q1 = Q #amqqueue { pid = self() },
+ {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(
+ Q1, GM, rabbit_mirror_queue_master:sender_death_fun()),
+ true = unlink(GM),
+ gen_server2:reply(From, {promote, CPid}),
+ ok = gm:confirmed_broadcast(GM, heartbeat),
+
+ %% Everything that we're monitoring, we need to ensure our new
+ %% coordinator is monitoring.
+
+ MonitoringPids = [begin true = erlang:demonitor(MRef),
+ Pid
+ end || {Pid, MRef} <- dict:to_list(KS)],
+ ok = rabbit_mirror_queue_coordinator:ensure_monitoring(
+ CPid, MonitoringPids),
+
+ %% We find all the messages that we've received from channels but
+ %% not from gm, and if they're due to be enqueued on promotion
+ %% then we pass them to the
+ %% queue_process:init_with_backing_queue_state to be enqueued.
+ %%
+ %% We also have to requeue messages which are pending acks: the
+ %% consumers from the master queue have been lost and so these
+ %% messages need requeuing. They might also be pending
+ %% confirmation, and indeed they might also be pending arrival of
+ %% the publication from the channel itself, if we received both
+ %% the publication and the fetch via gm first! Requeuing doesn't
+ %% affect confirmations: if the message was previously pending a
+ %% confirmation then it still will be, under the same msg_id. So
+ %% as a master, we need to be prepared to filter out the
+ %% publication of said messages from the channel (is_duplicate
+ %% (thus such requeued messages must remain in the msg_id_status
+ %% (MS) which becomes seen_status (SS) in the master)).
+ %%
+ %% Then there are messages we already have in the queue, which are
+ %% not currently pending acknowledgement:
+ %% 1. Messages we've only received via gm:
+ %% Filter out subsequent publication from channel through
+ %% validate_message. Might have to issue confirms then or
+ %% later, thus queue_process state will have to know that
+ %% there's a pending confirm.
+ %% 2. Messages received via both gm and channel:
+ %% Queue will have to deal with issuing confirms if necessary.
+ %%
+ %% MS contains the following four entry types:
+ %%
+ %% a) {published, ChPid}:
+ %% published via gm only; pending arrival of publication from
+ %% channel, maybe pending confirm.
+ %%
+ %% b) {published, ChPid, MsgSeqNo}:
+ %% published via gm and channel; pending confirm.
+ %%
+ %% c) {confirmed, ChPid}:
+ %% published via gm only, and confirmed; pending publication
+ %% from channel.
+ %%
+ %% d) discarded
+ %% seen via gm only as discarded. Pending publication from
+ %% channel
+ %%
+ %% The forms a, c and d only, need to go to the master state
+ %% seen_status (SS).
+ %%
+ %% The form b only, needs to go through to the queue_process
+ %% state to form the msg_id_to_channel mapping (MTC).
+ %%
+ %% No messages that are enqueued from SQ at this point will have
+ %% entries in MS.
+ %%
+ %% Messages that are extracted from MA may have entries in MS, and
+ %% those messages are then requeued. However, as discussed above,
+ %% this does not affect MS, nor which bits go through to SS in
+ %% Master, or MTC in queue_process.
+ %%
+ %% Everything that's in MA gets requeued. Consequently the new
+ %% master should start with a fresh AM as there are no messages
+ %% pending acks (txns will have been rolled back).
+
+ MSList = dict:to_list(MS),
+ %% SS keeps forms a, c and d, dropping the ChPid from a and c.
+ SS = dict:from_list(
+ [E || E = {_MsgId, discarded} <- MSList] ++
+ [{MsgId, Status}
+ || {MsgId, {Status, _ChPid}} <- MSList,
+ Status =:= published orelse Status =:= confirmed]),
+
+ MasterState = rabbit_mirror_queue_master:promote_backing_queue_state(
+ CPid, BQ, BQS, GM, SS, MonitoringPids),
+
+ %% MTC keeps form b only.
+ MTC = dict:from_list(
+ [{MsgId, {ChPid, MsgSeqNo}} ||
+ {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]),
+ %% Ack tags are ordered by the ack_num they were stored under.
+ NumAckTags = [NumAckTag || {_MsgId, NumAckTag} <- dict:to_list(MA)],
+ AckTags = [AckTag || {_Num, AckTag} <- lists:sort(NumAckTags)],
+ %% Only deliveries flagged as enqueue-on-promotion are carried over.
+ Deliveries = [Delivery || {_ChPid, {PubQ, _PendCh}} <- dict:to_list(SQ),
+ {Delivery, true} <- queue:to_list(PubQ)],
+ QueueState = rabbit_amqqueue_process:init_with_backing_queue_state(
+ Q1, rabbit_mirror_queue_master, MasterState, RateTRef,
+ AckTags, Deliveries, MTC),
+ {become, rabbit_amqqueue_process, QueueState, hibernate}.
+
+%% Build a {noreply, ...} return after running the per-callback
+%% housekeeping in next_state/1.
+noreply(State) ->
+    {State1, TimeoutSpec} = next_state(State),
+    {noreply, State1, TimeoutSpec}.
+
+%% Build a {reply, ...} return carrying Reply, after running the
+%% per-callback housekeeping in next_state/1.
+reply(Reply, State) ->
+    {State1, TimeoutSpec} = next_state(State),
+    {reply, Reply, State1, TimeoutSpec}.
+
+%% Housekeeping run before returning from a callback: drain confirms
+%% from the backing queue and act on them, make sure the rate timer is
+%% handled, then pick the sync timer action and the timeout value
+%% according to what BQ:needs_timeout/1 reports.
+%% Returns {NewState, Timeout}.
+next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) ->
+    {ConfirmedIds, DrainedBQS} = BQ:drain_confirmed(BQS),
+    Confirmed = confirm_messages(
+                  ConfirmedIds,
+                  State #state { backing_queue_state = DrainedBQS }),
+    Timed = ensure_rate_timer(Confirmed),
+    case BQ:needs_timeout(DrainedBQS) of
+        timed -> {ensure_sync_timer(Timed), 0};
+        idle  -> {stop_sync_timer(Timed), 0};
+        false -> {stop_sync_timer(Timed), hibernate}
+    end.
+
+%% Run the backing queue's timeout/1 callback through
+%% run_backing_queue/3.
+backing_queue_timeout(State = #state { backing_queue = BQ }) ->
+    TimeoutFun = fun (Mod, BQState) -> Mod:timeout(BQState) end,
+    run_backing_queue(BQ, TimeoutFun, State).
+
+%% Arm the sync timer unless one is already recorded in the state.
+%% The timer calls rabbit_amqqueue:sync_timeout/1 with this process's
+%% pid after ?SYNC_INTERVAL.
+ensure_sync_timer(State = #state { sync_timer_ref = undefined }) ->
+    TimerArgs = [self()],
+    {ok, SyncTRef} = timer:apply_after(
+                       ?SYNC_INTERVAL, rabbit_amqqueue, sync_timeout,
+                       TimerArgs),
+    State #state { sync_timer_ref = SyncTRef };
+ensure_sync_timer(AlreadyArmed) ->
+    AlreadyArmed.
+
+%% Cancel the sync timer if one is recorded and clear it from the
+%% state; a state without a timer is returned unchanged.
+stop_sync_timer(NotArmed = #state { sync_timer_ref = undefined }) ->
+    NotArmed;
+stop_sync_timer(Armed = #state { sync_timer_ref = SyncTRef }) ->
+    {ok, cancel} = timer:cancel(SyncTRef),
+    Armed #state { sync_timer_ref = undefined }.
+
+%% Keep the rate timer going:
+%%   undefined     - arm a timer that calls
+%%                   rabbit_amqqueue:update_ram_duration/1 with this
+%%                   process's pid after ?RAM_DURATION_UPDATE_INTERVAL;
+%%   just_measured - reset the field to undefined without arming;
+%%   anything else - a timer is already recorded, leave it alone.
+ensure_rate_timer(Unarmed = #state { rate_timer_ref = undefined }) ->
+    TimerArgs = [self()],
+    {ok, RateTRef} = timer:apply_after(
+                       ?RAM_DURATION_UPDATE_INTERVAL,
+                       rabbit_amqqueue, update_ram_duration, TimerArgs),
+    Unarmed #state { rate_timer_ref = RateTRef };
+ensure_rate_timer(Measured = #state { rate_timer_ref = just_measured }) ->
+    Measured #state { rate_timer_ref = undefined };
+ensure_rate_timer(Armed) ->
+    Armed.
+
+%% Clear the rate timer field, cancelling the timer when the field
+%% holds a real timer reference (i.e. is neither undefined nor
+%% just_measured).
+stop_rate_timer(Unarmed = #state { rate_timer_ref = undefined }) ->
+    Unarmed;
+stop_rate_timer(Measured = #state { rate_timer_ref = just_measured }) ->
+    Measured #state { rate_timer_ref = undefined };
+stop_rate_timer(Armed = #state { rate_timer_ref = RateTRef }) ->
+    {ok, cancel} = timer:cancel(RateTRef),
+    Armed #state { rate_timer_ref = undefined }.
+
+%% Make sure ChPid is monitored: if it is not yet in known_senders,
+%% start a monitor and record its reference there.
+ensure_monitoring(ChPid, State = #state { known_senders = KS }) ->
+    case dict:find(ChPid, KS) of
+        {ok, _MRef} ->
+            State;
+        error ->
+            NewMRef = erlang:monitor(process, ChPid),
+            State #state { known_senders = dict:store(ChPid, NewMRef, KS) }
+    end.
+
+%% React to the death of a sender: if ChPid is one of our known
+%% senders, schedule confirm_sender_death/1 for it. The state is
+%% returned unchanged either way.
+local_sender_death(ChPid, State = #state { known_senders = KS }) ->
+    IsKnown = dict:is_key(ChPid, KS),
+    ok = case IsKnown of
+             true  -> confirm_sender_death(ChPid);
+             false -> ok
+         end,
+    State.
+
+%% Schedule a check, ?DEATH_TIMEOUT from now, on whether Pid is still
+%% in known_senders. While it is, the check broadcasts
+%% {ensure_monitoring, [Pid]} over GM and schedules itself again.
+%% Returns ok.
+confirm_sender_death(Pid) ->
+ %% We have to deal with the possibility that we'll be promoted to
+ %% master before this thing gets run. Consequently we set the
+ %% module to rabbit_mirror_queue_master so that if we do become a
+ %% rabbit_amqqueue_process before then, sane things will happen.
+ Fun =
+ fun (?MODULE, State = #state { known_senders = KS,
+ gm = GM }) ->
+ %% We're running still as a slave
+ ok = case dict:is_key(Pid, KS) of
+ false -> ok;
+ true -> gm:broadcast(GM, {ensure_monitoring, [Pid]}),
+ confirm_sender_death(Pid)
+ end,
+ State;
+ (rabbit_mirror_queue_master, State) ->
+ %% We've become a master. State is now opaque to
+ %% us. When we became master, if Pid was still known
+ %% to us then we'd have set up monitoring of it then,
+ %% so this is now a noop.
+ State
+ end,
+ %% Note that we do not remove our knowledge of this ChPid until we
+ %% get the sender_death from GM.
+ {ok, _TRef} = timer:apply_after(
+ ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue_async,
+ [self(), rabbit_mirror_queue_master, Fun]),
+ ok.
+
+%% Handle a delivery with txn = none, reconciling it with whatever is
+%% already recorded for its msg id in msg_id_status:
+%%   - no entry: queue {Delivery, EnqueueOnPromotion} on the sender's
+%%     queue;
+%%   - {confirmed, ChPid}: issue the confirm now that the msg_seq_no is
+%%     known, and forget the msg id;
+%%   - {published, ChPid}: confirm now, record the msg_seq_no for a
+%%     later confirm, or just forget the msg id, as decided by
+%%     needs_confirming/2;
+%%   - discarded: forget the msg id.
+%% Deliveries matching the second clause (any other txn value) leave
+%% the state unchanged.
+maybe_enqueue_message(
+ Delivery = #delivery { message = #basic_message { id = MsgId },
+ msg_seq_no = MsgSeqNo,
+ sender = ChPid,
+ txn = none },
+ EnqueueOnPromotion,
+ State = #state { sender_queues = SQ, msg_id_status = MS }) ->
+ State1 = ensure_monitoring(ChPid, State),
+ %% We will never see {published, ChPid, MsgSeqNo} here.
+ case dict:find(MsgId, MS) of
+ error ->
+ {MQ, PendingCh} = get_sender_queue(ChPid, SQ),
+ MQ1 = queue:in({Delivery, EnqueueOnPromotion}, MQ),
+ SQ1 = dict:store(ChPid, {MQ1, PendingCh}, SQ),
+ State1 #state { sender_queues = SQ1 };
+ {ok, {confirmed, ChPid}} ->
+ %% BQ has confirmed it but we didn't know what the
+ %% msg_seq_no was at the time. We do now!
+ ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]),
+ SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
+ State1 #state { sender_queues = SQ1,
+ msg_id_status = dict:erase(MsgId, MS) };
+ {ok, {published, ChPid}} ->
+ %% It was published to the BQ and we didn't know the
+ %% msg_seq_no so couldn't confirm it at the time.
+ case needs_confirming(Delivery, State1) of
+ never ->
+ SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
+ State1 #state { msg_id_status = dict:erase(MsgId, MS),
+ sender_queues = SQ1 };
+ eventually ->
+ State1 #state {
+ msg_id_status =
+ dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS) };
+ immediately ->
+ ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]),
+ SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
+ State1 #state { msg_id_status = dict:erase(MsgId, MS),
+ sender_queues = SQ1 }
+ end;
+ {ok, discarded} ->
+ %% We've already heard from GM that the msg is to be
+ %% discarded. We won't see this again.
+ SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ),
+ State1 #state { msg_id_status = dict:erase(MsgId, MS),
+ sender_queues = SQ1 }
+ end;
+maybe_enqueue_message(_Delivery, _EnqueueOnPromotion, State) ->
+ %% We don't support txns in mirror queues.
+ State.
+
+%% Fetch the {MsgQueue, PendingSet} pair stored for ChPid in SQ, or a
+%% fresh empty queue and empty set when ChPid has no entry.
+get_sender_queue(ChPid, SQ) ->
+    case dict:is_key(ChPid, SQ) of
+        true  -> dict:fetch(ChPid, SQ);
+        false -> {queue:new(), sets:new()}
+    end.
+
+%% Drop MsgId from the pending set stored for ChPid in SQ. SQ is
+%% returned untouched when ChPid has no entry.
+remove_from_pending_ch(MsgId, ChPid, SQ) ->
+    case dict:is_key(ChPid, SQ) of
+        false ->
+            SQ;
+        true ->
+            {MQ, PendingCh} = dict:fetch(ChPid, SQ),
+            Remaining = sets:del_element(MsgId, PendingCh),
+            dict:store(ChPid, {MQ, Remaining}, SQ)
+    end.
+
+%% Apply one instruction to the slave state (per the comments below,
+%% these instructions come from the master via GM - confirm against
+%% the caller). Returns {ok, NewState}, or {stop, NewState} for
+%% delete_and_terminate. Instructions handled: publish, discard,
+%% set_length, fetch, ack, requeue, sender_death and
+%% delete_and_terminate.
+process_instruction(
+ {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { id = MsgId }},
+ State = #state { sender_queues = SQ,
+ backing_queue = BQ,
+ backing_queue_state = BQS,
+ msg_id_status = MS }) ->
+
+ %% We really are going to do the publish right now, even though we
+ %% may not have seen it directly from the channel. As a result, we
+ %% may know that it needs confirming without knowing its
+ %% msg_seq_no, which means that we can see the confirmation come
+ %% back from the backing queue without knowing the msg_seq_no,
+ %% which means that we're going to have to hang on to the fact
+ %% that we've seen the msg_id confirmed until we can associate it
+ %% with a msg_seq_no.
+ State1 = ensure_monitoring(ChPid, State),
+ {MQ, PendingCh} = get_sender_queue(ChPid, SQ),
+ {MQ1, PendingCh1, MS1} =
+ case queue:out(MQ) of
+ {empty, _MQ2} ->
+ {MQ, sets:add_element(MsgId, PendingCh),
+ dict:store(MsgId, {published, ChPid}, MS)};
+ {{value, {Delivery = #delivery {
+ msg_seq_no = MsgSeqNo,
+ message = #basic_message { id = MsgId } },
+ _EnqueueOnPromotion}}, MQ2} ->
+ %% We received the msg from the channel first. Thus we
+ %% need to deal with confirms here.
+ case needs_confirming(Delivery, State1) of
+ never ->
+ {MQ2, PendingCh, MS};
+ eventually ->
+ {MQ2, sets:add_element(MsgId, PendingCh),
+ dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS)};
+ immediately ->
+ ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]),
+ {MQ2, PendingCh, MS}
+ end;
+ {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} ->
+ %% The instruction was sent to us before we were
+ %% within the slave_pids within the #amqqueue{}
+ %% record. We'll never receive the message directly
+ %% from the channel. And the channel will not be
+ %% expecting any confirms from us.
+ {MQ, PendingCh, MS}
+ end,
+
+ SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ),
+ State2 = State1 #state { sender_queues = SQ1, msg_id_status = MS1 },
+
+ {ok,
+ case Deliver of
+ false ->
+ BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS),
+ State2 #state { backing_queue_state = BQS1 };
+ {true, AckRequired} ->
+ {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps,
+ ChPid, BQS),
+ maybe_store_ack(AckRequired, MsgId, AckTag,
+ State2 #state { backing_queue_state = BQS1 })
+ end};
+process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }},
+ State = #state { sender_queues = SQ,
+ backing_queue = BQ,
+ backing_queue_state = BQS,
+ msg_id_status = MS }) ->
+ %% Many of the comments around the publish head above apply here
+ %% too.
+ State1 = ensure_monitoring(ChPid, State),
+ {MQ, PendingCh} = get_sender_queue(ChPid, SQ),
+ {MQ1, PendingCh1, MS1} =
+ case queue:out(MQ) of
+ {empty, _MQ} ->
+ {MQ, sets:add_element(MsgId, PendingCh),
+ dict:store(MsgId, discarded, MS)};
+ {{value, {#delivery { message = #basic_message { id = MsgId } },
+ _EnqueueOnPromotion}}, MQ2} ->
+ %% We've already seen it from the channel, we're not
+ %% going to see this again, so don't add it to MS
+ {MQ2, PendingCh, MS};
+ {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} ->
+ %% The instruction was sent to us before we were
+ %% within the slave_pids within the #amqqueue{}
+ %% record. We'll never receive the message directly
+ %% from the channel.
+ {MQ, PendingCh, MS}
+ end,
+ SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ),
+ BQS1 = BQ:discard(Msg, ChPid, BQS),
+ {ok, State1 #state { sender_queues = SQ1,
+ msg_id_status = MS1,
+ backing_queue_state = BQS1 }};
+process_instruction({set_length, Length},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS }) ->
+ %% Fetch (without requiring acks) and drop messages until the
+ %% backing queue is no longer than Length.
+ QLen = BQ:len(BQS),
+ ToDrop = QLen - Length,
+ {ok, case ToDrop > 0 of
+ true -> BQS1 =
+ lists:foldl(
+ fun (const, BQSN) ->
+ {{_Msg, _IsDelivered, _AckTag, _Remaining},
+ BQSN1} = BQ:fetch(false, BQSN),
+ BQSN1
+ end, BQS, lists:duplicate(ToDrop, const)),
+ State #state { backing_queue_state = BQS1 };
+ false -> State
+ end};
+process_instruction({fetch, AckRequired, MsgId, Remaining},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS }) ->
+ %% Only fetch when our length agrees with the Remaining count in
+ %% the instruction; the fetched message must carry MsgId.
+ QLen = BQ:len(BQS),
+ {ok, case QLen - 1 of
+ Remaining ->
+ {{#basic_message{id = MsgId}, _IsDelivered,
+ AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS),
+ maybe_store_ack(AckRequired, MsgId, AckTag,
+ State #state { backing_queue_state = BQS1 });
+ Other when Other < Remaining ->
+ %% we must be shorter than the master
+ State
+ end};
+process_instruction({ack, MsgIds},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS,
+ msg_id_ack = MA }) ->
+ {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA),
+ {MsgIds1, BQS1} = BQ:ack(AckTags, BQS),
+ [] = MsgIds1 -- MsgIds, %% ASSERTION
+ {ok, State #state { msg_id_ack = MA1,
+ backing_queue_state = BQS1 }};
+process_instruction({requeue, MsgPropsFun, MsgIds},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS,
+ msg_id_ack = MA }) ->
+ {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA),
+ %% Requeue only if every msg id mapped to an ack tag.
+ {ok, case length(AckTags) =:= length(MsgIds) of
+ true ->
+ {MsgIds, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS),
+ State #state { msg_id_ack = MA1,
+ backing_queue_state = BQS1 };
+ false ->
+ %% The only thing we can safely do is nuke out our BQ
+ %% and MA. The interaction between this and confirms
+ %% doesn't really bear thinking about...
+ {_Count, BQS1} = BQ:purge(BQS),
+ {_MsgIds, BQS2} = ack_all(BQ, MA, BQS1),
+ State #state { msg_id_ack = dict:new(),
+ backing_queue_state = BQS2 }
+ end};
+process_instruction({sender_death, ChPid},
+ State = #state { sender_queues = SQ,
+ msg_id_status = MS,
+ known_senders = KS }) ->
+ %% Forget a known sender: drop its monitor, its sender queue and
+ %% the msg_id_status entries for its pending msg ids.
+ {ok, case dict:find(ChPid, KS) of
+ error ->
+ State;
+ {ok, MRef} ->
+ true = erlang:demonitor(MRef),
+ MS1 = case dict:find(ChPid, SQ) of
+ error ->
+ MS;
+ {ok, {_MQ, PendingCh}} ->
+ lists:foldl(fun dict:erase/2, MS,
+ sets:to_list(PendingCh))
+ end,
+ State #state { sender_queues = dict:erase(ChPid, SQ),
+ msg_id_status = MS1,
+ known_senders = dict:erase(ChPid, KS) }
+ end};
+process_instruction({delete_and_terminate, Reason},
+ State = #state { backing_queue = BQ,
+ backing_queue_state = BQS }) ->
+ BQ:delete_and_terminate(Reason, BQS),
+ {stop, State #state { backing_queue_state = undefined }}.
+
+%% Translate MsgIds into the ack tags recorded for them in MA (a dict
+%% of MsgId -> {Num, AckTag}). Returns {AckTags, MA1}: AckTags follows
+%% the order of MsgIds, and MA1 is MA without the translated entries.
+%% Ids with no entry are skipped. The lookup is made against the
+%% accumulator rather than the original dict, so an id that occurs
+%% more than once in MsgIds yields its ack tag only once.
+msg_ids_to_acktags(MsgIds, MA) ->
+    {AckTags, MA1} =
+        lists:foldl(
+          fun (MsgId, {Acc, MAN}) ->
+                  case dict:find(MsgId, MAN) of
+                      error                -> {Acc, MAN};
+                      {ok, {_Num, AckTag}} -> {[AckTag | Acc],
+                                               dict:erase(MsgId, MAN)}
+                  end
+          end, {[], MA}, MsgIds),
+    {lists:reverse(AckTags), MA1}.
+
+%% Ack every ack tag currently recorded in MA against the backing
+%% queue, returning whatever BQ:ack/2 returns.
+ack_all(BQ, MA, BQS) ->
+    Entries = dict:to_list(MA),
+    AckTags = [Tag || {_Id, {_Seq, Tag}} <- Entries],
+    BQ:ack(AckTags, BQS).
+
+%% When an ack is required, record {ack_num, AckTag} under MsgId in
+%% msg_id_ack and bump ack_num; otherwise return the state unchanged.
+maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA,
+                                                      ack_num = Num }) ->
+    MA1 = dict:store(MsgId, {Num, AckTag}, MA),
+    State #state { msg_id_ack = MA1, ack_num = Num + 1 };
+maybe_store_ack(false, _MsgId, _AckTag, State) ->
+    State.
diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl
new file mode 100644
index 00000000..2ce5941e
--- /dev/null
+++ b/src/rabbit_mirror_queue_slave_sup.erl
@@ -0,0 +1,60 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developer of the Original Code is VMware, Inc.
+%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved.
+%%
+
+-module(rabbit_mirror_queue_slave_sup).
+
+-rabbit_boot_step({mirror_queue_slave_sup,
+ [{description, "mirror queue slave sup"},
+ {mfa, {rabbit_mirror_queue_slave_sup, start, []}},
+ {requires, recovery},
+ {enables, routing_ready}]}).
+
+-rabbit_boot_step({mirrored_queues,
+ [{description, "adding mirrors to queues"},
+ {mfa, {rabbit_mirror_queue_misc, on_node_up, []}},
+ {requires, mirror_queue_slave_sup},
+ {enables, routing_ready}]}).
+
+-behaviour(supervisor2).
+
+-export([start/0, start_link/0, start_child/2]).
+
+-export([init/1]).
+
+-include_lib("rabbit.hrl").
+
+-define(SERVER, ?MODULE).
+
+%% Boot step entry point: add this supervisor as a transient child of
+%% rabbit_sup. Crashes unless the child starts successfully.
+start() ->
+    ChildSpec = {rabbit_mirror_queue_slave_sup,
+                 {rabbit_mirror_queue_slave_sup, start_link, []},
+                 transient, infinity, supervisor,
+                 [rabbit_mirror_queue_slave_sup]},
+    {ok, _} = supervisor2:start_child(rabbit_sup, ChildSpec),
+    ok.
+
+%% Start the supervisor, registered locally under ?SERVER, with this
+%% module as its callback module and [] as the init/1 argument.
+start_link() ->
+ supervisor2:start_link({local, ?SERVER}, ?MODULE, []).
+
+%% Ask the supervisor registered as ?SERVER on Node to start a child
+%% with Args.
+start_child(Node, Args) ->
+    RemoteSup = {?SERVER, Node},
+    supervisor2:start_child(RemoteSup, Args).
+
+%% Supervisor callback: a simple_one_for_one_terminate supervisor
+%% (intensity 10, period 10) whose single child template is a
+%% temporary rabbit_mirror_queue_slave worker with a ?MAX_WAIT
+%% shutdown value.
+init([]) ->
+    SupFlags = {simple_one_for_one_terminate, 10, 10},
+    SlaveSpec = {rabbit_mirror_queue_slave,
+                 {rabbit_mirror_queue_slave, start_link, []},
+                 temporary, ?MAX_WAIT, worker, [rabbit_mirror_queue_slave]},
+    {ok, {SupFlags, [SlaveSpec]}}.
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index 568b9ce6..8d5c8646 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -241,7 +241,8 @@ table_definitions() ->
{rabbit_queue,
[{record_name, amqqueue},
{attributes, record_info(fields, amqqueue)},
- {match, #amqqueue{name = queue_name_match(), _='_'}}]}].
+ {match, #amqqueue{name = queue_name_match(), _='_'}}]}]
+ ++ gm:table_definitions().
binding_match() ->
#binding{source = exchange_name_match(),
diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl
index 8f166672..26780676 100644
--- a/src/rabbit_router.erl
+++ b/src/rabbit_router.erl
@@ -110,8 +110,10 @@ check_delivery(_ , _ , {_ , Qs}) -> {routed, Qs}.
lookup_qpids(QNames) ->
lists:foldl(fun (QName, QPids) ->
case mnesia:dirty_read({rabbit_queue, QName}) of
- [#amqqueue{pid = QPid}] -> [QPid | QPids];
- [] -> QPids
+ [#amqqueue{pid = QPid, slave_pids = SPids}] ->
+ SPids ++ [QPid | QPids];
+ [] ->
+ QPids
end
end, [], QNames).
diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl
index a18118e3..03b2c9e8 100644
--- a/src/rabbit_types.erl
+++ b/src/rabbit_types.erl
@@ -124,7 +124,9 @@
auto_delete :: boolean(),
exclusive_owner :: rabbit_types:maybe(pid()),
arguments :: rabbit_framing:amqp_table(),
- pid :: rabbit_types:maybe(pid())}).
+ pid :: rabbit_types:maybe(pid()),
+ slave_pids :: [pid()],
+ mirror_nodes :: [node()] | 'undefined' | 'all'}).
-type(exchange() ::
#exchange{name :: rabbit_exchange:name(),
diff --git a/src/rabbit_upgrade_functions.erl b/src/rabbit_upgrade_functions.erl
index c2dd860a..ac2c378c 100644
--- a/src/rabbit_upgrade_functions.erl
+++ b/src/rabbit_upgrade_functions.erl
@@ -30,6 +30,8 @@
-rabbit_upgrade({exchange_event_serial, mnesia, []}).
-rabbit_upgrade({trace_exchanges, mnesia, []}).
-rabbit_upgrade({user_admin_to_tags, mnesia, [user_to_internal_user]}).
+-rabbit_upgrade({ha_mirrors, mnesia, []}).
+-rabbit_upgrade({gm, mnesia, []}).
%% -------------------------------------------------------------------
@@ -45,6 +47,8 @@
-spec(exchange_event_serial/0 :: () -> 'ok').
-spec(trace_exchanges/0 :: () -> 'ok').
-spec(user_admin_to_tags/0 :: () -> 'ok').
+-spec(ha_mirrors/0 :: () -> 'ok').
+-spec(gm/0 :: () -> 'ok').
-endif.
@@ -133,6 +137,24 @@ user_admin_to_tags() ->
end,
[username, password_hash, tags], internal_user).
+%% Upgrade step: extend every amqqueue record in rabbit_queue and
+%% rabbit_durable_queue with the slave_pids ([]) and mirror_nodes
+%% (undefined) fields.
+ha_mirrors() ->
+    NewFields = [name, durable, auto_delete, exclusive_owner, arguments,
+                 pid, slave_pids, mirror_nodes],
+    Upgrade =
+        fun ({amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid}) ->
+                {amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid,
+                 [], undefined}
+        end,
+    lists:foreach(
+      fun (Table) -> ok = transform(Table, Upgrade, NewFields) end,
+      [rabbit_queue, rabbit_durable_queue]),
+    ok.
+
+%% Upgrade step: create the gm_group table, holding gm_group records
+%% with the attributes name, version and members.
+gm() ->
+    TableDef = [{record_name, gm_group},
+                {attributes, [name, version, members]}],
+    create(gm_group, TableDef).
+
%%--------------------------------------------------------------------
transform(TableName, Fun, FieldList) ->