From 4f7cf9e15982a14d8ec8518631950914a7a6262d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 29 Nov 2010 12:19:59 +0000 Subject: Added mechanism to dynamically choose the backing queue module. Ideally, this'll end up using the generic registry that's part of bug 23467 and allow plugins to register this stuff on boot --- src/rabbit_amqqueue_process.erl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index a999fe58..87d074c2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -114,12 +114,11 @@ info_keys() -> ?INFO_KEYS. init(Q) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), - {ok, BQ} = application:get_env(backing_queue_module), {ok, #q{q = Q#amqqueue{pid = self()}, exclusive_consumer = none, has_had_consumers = false, - backing_queue = BQ, + backing_queue = backing_queue_module(Q), backing_queue_state = undefined, active_consumers = queue:new(), blocked_consumers = queue:new(), @@ -223,6 +222,13 @@ next_state(State) -> false -> {stop_sync_timer(State2), hibernate} end. +backing_queue_module(#amqqueue{arguments = Args}) -> + case rabbit_misc:table_lookup(Args, <<"x-mirror">>) of + undefined -> {ok, BQM} = application:get_env(backing_queue_module), + BQM; + _Nodes -> rabbit_mirror_queue_master + end. 
+ ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, -- cgit v1.2.1 From d895717caa4cfb16a031e8d449404585950678bb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 29 Nov 2010 12:33:31 +0000 Subject: Present the whole amqqueue record to the BQ --- src/rabbit_amqqueue_process.erl | 4 ++-- src/rabbit_invariable_queue.erl | 2 +- src/rabbit_variable_queue.erl | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 87d074c2..2ae05300 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -150,7 +150,7 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- declare(Recover, From, - State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable}, + State = #q{q = Q = #amqqueue{durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined, stats_timer = StatsTimer}) -> case rabbit_amqqueue:internal_declare(Q, Recover) of @@ -162,7 +162,7 @@ declare(Recover, From, ok = rabbit_memory_monitor:register( self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), - BQS = BQ:init(QName, IsDurable, Recover), + BQS = BQ:init(Q, IsDurable, Recover), State1 = process_args(State#q{backing_queue_state = BQS}), rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State1)), diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index 5a0532ea..a2ed13d5 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -64,7 +64,7 @@ start(DurableQueues) -> stop() -> ok = rabbit_sup:stop_child(rabbit_persister). 
-init(QName, IsDurable, Recover) -> +init(#amqqueue { name = QName }, IsDurable, Recover) -> Q = queue:from_list(case IsDurable andalso Recover of true -> rabbit_persister:queue_content(QName); false -> [] diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 5ac042a2..dd39a1c2 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -403,7 +403,7 @@ stop_msg_store() -> ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE). -init(QueueName, IsDurable, false) -> +init(#amqqueue { name = QueueName }, IsDurable, false) -> IndexState = rabbit_queue_index:init(QueueName), init(IsDurable, IndexState, 0, [], case IsDurable of @@ -412,7 +412,7 @@ init(QueueName, IsDurable, false) -> end, msg_store_client_init(?TRANSIENT_MSG_STORE)); -init(QueueName, true, true) -> +init(#amqqueue { name = QueueName }, true, true) -> Terms = rabbit_queue_index:shutdown_terms(QueueName), {PRef, TRef, Terms1} = case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of -- cgit v1.2.1 From 809e3999e60dd89e1e8e86a84695f45202232e11 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 29 Nov 2010 12:40:52 +0000 Subject: Present the whole amqqueue record to the BQ (remove duplicated IsDurable flag, correct BQ callbacks) --- include/rabbit_backing_queue_spec.hrl | 3 +-- src/rabbit_amqqueue_process.erl | 5 ++--- src/rabbit_backing_queue.erl | 2 +- src/rabbit_invariable_queue.erl | 4 ++-- src/rabbit_variable_queue.erl | 6 +++--- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 20230b24..7b7f3885 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -43,8 +43,7 @@ -spec(start/1 :: ([rabbit_amqqueue:name()]) -> 'ok'). -spec(stop/0 :: () -> 'ok'). --spec(init/3 :: (rabbit_amqqueue:name(), is_durable(), attempt_recovery()) -> - state()). 
+-spec(init/2 :: (rabbit_types:amqqueue(), attempt_recovery()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 2ae05300..08c688c7 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -150,8 +150,7 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- declare(Recover, From, - State = #q{q = Q = #amqqueue{durable = IsDurable}, - backing_queue = BQ, backing_queue_state = undefined, + State = #q{q = Q, backing_queue = BQ, backing_queue_state = undefined, stats_timer = StatsTimer}) -> case rabbit_amqqueue:internal_declare(Q, Recover) of not_found -> {stop, normal, not_found, State}; @@ -162,7 +161,7 @@ declare(Recover, From, ok = rabbit_memory_monitor:register( self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), - BQS = BQ:init(Q, IsDurable, Recover), + BQS = BQ:init(Q, Recover), State1 = process_args(State#q{backing_queue_state = BQS}), rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State1)), diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 352e76fd..7237f0ea 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -48,7 +48,7 @@ behaviour_info(callbacks) -> {stop, 0}, %% Initialise the backing queue and its state. - {init, 3}, + {init, 2}, %% Called on queue shutdown when queue isn't being deleted. {terminate, 1}, diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index a2ed13d5..41aff185 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -31,7 +31,7 @@ -module(rabbit_invariable_queue). 
--export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/3, +-export([init/2, terminate/1, delete_and_terminate/1, purge/1, publish/3, publish_delivered/4, fetch/2, ack/2, tx_publish/4, tx_ack/3, dropwhile/2, tx_rollback/2, tx_commit/4, requeue/3, len/1, is_empty/1, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, @@ -64,7 +64,7 @@ start(DurableQueues) -> stop() -> ok = rabbit_sup:stop_child(rabbit_persister). -init(#amqqueue { name = QName }, IsDurable, Recover) -> +init(#amqqueue { name = QName, durable = IsDurable }, Recover) -> Q = queue:from_list(case IsDurable andalso Recover of true -> rabbit_persister:queue_content(QName); false -> [] diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index dd39a1c2..73a68ec3 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -31,7 +31,7 @@ -module(rabbit_variable_queue). --export([init/3, terminate/1, delete_and_terminate/1, +-export([init/2, terminate/1, delete_and_terminate/1, purge/1, publish/3, publish_delivered/4, fetch/2, ack/2, tx_publish/4, tx_ack/3, tx_rollback/2, tx_commit/4, requeue/3, len/1, is_empty/1, dropwhile/2, @@ -403,7 +403,7 @@ stop_msg_store() -> ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE). 
-init(#amqqueue { name = QueueName }, IsDurable, false) -> +init(#amqqueue { name = QueueName, durable = IsDurable }, false) -> IndexState = rabbit_queue_index:init(QueueName), init(IsDurable, IndexState, 0, [], case IsDurable of @@ -412,7 +412,7 @@ init(#amqqueue { name = QueueName }, IsDurable, false) -> end, msg_store_client_init(?TRANSIENT_MSG_STORE)); -init(#amqqueue { name = QueueName }, true, true) -> +init(#amqqueue { name = QueueName, durable = true }, true) -> Terms = rabbit_queue_index:shutdown_terms(QueueName), {PRef, TRef, Terms1} = case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of -- cgit v1.2.1 From 5a2ad8926058366a3cd01dde6f8a9f96df1cf283 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 29 Nov 2010 16:37:03 +0000 Subject: Lesser of many evils... --- include/rabbit.hrl | 2 +- src/rabbit_amqqueue.erl | 11 ++++++----- src/rabbit_router.erl | 6 ++++-- src/rabbit_types.erl | 3 ++- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index a1987fb2..09cc3eb3 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -54,7 +54,7 @@ -record(exchange, {name, type, durable, auto_delete, arguments}). -record(amqqueue, {name, durable, auto_delete, exclusive_owner = none, - arguments, pid}). + arguments, pid, mirror_pids}). %% mnesia doesn't like unary records, so we add a dummy 'value' field -record(route, {binding, value = const}). 
diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index fa417544..5390bb86 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -203,12 +203,13 @@ recover_durable_queues(DurableQueues) -> declare(QueueName, Durable, AutoDelete, Args, Owner) -> ok = check_declare_arguments(QueueName, Args), - Q = start_queue_process(#amqqueue{name = QueueName, - durable = Durable, - auto_delete = AutoDelete, - arguments = Args, + Q = start_queue_process(#amqqueue{name = QueueName, + durable = Durable, + auto_delete = AutoDelete, + arguments = Args, exclusive_owner = Owner, - pid = none}), + pid = none, + mirror_pids = []}), case gen_server2:call(Q#amqqueue.pid, {init, false}) of not_found -> rabbit_misc:not_found(QueueName); Q1 -> Q1 diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index 00df1ce1..b1ce380b 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -116,7 +116,9 @@ check_delivery(_ , _ , {_ , Qs}) -> {routed, Qs}. lookup_qpids(QNames) -> lists:foldl(fun (QName, QPids) -> case mnesia:dirty_read({rabbit_queue, QName}) of - [#amqqueue{pid = QPid}] -> [QPid | QPids]; - [] -> QPids + [#amqqueue{pid = QPid, mirror_pids = Pids}] -> + Pids ++ [QPid | QPids]; + [] -> + QPids end end, [], QNames). diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl index b9993823..3a0c0925 100644 --- a/src/rabbit_types.erl +++ b/src/rabbit_types.erl @@ -136,7 +136,8 @@ auto_delete :: boolean(), exclusive_owner :: rabbit_types:maybe(pid()), arguments :: rabbit_framing:amqp_table(), - pid :: rabbit_types:maybe(pid())}). + pid :: rabbit_types:maybe(pid()), + mirror_pids :: [pid()]}). -type(exchange() :: #exchange{name :: rabbit_exchange:name(), -- cgit v1.2.1 From b1eeb9af85fdd85a44d164f202c6879a74924cb6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 29 Nov 2010 17:20:26 +0000 Subject: Make the tests work again. 
Also mirror_pids => extra_pids on the basis that it could be reusable for other purposes --- include/rabbit.hrl | 2 +- src/rabbit_amqqueue.erl | 2 +- src/rabbit_router.erl | 4 ++-- src/rabbit_tests.erl | 14 +++++++++----- src/rabbit_types.erl | 2 +- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 09cc3eb3..7bea4f52 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -54,7 +54,7 @@ -record(exchange, {name, type, durable, auto_delete, arguments}). -record(amqqueue, {name, durable, auto_delete, exclusive_owner = none, - arguments, pid, mirror_pids}). + arguments, pid, extra_pids}). %% mnesia doesn't like unary records, so we add a dummy 'value' field -record(route, {binding, value = const}). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 5390bb86..fd157231 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -209,7 +209,7 @@ declare(QueueName, Durable, AutoDelete, Args, Owner) -> arguments = Args, exclusive_owner = Owner, pid = none, - mirror_pids = []}), + extra_pids = []}), case gen_server2:call(Q#amqqueue.pid, {init, false}) of not_found -> rabbit_misc:not_found(QueueName); Q1 -> Q1 diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index b1ce380b..40e4edee 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -116,8 +116,8 @@ check_delivery(_ , _ , {_ , Qs}) -> {routed, Qs}. lookup_qpids(QNames) -> lists:foldl(fun (QName, QPids) -> case mnesia:dirty_read({rabbit_queue, QName}) of - [#amqqueue{pid = QPid, mirror_pids = Pids}] -> - Pids ++ [QPid | QPids]; + [#amqqueue{pid = QPid, extra_pids = EPids}] -> + EPids ++ [QPid | QPids]; [] -> QPids end diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 27e4d925..a63baddb 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1848,9 +1848,13 @@ assert_prop(List, Prop, Value) -> assert_props(List, PropVals) -> [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals]. 
+test_amqqueue(Durable) -> + #amqqueue{name = test_queue(), + durable = Durable}. + with_fresh_variable_queue(Fun) -> ok = empty_test_queue(), - VQ = rabbit_variable_queue:init(test_queue(), true, false), + VQ = rabbit_variable_queue:init(test_amqqueue(true), false), S0 = rabbit_variable_queue:status(VQ), assert_props(S0, [{q1, 0}, {q2, 0}, {delta, {delta, undefined, 0, undefined}}, @@ -2025,7 +2029,7 @@ test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4), _VQ6 = rabbit_variable_queue:terminate(VQ5), - VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + VQ7 = rabbit_variable_queue:init(test_amqqueue(true), true), {{_Msg1, true, _AckTag1, Count1}, VQ8} = rabbit_variable_queue:fetch(true, VQ7), VQ9 = variable_queue_publish(false, 1, VQ8), @@ -2041,14 +2045,14 @@ test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> VQ4 = rabbit_variable_queue:requeue(AckTags, fun(X) -> X end, VQ3), VQ5 = rabbit_variable_queue:idle_timeout(VQ4), _VQ6 = rabbit_variable_queue:terminate(VQ5), - VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + VQ7 = rabbit_variable_queue:init(test_amqqueue(true), true), {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), VQ8. 
test_queue_recover() -> Count = 2 * rabbit_queue_index:next_segment_boundary(0), TxID = rabbit_guid:guid(), - {new, #amqqueue { pid = QPid, name = QName }} = + {new, #amqqueue { pid = QPid, name = QName } = Q} = rabbit_amqqueue:declare(test_queue(), true, false, [], none), Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), <<>>, #'P_basic'{delivery_mode = 2}, <<>>), @@ -2071,7 +2075,7 @@ test_queue_recover() -> {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} = rabbit_amqqueue:basic_get(Q1, self(), false), exit(QPid1, shutdown), - VQ1 = rabbit_variable_queue:init(QName, true, true), + VQ1 = rabbit_variable_queue:init(Q, true), {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2), diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl index 3a0c0925..4709532d 100644 --- a/src/rabbit_types.erl +++ b/src/rabbit_types.erl @@ -137,7 +137,7 @@ exclusive_owner :: rabbit_types:maybe(pid()), arguments :: rabbit_framing:amqp_table(), pid :: rabbit_types:maybe(pid()), - mirror_pids :: [pid()]}). + extra_pids :: [pid()]}). 
-type(exchange() :: #exchange{name :: rabbit_exchange:name(), -- cgit v1.2.1 From b0e19d5fd7803bfae1b7fe6a73d897f659f5151b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 30 Nov 2010 12:05:37 +0000 Subject: Exposing the publishing chpid right through to the BQ (and tidying up tests/types/specs) --- include/rabbit_backing_queue_spec.hrl | 14 ++++++++------ src/rabbit_amqqueue.erl | 9 +++++---- src/rabbit_amqqueue_process.erl | 12 ++++++------ src/rabbit_backing_queue.erl | 6 +++--- src/rabbit_invariable_queue.erl | 21 +++++++++++---------- src/rabbit_tests.erl | 8 ++++---- src/rabbit_variable_queue.erl | 15 ++++++++------- 7 files changed, 45 insertions(+), 40 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 7b7f3885..fd6d6b8a 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -47,18 +47,20 @@ -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}). --spec(publish/3 :: (rabbit_types:basic_message(), - rabbit_types:message_properties(), state()) -> state()). --spec(publish_delivered/4 :: (ack_required(), rabbit_types:basic_message(), - rabbit_types:message_properties(), state()) +-spec(publish/4 :: (rabbit_types:basic_message(), + rabbit_types:message_properties(), pid(), state()) + -> state()). +-spec(publish_delivered/5 :: (ack_required(), rabbit_types:basic_message(), + rabbit_types:message_properties(), pid(), state()) -> {ack(), state()}). -spec(dropwhile/2 :: (fun ((rabbit_types:message_properties()) -> boolean()), state()) -> state()). -spec(fetch/2 :: (ack_required(), state()) -> {fetch_result(), state()}). -spec(ack/2 :: ([ack()], state()) -> state()). --spec(tx_publish/4 :: (rabbit_types:txn(), rabbit_types:basic_message(), - rabbit_types:message_properties(), state()) -> state()). 
+-spec(tx_publish/5 :: (rabbit_types:txn(), rabbit_types:basic_message(), + rabbit_types:message_properties(), pid(), state()) -> + state()). -spec(tx_ack/3 :: (rabbit_types:txn(), [ack()], state()) -> state()). -spec(tx_rollback/2 :: (rabbit_types:txn(), state()) -> {[ack()], state()}). -spec(tx_commit/4 :: diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index fd157231..1bbe3f1c 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -499,11 +499,12 @@ delete_queue(QueueName) -> rabbit_binding:remove_transient_for_destination(QueueName). pseudo_queue(QueueName, Pid) -> - #amqqueue{name = QueueName, - durable = false, + #amqqueue{name = QueueName, + durable = false, auto_delete = false, - arguments = [], - pid = Pid}. + arguments = [], + pid = Pid, + extra_pids = []}. safe_delegate_call_ok(F, Pids) -> {_, Bad} = delegate:invoke(Pids, diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 08c688c7..1e45ef0b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -438,7 +438,7 @@ run_message_queue(State) -> {_IsEmpty1, State2} = deliver_msgs_to_consumers(Funs, IsEmpty, State1), State2. -attempt_delivery(none, _ChPid, Message, State = #q{backing_queue = BQ}) -> +attempt_delivery(none, ChPid, Message, State = #q{backing_queue = BQ}) -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = fun (AckRequired, false, State1 = #q{backing_queue_state = BQS}) -> @@ -447,7 +447,7 @@ attempt_delivery(none, _ChPid, Message, State = #q{backing_queue = BQ}) -> %% message_properties. 
{AckTag, BQS1} = BQ:publish_delivered(AckRequired, Message, - ?BASE_MESSAGE_PROPERTIES, BQS), + ?BASE_MESSAGE_PROPERTIES, ChPid, BQS), {{Message, false, AckTag}, true, State1#q{backing_queue_state = BQS1}} end, @@ -455,9 +455,9 @@ attempt_delivery(none, _ChPid, Message, State = #q{backing_queue = BQ}) -> attempt_delivery(Txn, ChPid, Message, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> record_current_channel_tx(ChPid, Txn), - {true, - State#q{backing_queue_state = - BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, BQS)}}. + {true, State#q{backing_queue_state = + BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, + ChPid, BQS)}}. deliver_or_enqueue(Txn, ChPid, Message, State = #q{backing_queue = BQ}) -> case attempt_delivery(Txn, ChPid, Message, State) of @@ -466,7 +466,7 @@ deliver_or_enqueue(Txn, ChPid, Message, State = #q{backing_queue = BQ}) -> {false, NewState} -> %% Txn is none and no unblocked channels with consumers BQS = BQ:publish(Message, - message_properties(State), + message_properties(State), ChPid, State #q.backing_queue_state), {false, ensure_ttl_timer(NewState#q{backing_queue_state = BQS})} end. diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 7237f0ea..d04944f9 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -62,12 +62,12 @@ behaviour_info(callbacks) -> {purge, 1}, %% Publish a message. - {publish, 3}, + {publish, 4}, %% Called for messages which have already been passed straight %% out to a client. The queue will be empty for these calls %% (i.e. saves the round trip through the backing queue). - {publish_delivered, 4}, + {publish_delivered, 5}, %% Drop messages from the head of the queue while the supplied %% predicate returns true. @@ -81,7 +81,7 @@ behaviour_info(callbacks) -> {ack, 2}, %% A publish, but in the context of a transaction. - {tx_publish, 4}, + {tx_publish, 5}, %% Acks, but in the context of a transaction. 
{tx_ack, 3}, diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index 41aff185..51819799 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -31,8 +31,8 @@ -module(rabbit_invariable_queue). --export([init/2, terminate/1, delete_and_terminate/1, purge/1, publish/3, - publish_delivered/4, fetch/2, ack/2, tx_publish/4, tx_ack/3, +-export([init/2, terminate/1, delete_and_terminate/1, purge/1, publish/4, + publish_delivered/5, fetch/2, ack/2, tx_publish/5, tx_ack/3, dropwhile/2, tx_rollback/2, tx_commit/4, requeue/3, len/1, is_empty/1, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, status/1]). @@ -100,17 +100,17 @@ purge(State = #iv_state { queue = Q, qname = QName, durable = IsDurable, ok = persist_acks(QName, IsDurable, none, AckTags, PA), {Len, State #iv_state { len = 0, queue = queue:new() }}. -publish(Msg, MsgProps, State = #iv_state { queue = Q, - qname = QName, - durable = IsDurable, - len = Len }) -> +publish(Msg, MsgProps, _ChPid, State = #iv_state { queue = Q, + qname = QName, + durable = IsDurable, + len = Len }) -> ok = persist_message(QName, IsDurable, none, Msg, MsgProps), State #iv_state { queue = enqueue(Msg, MsgProps, false, Q), len = Len + 1 }. -publish_delivered(false, _Msg, _MsgProps, State) -> +publish_delivered(false, _Msg, _MsgProps, _ChPid, State) -> {blank_ack, State}; publish_delivered(true, Msg = #basic_message { guid = Guid }, - MsgProps, + MsgProps, _ChPid, State = #iv_state { qname = QName, durable = IsDurable, len = 0, pending_ack = PA }) -> ok = persist_message(QName, IsDurable, none, Msg, MsgProps), @@ -159,8 +159,9 @@ ack(AckTags, State = #iv_state { qname = QName, durable = IsDurable, PA1 = remove_acks(AckTags, PA), State #iv_state { pending_ack = PA1 }. 
-tx_publish(Txn, Msg, MsgProps, State = #iv_state { qname = QName, - durable = IsDurable }) -> +tx_publish(Txn, Msg, MsgProps, _ChPid, + State = #iv_state { qname = QName, + durable = IsDurable }) -> Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), store_tx(Txn, Tx #tx { pending_messages = [{Msg, MsgProps} | Pubs] }), ok = persist_message(QName, IsDurable, Txn, Msg, MsgProps), diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index a63baddb..572f1457 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -1830,7 +1830,7 @@ variable_queue_publish(IsPersistent, Count, VQ) -> true -> 2; false -> 1 end}, <<>>), - #message_properties{}, VQN) + #message_properties{}, self(), VQN) end, VQ, lists:seq(1, Count)). variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> @@ -1849,8 +1849,8 @@ assert_props(List, PropVals) -> [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals]. test_amqqueue(Durable) -> - #amqqueue{name = test_queue(), - durable = Durable}. + (rabbit_amqqueue:pseudo_queue(test_queue(), self())) + #amqqueue { durable = Durable }. with_fresh_variable_queue(Fun) -> ok = empty_test_queue(), @@ -1912,7 +1912,7 @@ test_dropwhile(VQ0) -> rabbit_basic:message( rabbit_misc:r(<<>>, exchange, <<>>), <<>>, #'P_basic'{}, <<>>), - #message_properties{expiry = N}, VQN) + #message_properties{expiry = N}, self(), VQN) end, VQ0, lists:seq(1, Count)), %% drop the first 5 messages diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 73a68ec3..cd4101fb 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -32,8 +32,8 @@ -module(rabbit_variable_queue). 
-export([init/2, terminate/1, delete_and_terminate/1, - purge/1, publish/3, publish_delivered/4, fetch/2, ack/2, - tx_publish/4, tx_ack/3, tx_rollback/2, tx_commit/4, + purge/1, publish/4, publish_delivered/5, fetch/2, ack/2, + tx_publish/5, tx_ack/3, tx_rollback/2, tx_commit/4, requeue/3, len/1, is_empty/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, @@ -501,14 +501,15 @@ purge(State = #vqstate { q4 = Q4, ram_index_count = 0, persistent_count = PCount1 })}. -publish(Msg, MsgProps, State) -> +publish(Msg, MsgProps, _ChPid, State) -> {_SeqId, State1} = publish(Msg, MsgProps, false, false, State), a(reduce_memory_use(State1)). -publish_delivered(false, _Msg, _MsgProps, State = #vqstate { len = 0 }) -> +publish_delivered(false, _Msg, _MsgProps, _ChPid, + State = #vqstate { len = 0 }) -> {blank_ack, a(State)}; publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, - MsgProps, + MsgProps, _ChPid, State = #vqstate { len = 0, next_seq_id = SeqId, out_counter = OutCount, @@ -640,8 +641,8 @@ ack(AckTags, State) -> AckTags, State)). 
tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, MsgProps, - State = #vqstate { durable = IsDurable, - msg_store_clients = MSCState }) -> + _ChPid, State = #vqstate { durable = IsDurable, + msg_store_clients = MSCState }) -> Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), store_tx(Txn, Tx #tx { pending_messages = [{Msg, MsgProps} | Pubs] }), case IsPersistent andalso IsDurable of -- cgit v1.2.1 From 6d72c379a0a1e5e2c258b411e6c905e52153c2ba Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 8 Dec 2010 15:57:42 +0000 Subject: Provide a means for plugins to add extra tables which are looked after by rabbit_mnesia --- src/rabbit_mnesia.erl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index a62e7a6f..65404aae 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -34,7 +34,8 @@ -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, cluster/1, force_cluster/1, reset/0, force_reset/0, - is_clustered/0, empty_ram_only_tables/0, copy_db/1]). + is_clustered/0, empty_ram_only_tables/0, copy_db/1, + add_table_definition/1]). -export([table_names/0]). @@ -210,7 +211,18 @@ table_definitions() -> {rabbit_queue, [{record_name, amqqueue}, {attributes, record_info(fields, amqqueue)}, - {match, #amqqueue{name = queue_name_match(), _='_'}}]}]. + {match, #amqqueue{name = queue_name_match(), _='_'}}]}] + ++ plugin_table_definitions(). + +add_table_definition(Def) -> + ok = application:set_env(rabbit, plugin_mnesia_tables, + [Def | plugin_table_definitions()], infinity). + +plugin_table_definitions() -> + case application:get_env(rabbit, plugin_mnesia_tables) of + {ok, Defs} -> Defs; + undefined -> [] + end. 
binding_match() -> #binding{source = exchange_name_match(), -- cgit v1.2.1 From 95898ec0f59c752ab73833b3444be5f6b2215371 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 8 Dec 2010 16:08:57 +0000 Subject: Add a TODO --- src/rabbit_mnesia.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 65404aae..bdc4d76c 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -214,6 +214,8 @@ table_definitions() -> {match, #amqqueue{name = queue_name_match(), _='_'}}]}] ++ plugin_table_definitions(). +%% TODO: re-work this abuse of the application env as a register with +%% the generic registry that should be landing at some point. add_table_definition(Def) -> ok = application:set_env(rabbit, plugin_mnesia_tables, [Def | plugin_table_definitions()], infinity). -- cgit v1.2.1 From c87841495733b9bdfde4ee8d8528f0979045b04a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 8 Dec 2010 16:26:54 +0000 Subject: On other node death, don't delete queues which still have some extra pids. --- src/rabbit_amqqueue.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index e322e844..f525ffd1 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -492,9 +492,11 @@ on_node_down(Node) -> rabbit_binding:new_deletions(), rabbit_misc:execute_mnesia_transaction( fun () -> qlc:e(qlc:q([delete_queue(QueueName) || - #amqqueue{name = QueueName, pid = Pid} + #amqqueue{name = QueueName, pid = Pid, + extra_pids = EPids} <- mnesia:table(rabbit_queue), - node(Pid) == Node])) + node(Pid) == Node, + [] =:= EPids])) end))). delete_queue(QueueName) -> -- cgit v1.2.1 From 08bf37780a75601c8dd58d134a29215c4c5bc030 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Dec 2010 12:42:57 +0000 Subject: Allow gen_server2s to change their callback modules dynamically. That was easy. 
--- src/gen_server2.erl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 6e02b23e..a54df3b8 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -880,6 +880,10 @@ handle_common_reply(Reply, Msg, GS2State = #gs2_state { name = Name, loop(GS2State #gs2_state { state = NState, time = Time1, debug = Debug1 }); + {become, Mod, NState} -> + loop(find_prioritisers( + GS2State #gs2_state { mod = Mod, + state = NState })); _ -> handle_common_termination(Reply, Msg, GS2State) end. -- cgit v1.2.1 From ca6ef9622a8268726504d348c85b6261d1a50d31 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Dec 2010 12:48:26 +0000 Subject: Increase the probability that the above works --- src/gen_server2.erl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gen_server2.erl b/src/gen_server2.erl index a54df3b8..d6b09bab 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -881,9 +881,21 @@ handle_common_reply(Reply, Msg, GS2State = #gs2_state { name = Name, time = Time1, debug = Debug1 }); {become, Mod, NState} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {become, Mod, NState}), + loop(find_prioritisers( + GS2State #gs2_state { mod = Mod, + state = NState, + time = infinity, + debug = Debug1 })); + {become, Mod, NState, Time1} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {become, Mod, NState}), loop(find_prioritisers( GS2State #gs2_state { mod = Mod, - state = NState })); + state = NState, + time = Time1, + debug = Debug1 })); _ -> handle_common_termination(Reply, Msg, GS2State) end. -- cgit v1.2.1 From 263b80a556fabe6927f85e793a207ab49adce887 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Dec 2010 16:41:20 +0000 Subject: Permit queue processes to be initialised with existing state. 
This raises the need for the ability to change the callbacks in rabbit_memory_monitor and fhc --- src/rabbit_amqqueue_process.erl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index cecc85d0..027a82e7 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -48,6 +48,8 @@ handle_info/2, handle_pre_hibernate/1, prioritise_call/3, prioritise_cast/2, prioritise_info/2]). +-export([init_with_backing_queue_state/3]). + -import(queue). -import(erlang). -import(lists). @@ -133,6 +135,28 @@ init(Q) -> guid_to_channel = dict:new()}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. +init_with_backing_queue_state(Q, BQ, BQS) -> + ?LOGDEBUG("Queue starting - ~p~n", [Q]), + process_flag(trap_exit, true), + ok = file_handle_cache:register_callback( + rabbit_amqqueue, set_maximum_since_use, [self()]), + ok = rabbit_memory_monitor:register( + self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), + process_args(#q{q = Q#amqqueue{pid = self()}, + exclusive_consumer = none, + has_had_consumers = false, + backing_queue = BQ, + backing_queue_state = BQS, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + expires = undefined, + sync_timer_ref = undefined, + rate_timer_ref = undefined, + expiry_timer_ref = undefined, + ttl = undefined, + stats_timer = rabbit_event:init_stats_timer(), + guid_to_channel = dict:new()}). + terminate(shutdown, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); terminate({shutdown, _}, State = #q{backing_queue = BQ}) -> -- cgit v1.2.1 From fc23d5651234bd999dd0388b1f8d7801908b817b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Dec 2010 17:01:06 +0000 Subject: Turns out the memory_monitor and fhc callback registration is idempotent, and it's also just simply easier to correctly fake the amqqueue_process API. 
Need to then deal with rate timers and pre hibernation callbacks. Groan --- src/rabbit_amqqueue_process.erl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 027a82e7..49856431 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -138,10 +138,6 @@ init(Q) -> init_with_backing_queue_state(Q, BQ, BQS) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), - ok = file_handle_cache:register_callback( - rabbit_amqqueue, set_maximum_since_use, [self()]), - ok = rabbit_memory_monitor:register( - self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), process_args(#q{q = Q#amqqueue{pid = self()}, exclusive_consumer = none, has_had_consumers = false, -- cgit v1.2.1 From 322295618ae9593e10ae9fb8ee4eace840ef8a5e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Dec 2010 17:43:46 +0000 Subject: Predictions of parameter explosions are high --- src/rabbit_amqqueue_process.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 49856431..468a6ee3 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -48,7 +48,7 @@ handle_info/2, handle_pre_hibernate/1, prioritise_call/3, prioritise_cast/2, prioritise_info/2]). --export([init_with_backing_queue_state/3]). +-export([init_with_backing_queue_state/4]). -import(queue). -import(erlang). @@ -135,7 +135,7 @@ init(Q) -> guid_to_channel = dict:new()}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
-init_with_backing_queue_state(Q, BQ, BQS) -> +init_with_backing_queue_state(Q, BQ, BQS, RateTRef) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), process_flag(trap_exit, true), process_args(#q{q = Q#amqqueue{pid = self()}, @@ -147,7 +147,7 @@ init_with_backing_queue_state(Q, BQ, BQS) -> blocked_consumers = queue:new(), expires = undefined, sync_timer_ref = undefined, - rate_timer_ref = undefined, + rate_timer_ref = RateTRef, expiry_timer_ref = undefined, ttl = undefined, stats_timer = rabbit_event:init_stats_timer(), -- cgit v1.2.1 From 471480808bf862511f2277b7f2eb815db52c7342 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 9 Dec 2010 17:55:51 +0000 Subject: Contract will be process is already trapping exits --- src/rabbit_amqqueue_process.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 468a6ee3..d781cd35 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -137,7 +137,6 @@ init(Q) -> init_with_backing_queue_state(Q, BQ, BQS, RateTRef) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), - process_flag(trap_exit, true), process_args(#q{q = Q#amqqueue{pid = self()}, exclusive_consumer = none, has_had_consumers = false, -- cgit v1.2.1 From b1bebd2cf15126d813995350334f7562fe3dcd84 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Dec 2010 23:53:06 +0000 Subject: Alter the result of BQ:requeue so that it also returns the guids of the requeued messages. This makes it match the spec of ack closely and is more logical. Also, entirely coincidentally, happens to be necessary for HA... 
--- include/rabbit_backing_queue_spec.hrl | 2 +- src/rabbit_amqqueue_process.erl | 4 +++- src/rabbit_tests.erl | 3 ++- src/rabbit_variable_queue.erl | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 0f831a7d..a330fe1e 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -68,7 +68,7 @@ (rabbit_types:txn(), fun (() -> any()), message_properties_transformer(), state()) -> {[ack()], state()}). -spec(requeue/3 :: ([ack()], message_properties_transformer(), state()) - -> state()). + -> {[rabbit_guid:guid()], state()}). -spec(len/1 :: (state()) -> non_neg_integer()). -spec(is_empty/1 :: (state()) -> boolean()). -spec(set_ram_duration_target/2 :: diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index c1972c26..1c4a3716 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -544,7 +544,9 @@ deliver_or_enqueue(Delivery, State) -> requeue_and_run(AckTags, State = #q{backing_queue = BQ, ttl=TTL}) -> maybe_run_queue_via_backing_queue( fun (BQS) -> - BQ:requeue(AckTags, reset_msg_expiry_fun(TTL), BQS) + {_Guids, BQS1} = + BQ:requeue(AckTags, reset_msg_expiry_fun(TTL), BQS), + BQS1 end, State). 
fetch(AckRequired, State = #q{backing_queue_state = BQS, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 3343bb99..7f6c5d3d 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -2047,7 +2047,8 @@ test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), VQ2 = variable_queue_publish(false, 4, VQ1), {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2), - VQ4 = rabbit_variable_queue:requeue(AckTags, fun(X) -> X end, VQ3), + {_Guids, VQ4} = + rabbit_variable_queue:requeue(AckTags, fun(X) -> X end, VQ3), VQ5 = rabbit_variable_queue:idle_timeout(VQ4), _VQ6 = rabbit_variable_queue:terminate(VQ5), VQ7 = rabbit_variable_queue:init(test_amqqueue(true), true, diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index d1da2c89..acbbe458 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -716,7 +716,7 @@ tx_commit(Txn, Fun, MsgPropsFun, end)}. requeue(AckTags, MsgPropsFun, State) -> - {_Guids, State1} = + {Guids, State1} = ack(fun msg_store_release/3, fun (#msg_status { msg = Msg, msg_props = MsgProps }, State1) -> {_SeqId, State2} = publish(Msg, MsgPropsFun(MsgProps), @@ -732,7 +732,7 @@ requeue(AckTags, MsgPropsFun, State) -> State3 end, AckTags, State), - a(reduce_memory_use(State1)). + {Guids, a(reduce_memory_use(State1))}. len(#vqstate { len = Len }) -> Len. 
-- cgit v1.2.1 From d8c97cf013c19cb19ba0d3235b4b030f4ed2690b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 13 Dec 2010 13:58:38 +0000 Subject: Extend state conversion function --- src/rabbit_amqqueue_process.erl | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 1c4a3716..a7468936 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -48,7 +48,7 @@ handle_info/2, handle_pre_hibernate/1, prioritise_call/3, prioritise_cast/2, prioritise_info/2]). --export([init_with_backing_queue_state/4]). +-export([init_with_backing_queue_state/6]). % Queue's state -record(q, {q, @@ -131,22 +131,30 @@ init(Q) -> guid_to_channel = dict:new()}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -init_with_backing_queue_state(Q, BQ, BQS, RateTRef) -> +init_with_backing_queue_state(Q, BQ, BQS, RateTRef, AckTags, Deliveries) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), - process_args(#q{q = Q#amqqueue{pid = self()}, - exclusive_consumer = none, - has_had_consumers = false, - backing_queue = BQ, - backing_queue_state = BQS, - active_consumers = queue:new(), - blocked_consumers = queue:new(), - expires = undefined, - sync_timer_ref = undefined, - rate_timer_ref = RateTRef, - expiry_timer_ref = undefined, - ttl = undefined, - stats_timer = rabbit_event:init_stats_timer(), - guid_to_channel = dict:new()}). 
+ State = requeue_and_run( + AckTags, + process_args( + #q{q = Q#amqqueue{pid = self()}, + exclusive_consumer = none, + has_had_consumers = false, + backing_queue = BQ, + backing_queue_state = BQS, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + expires = undefined, + sync_timer_ref = undefined, + rate_timer_ref = RateTRef, + expiry_timer_ref = undefined, + ttl = undefined, + stats_timer = rabbit_event:init_stats_timer(), + guid_to_channel = dict:new()})), + lists:foldl( + fun (Delivery, StateN) -> + {_Delivered, StateN1} = deliver_or_enqueue(Delivery, StateN), + StateN1 + end, State, Deliveries). terminate(shutdown, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); -- cgit v1.2.1 From fbd954e2f89208dd033baf80d9dc8cd07bc5285c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 14 Dec 2010 17:52:13 +0000 Subject: inline --- src/rabbit_amqqueue.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 1e83265f..e4bc9f76 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -488,10 +488,9 @@ on_node_down(Node) -> rabbit_misc:execute_mnesia_transaction( fun () -> qlc:e(qlc:q([delete_queue(QueueName) || #amqqueue{name = QueueName, pid = Pid, - extra_pids = EPids} + extra_pids = []} <- mnesia:table(rabbit_queue), - node(Pid) == Node, - [] =:= EPids])) + node(Pid) == Node])) end))). 
delete_queue(QueueName) -> -- cgit v1.2.1 From c9a9f9d93ce6e66ff1c02ad5d03636307d9ecb68 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 16 Dec 2010 15:00:36 +0000 Subject: Move the various files from rabbit-ha to rabbit-server as the ha work can't exist as a plugin --- include/gm_specs.hrl | 28 + src/gm.erl | 1312 +++++++++++++++++++++++++++++++ src/gm_test.erl | 120 +++ src/rabbit_mirror_queue_coordinator.erl | 125 +++ src/rabbit_mirror_queue_master.erl | 221 ++++++ src/rabbit_mirror_queue_misc.erl | 42 + src/rabbit_mirror_queue_slave.erl | 481 +++++++++++ src/rabbit_mirror_queue_slave_sup.erl | 54 ++ 8 files changed, 2383 insertions(+) create mode 100644 include/gm_specs.hrl create mode 100644 src/gm.erl create mode 100644 src/gm_test.erl create mode 100644 src/rabbit_mirror_queue_coordinator.erl create mode 100644 src/rabbit_mirror_queue_master.erl create mode 100644 src/rabbit_mirror_queue_misc.erl create mode 100644 src/rabbit_mirror_queue_slave.erl create mode 100644 src/rabbit_mirror_queue_slave_sup.erl diff --git a/include/gm_specs.hrl b/include/gm_specs.hrl new file mode 100644 index 00000000..7f607755 --- /dev/null +++ b/include/gm_specs.hrl @@ -0,0 +1,28 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% + +-ifdef(use_specs). + +-type(callback_result() :: 'ok' | {'stop', any()}). +-type(args() :: [any()]). +-type(members() :: [pid()]). 
+ +-spec(joined/2 :: (args(), members()) -> callback_result()). +-spec(members_changed/3 :: (args(), members(), members()) -> callback_result()). +-spec(handle_msg/3 :: (args(), pid(), any()) -> callback_result()). +-spec(terminate/2 :: (args(), term()) -> any()). + +-endif. diff --git a/src/gm.erl b/src/gm.erl new file mode 100644 index 00000000..47971bd4 --- /dev/null +++ b/src/gm.erl @@ -0,0 +1,1312 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% + +-module(gm). + +%% Guaranteed Multicast +%% ==================== +%% +%% This module provides the ability to create named groups of +%% processes to which members can be dynamically added and removed, +%% and for messages to be broadcast within the group that are +%% guaranteed to reach all members of the group during the lifetime of +%% the message. The lifetime of a message is defined as being, at a +%% minimum, the time from which the message is first sent to any +%% member of the group, up until the time at which it is known by the +%% member who published the message that the message has reached all +%% group members. +%% +%% The guarantee given is that provided a message, once sent, makes it +%% to members who do not all leave the group, the message will +%% continue to propagate to all group members. 
+%% +%% Another way of stating the guarantee is that if member P publishes +%% messages m and m', then for all members P', if P' is a member of +%% the group prior to the publication of m, and P' receives m', then +%% P' will receive m. +%% +%% Note that only local-ordering is enforced: i.e. if member P sends +%% message m and then message m', then for-all members P', if P' +%% receives m and m', then they will receive m' after m. Causality +%% ordering is _not_ enforced. I.e. if member P receives message m +%% and as a result publishes message m', there is no guarantee that +%% other members P' will receive m before m'. +%% +%% +%% API Use +%% ------- +%% +%% Mnesia must be started. Use the idempotent create_tables/0 function +%% to create the tables required. +%% +%% start_link/3 +%% Provide the group name, the callback module name, and a list of any +%% arguments you wish to be passed into the callback module's +%% functions. The joined/1 will be called when we have joined the +%% group, and the list of arguments will have appended to it a list of +%% the current members of the group. See the comments in +%% behaviour_info/1 below for further details of the callback +%% functions. +%% +%% leave/1 +%% Provide the Pid. Removes the Pid from the group. The callback +%% terminate/1 function will be called. +%% +%% broadcast/2 +%% Provide the Pid and a Message. The message will be sent to all +%% members of the group as per the guarantees given above. This is a +%% cast and the function call will return immediately. There is no +%% guarantee that the message will reach any member of the group. +%% +%% confirmed_broadcast/2 +%% Provide the Pid and a Message. As per broadcast/2 except that this +%% is a call, not a cast, and only returns 'ok' once the Message has +%% reached every member of the group. Do not call +%% confirmed_broadcast/2 directly from the callback module otherwise +%% you will deadlock the entire group. +%% +%% group_members/1 +%% Provide the Pid. 
Returns a list of the current group members. +%% +%% +%% Implementation Overview +%% ----------------------- +%% +%% One possible means of implementation would be a fan-out from the +%% sender to every member of the group. This would require that the +%% group is fully connected, and, in the event that the original +%% sender of the message disappears from the group before the message +%% has made it to every member of the group, raises questions as to +%% who is responsible for sending on the message to new group members. +%% In particular, the issue is with [ Pid ! Msg || Pid <- Members ] - +%% if the sender dies part way through, who is responsible for +%% ensuring that the remaining Members receive the Msg? In the event +%% that within the group, messages sent are broadcast from a subset of +%% the members, the fan-out arrangement has the potential to +%% substantially impact the CPU and network workload of such members, +%% as such members would have to accommodate the cost of sending each +%% message to every group member. +%% +%% Instead, if the members of the group are arranged in a chain, then +%% it becomes easier to reason about who within the group has received +%% each message and who has not. It eases issues of responsibility: in +%% the event of a group member disappearing, the nearest upstream +%% member of the chain is responsible for ensuring that messages +%% continue to propagate down the chain. It also results in equal +%% distribution of sending and receiving workload, even if all +%% messages are being sent from just a single group member. This +%% configuration has the further advantage that it is not necessary +%% for every group member to know of every other group member, and +%% even that a group member does not have to be accessible from all +%% other group members. +%% +%% Performance is kept high by permitting pipelining and all +%% communication between joined group members is asynchronous. 
In the +%% chain A -> B -> C -> D, if A sends a message to the group, it will +%% not directly contact C or D. However, it must know that D receives +%% the message (in addition to B and C) before it can consider the +%% message fully sent. A simplistic implementation would require that +%% D replies to C, C replies to B and B then replies to A. This would +%% result in a propagation delay of twice the length of the chain. It +%% would also require, in the event of the failure of C, that D knows +%% to directly contact B and issue the necessary replies. Instead, the +%% chain forms a ring: D sends the message on to A: D does not +%% distinguish A as the sender, merely as the next member (downstream) +%% within the chain (which has now become a ring). When A receives +%% from D messages that A sent, it knows that all members have +%% received the message. However, the message is not dead yet: if C +%% died as B was sending to C, then B would need to detect the death +%% of C and forward the message on to D instead: thus every node has +%% to remember every message published until it is told that it can +%% forget about the message. This is essential not just for dealing +%% with failure of members, but also for the addition of new members. +%% +%% Thus once A receives the message back again, it then sends to B an +%% acknowledgement for the message, indicating that B can now forget +%% about the message. B does so, and forwards the ack to C. C forgets +%% the message, and forwards the ack to D, which forgets the message +%% and finally forwards the ack back to A. At this point, A takes no +%% further action: the message and its acknowledgement have made it to +%% every member of the group. The message is now dead, and any new +%% member joining the group at this point will not receive the +%% message. +%% +%% We therefore have two roles: +%% +%% 1. 
The sender, who upon receiving their own messages back, must +%% then send out acknowledgements, and upon receiving their own +%% acknowledgements back perform no further action. +%% +%% 2. The other group members who upon receiving messages and +%% acknowledgements must update their own internal state accordingly +%% (the sending member must also do this in order to be able to +%% accommodate failures), and forwards messages on to their downstream +%% neighbours. +%% +%% +%% Implementation: It gets trickier +%% -------------------------------- +%% +%% Chain A -> B -> C -> D +%% +%% A publishes a message which B receives. A now dies. B and D will +%% detect the death of A, and will link up, thus the chain is now B -> +%% C -> D. B forwards A's message on to C, who forwards it to D, who +%% forwards it to B. Thus B is now responsible for A's messages - both +%% publications and acknowledgements that were in flight at the point +%% at which A died. Even worse is that this is transitive: after B +%% forwards A's message to C, B dies as well. Now C is not only +%% responsible for B's in-flight messages, but is also responsible for +%% A's in-flight messages. +%% +%% Lemma 1: A member can only determine which dead members they have +%% inherited responsibility for if there is a total ordering on the +%% conflicting additions and subtractions of members from the group. +%% +%% Consider the simultaneous death of B and addition of B' that +%% transitions a chain from A -> B -> C to A -> B' -> C. Either B' or +%% C is responsible for in-flight messages from B. It is easy to +%% ensure that at least one of them thinks they have inherited B, but +%% if we do not ensure that exactly one of them inherits B, then we +%% could have B' converting publishes to acks, which then will crash C +%% as C does not believe it has issued acks for those messages. +%% +%% More complex scenarios are easy to concoct: A -> B -> C -> D -> E +%% becoming A -> C' -> E. 
Who has inherited which of B, C and D? +%% +%% However, for non-conflicting membership changes, only a partial +%% ordering is required. For example, A -> B -> C becoming A -> A' -> +%% B. The addition of A', between A and B can have no conflicts with +%% the death of C: it is clear that A has inherited C's messages. +%% +%% For ease of implementation, we adopt the simple solution, of +%% imposing a total order on all membership changes. +%% +%% On the death of a member, it is ensured the dead member's +%% neighbours become aware of the death, and the upstream neighbour +%% now sends to its new downstream neighbour its state, including the +%% messages pending acknowledgement. The downstream neighbour can then +%% use this to calculate which publishes and acknowledgements it has +%% missed out on, due to the death of its old upstream. Thus the +%% downstream can catch up, and continues the propagation of messages +%% through the group. +%% +%% Lemma 2: When a member is joining, it must synchronously +%% communicate with its upstream member in order to receive its +%% starting state atomically with its addition to the group. +%% +%% New members must start with the same state as their nearest +%% upstream neighbour. This ensures that it is not surprised by +%% acknowledgements they are sent, and that should their downstream +%% neighbour die, they are able to send the correct state to their new +%% downstream neighbour to ensure it can catch up. Thus in the +%% transition A -> B -> C becomes A -> A' -> B -> C becomes A -> A' -> +%% C, A' must start with the state of A, so that it can send C the +%% correct state when B dies, allowing C to detect any missed +%% messages. +%% +%% If A' starts by adding itself to the group membership, A could then +%% die, without A' having received the necessary state from A. This +%% would leave A' responsible for in-flight messages from A, but +%% having the least knowledge of all, of those messages. 
Thus A' must +%% start by synchronously calling A, which then immediately sends A' +%% back its state. A then adds A' to the group. If A dies at this +%% point then A' will be able to see this (as A' will fail to appear +%% in the group membership), and thus A' will ignore the state it +%% receives from A, and will simply repeat the process, trying to now +%% join downstream from some other member. This ensures that should +%% the upstream die as soon as the new member has been joined, the new +%% member is guaranteed to receive the correct state, allowing it to +%% correctly process messages inherited due to the death of its +%% upstream neighbour. +%% +%% The canonical definition of the group membership is held by a +%% distributed database. Whilst this allows the total ordering of +%% changes to be achieved, it is nevertheless undesirable to have to +%% query this database for the current view, upon receiving each +%% message. Instead, we wish for members to be able to cache a view of +%% the group membership, which then requires a cache invalidation +%% mechanism. Each member maintains its own view of the group +%% membership. Thus when the group's membership changes, members may +%% need to become aware of such changes in order to be able to +%% accurately process messages they receive. Because of the +%% requirement of a total ordering of conflicting membership changes, +%% it is not possible to use the guaranteed broadcast mechanism to +%% communicate these changes: to achieve the necessary ordering, it +%% would be necessary for such messages to be published by exactly one +%% member, which can not be guaranteed given that such a member could +%% die. +%% +%% The total ordering we enforce on membership changes gives rise to a +%% view version number: every change to the membership creates a +%% different view, and the total ordering permits a simple +%% monotonically increasing view version number. 
+%% +%% Lemma 3: If a message is sent from a member that holds view version +%% N, it can be correctly processed by any member receiving the +%% message with a view version >= N. +%% +%% Initially, let us suppose that each view contains the ordering of +%% every member that was ever part of the group. Dead members are +%% marked as such. Thus we have a ring of members, some of which are +%% dead, and are thus inherited by the nearest alive downstream +%% member. +%% +%% In the chain A -> B -> C, all three members initially have view +%% version 1, which reflects reality. B publishes a message, which is +%% forward by C to A. B now dies, which A notices very quickly. Thus A +%% updates the view, creating version 2. It now forwards B's +%% publication, sending that message to its new downstream neighbour, +%% C. This happens before C is aware of the death of B. C must become +%% aware of the view change before it interprets the message its +%% received, otherwise it will fail to learn of the death of B, and +%% thus will not realise it has inherited B's messages (and will +%% likely crash). +%% +%% Thus very simply, we have that each subsequent view contains more +%% information than the preceding view. +%% +%% However, to avoid the views growing indefinitely, we need to be +%% able to delete members which have died _and_ for which no messages +%% are in-flight. This requires that upon inheriting a dead member, we +%% know the last publication sent by the dead member (this is easy: we +%% inherit a member because we are the nearest downstream member which +%% implies that we know at least as much than everyone else about the +%% publications of the dead member), and we know the earliest message +%% for which the acknowledgement is still in flight. +%% +%% In the chain A -> B -> C, when B dies, A will send to C its state +%% (as C is the new downstream from A), allowing C to calculate which +%% messages it has missed out on (described above). 
At this point, C +%% also inherits B's messages. If that state from A also includes the +%% last message published by B for which an acknowledgement has been +%% seen, then C knows exactly which further acknowledgements it must +%% receive (also including issuing acknowledgements for publications +%% still in-flight that it receives), after which it is known there +%% are no more messages in flight for B, thus all evidence that B was +%% ever part of the group can be safely removed from the canonical +%% group membership. +%% +%% Thus, for every message that a member sends, it includes with that +%% message its view version. When a member receives a message it will +%% update its view from the canonical copy, should its view be older +%% than the view version included in the message it has received. +%% +%% The state held by each member therefore includes the messages from +%% each publisher pending acknowledgement, the last publication seen +%% from that publisher, and the last acknowledgement from that +%% publisher. In the case of the member's own publications or +%% inherited members, this last acknowledgement seen state indicates +%% the last acknowledgement retired, rather than sent. +%% +%% +%% Proof sketch +%% ------------ +%% +%% We need to prove that with the provided operational semantics, we +%% can never reach a state that is not well formed from a well-formed +%% starting state. +%% +%% Operational semantics (small step): straight-forward message +%% sending, process monitoring, state updates. +%% +%% Well formed state: dead members inherited by exactly one non-dead +%% member; for every entry in anyone's pending-acks, either (the +%% publication of the message is in-flight downstream from the member +%% and upstream from the publisher) or (the acknowledgement of the +%% message is in-flight downstream from the publisher and upstream +%% from the member). +%% +%% Proof by induction on the applicable operational semantics. 
+%% +%% +%% Related work +%% ------------ +%% +%% The ring configuration and double traversal of messages around the +%% ring is similar (though developed independently) to the LCR +%% protocol by [Levy 2008]. However, LCR differs in several +%% ways. Firstly, by using vector clocks, it enforces a total order of +%% message delivery, which is unnecessary for our purposes. More +%% significantly, it is built on top of a "group communication system" +%% which performs the group management functions, taking +%% responsibility away from the protocol as to how to cope with safely +%% adding and removing members. When membership changes do occur, the +%% protocol stipulates that every member must perform communication +%% with every other member of the group, to ensure all outstanding +%% deliveries complete, before the entire group transitions to the new +%% view. This, in total, requires two sets of all-to-all synchronous +%% communications. +%% +%% This is not only rather inefficient, but also does not explain what +%% happens upon the failure of a member during this process. It does +%% though entirely avoid the need for inheritance of responsibility of +%% dead members that our protocol incorporates. +%% +%% In [Marandi et al 2010], a Paxos-based protocol is described. This +%% work explicitly focuses on the efficiency of communication. LCR +%% (and our protocol too) are more efficient, but at the cost of +%% higher latency. The Ring-Paxos protocol is itself built on top of +%% IP-multicast, which rules it out for many applications where +%% point-to-point communication is all that can be required. They also +%% have an excellent related work section which I really ought to +%% read... +%% +%% +%% [Levy 2008] The Complexity of Reliable Distributed Storage, 2008. +%% [Marandi et al 2010] Ring Paxos: A High-Throughput Atomic Broadcast +%% Protocol + + +-behaviour(gen_server2). 
+ +-export([create_tables/0, start_link/3, leave/1, broadcast/2, + confirmed_broadcast/2, group_members/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([behaviour_info/1]). + +-export([add_to_rabbit_mnesia/0]). + +-define(GROUP_TABLE, gm_group). +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). +-define(SETS, ordsets). + +-record(state, + { self, + left, + right, + group_name, + module, + view, + pub_count, + members_state, + callback_args, + confirms + }). + +-record(gm_group, { name, version, members }). + +-record(view_member, { id, aliases, left, right }). + +-record(member, { pending_ack, last_pub, last_ack }). + +-define(TABLE, {?GROUP_TABLE, [{record_name, gm_group}, + {attributes, record_info(fields, gm_group)}]}). +-define(TABLE_MATCH, {match, #gm_group { _ = '_' }}). + +-rabbit_boot_step({gm_tables, + [{description, "add GM tables to rabbit_mnesia"}, + {mfa, {?MODULE, add_to_rabbit_mnesia, []}}, + {enables, database}]}). + +-define(TAG, '$gm'). + +-ifdef(use_specs). + +-export_type([group_name/0]). + +-type(group_name() :: any()). + +-spec(create_tables/0 :: () -> 'ok'). +-spec(start_link/3 :: (group_name(), atom(), [any()]) -> + {'ok', pid()} | {'error', any()}). +-spec(leave/1 :: (pid()) -> 'ok'). +-spec(broadcast/2 :: (pid(), any()) -> 'ok'). +-spec(confirmed_broadcast/2 :: (pid(), any()) -> 'ok'). +-spec(group_members/1 :: (pid()) -> [pid()]). + +-endif. + +behaviour_info(callbacks) -> + [ + %% Called when we've successfully joined the group. Supplied with + %% Args provided in start_link, plus current group members. + {joined, 2}, + + %% Supplied with Args provided in start_link, the list of new + %% members and the list of members previously known to us that + %% have since died. Note that if a member joins and dies very + %% quickly, it's possible that we will never see that member + %% appear in either births or deaths. 
However we are guaranteed
+     %% that (1) we will see a member joining either in the births
+     %% here, or in the members passed to joined/2 before receiving
+     %% any messages from it; and (2) we will not see members die that
+     %% we have not seen born (or supplied in the members to
+     %% joined/2).
+     {members_changed, 3},
+
+     %% Supplied with Args provided in start_link, the sender, and the
+     %% message. This does get called for messages injected by this
+     %% member, however, in such cases, there is no special
+     %% significance of this call: it does not indicate that the
+     %% message has made it to any other members, let alone all other
+     %% members.
+     {handle_msg, 3},
+
+     %% Called on gm member termination as per rules in gen_server,
+     %% with the Args provided in start_link plus the termination
+     %% Reason.
+     {terminate, 2}
+    ];
+behaviour_info(_Other) ->
+    undefined.
+
+create_tables() ->
+    create_tables([?TABLE]).
+
+create_tables([]) ->
+    ok;
+create_tables([{Table, Attributes} | Tables]) ->
+    case mnesia:create_table(Table, Attributes) of
+        {atomic, ok}                          -> create_tables(Tables);
+        {aborted, {already_exists, gm_group}} -> create_tables(Tables);
+        Err                                   -> Err
+    end.
+
+add_to_rabbit_mnesia() ->
+    {Name, Attributes} = ?TABLE,
+    ok = rabbit_mnesia:add_table_definition(
+           {Name, [?TABLE_MATCH | Attributes]}).
+
+start_link(GroupName, Module, Args) ->
+    gen_server2:start_link(?MODULE, [GroupName, Module, Args], []).
+
+leave(Server) ->
+    gen_server2:cast(Server, leave).
+
+broadcast(Server, Msg) ->
+    gen_server2:cast(Server, {broadcast, Msg}).
+
+confirmed_broadcast(Server, Msg) ->
+    gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity).
+
+group_members(Server) ->
+    gen_server2:call(Server, group_members, infinity).
+ + +init([GroupName, Module, Args]) -> + random:seed(now()), + gen_server2:cast(self(), join), + Self = self(), + {ok, #state { self = Self, + left = {Self, undefined}, + right = {Self, undefined}, + group_name = GroupName, + module = Module, + view = undefined, + pub_count = 0, + members_state = undefined, + callback_args = Args, + confirms = queue:new() }, hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + + +handle_call({confirmed_broadcast, _Msg}, _From, + State = #state { members_state = undefined }) -> + reply(not_joined, State); + +handle_call({confirmed_broadcast, Msg}, _From, + State = #state { self = Self, + right = {Self, undefined}, + module = Module, + callback_args = Args }) -> + handle_callback_result({Module:handle_msg(Args, Self, Msg), ok, State}); + +handle_call({confirmed_broadcast, Msg}, From, State) -> + internal_broadcast(Msg, From, State); + +handle_call(group_members, _From, + State = #state { members_state = undefined }) -> + reply(not_joined, State); + +handle_call(group_members, _From, State = #state { view = View }) -> + reply(alive_view_members(View), State); + +handle_call({add_on_right, _NewMember}, _From, + State = #state { members_state = undefined }) -> + reply(not_ready, State); + +handle_call({add_on_right, NewMember}, _From, + State = #state { self = Self, + group_name = GroupName, + view = View, + members_state = MembersState, + module = Module, + callback_args = Args }) -> + Group = record_new_member_in_group( + GroupName, Self, NewMember, + fun (Group1) -> + View1 = group_to_view(Group1), + ok = send_right(NewMember, View1, + {catchup, Self, prepare_members_state( + MembersState)}) + end), + View2 = group_to_view(Group), + State1 = check_neighbours(State #state { view = View2 }), + Result = callback_view_changed(Args, Module, View, View2), + handle_callback_result({Result, {ok, Group}, State1}). 
+ + +handle_cast({?TAG, ReqVer, Msg}, + State = #state { view = View, + group_name = GroupName, + module = Module, + callback_args = Args }) -> + {Result, State1} = + case needs_view_update(ReqVer, View) of + true -> + View1 = group_to_view(read_group(GroupName)), + {callback_view_changed(Args, Module, View, View1), + check_neighbours(State #state { view = View1 })}; + false -> + {ok, State} + end, + handle_callback_result( + if_callback_success( + Result, + fun (_Result1, State2) -> handle_msg(Msg, State2) end, + fun (Result1, State2) -> {Result1, State2} end, + State1)); + +handle_cast({broadcast, _Msg}, State = #state { members_state = undefined }) -> + noreply(State); + +handle_cast({broadcast, Msg}, + State = #state { self = Self, + right = {Self, undefined}, + module = Module, + callback_args = Args }) -> + handle_callback_result({Module:handle_msg(Args, Self, Msg), State}); + +handle_cast({broadcast, Msg}, State) -> + internal_broadcast(Msg, none, State); + +handle_cast(join, State = #state { self = Self, + group_name = GroupName, + members_state = undefined, + module = Module, + callback_args = Args }) -> + View = join_group(Self, GroupName), + MembersState = + case alive_view_members(View) of + [Self] -> blank_member_state(); + _ -> undefined + end, + State1 = check_neighbours(State #state { view = View, + members_state = MembersState }), + handle_callback_result( + {Module:joined(Args, all_known_members(View)), State1}); + +handle_cast(leave, State) -> + {stop, normal, State}. 
+ + +handle_info({'DOWN', MRef, process, _Pid, _Reason}, + State = #state { self = Self, + left = Left, + right = Right, + group_name = GroupName, + confirms = Confirms }) -> + Member = case {Left, Right} of + {{Member1, MRef}, _} -> Member1; + {_, {Member1, MRef}} -> Member1; + _ -> undefined + end, + case Member of + undefined -> + noreply(State); + _ -> + View1 = + group_to_view(record_dead_member_in_group(Member, GroupName)), + State1 = State #state { view = View1 }, + {Result, State2} = + case alive_view_members(View1) of + [Self] -> + maybe_erase_aliases( + State1 #state { + members_state = blank_member_state(), + confirms = purge_confirms(Confirms) }); + _ -> + {ok, State1} + end, + handle_callback_result({Result, check_neighbours(State2)}) + end. + + +terminate(Reason, #state { module = Module, + callback_args = Args }) -> + Module:terminate(Args, Reason). + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +handle_msg(check_neighbours, State) -> + %% no-op - it's already been done by the calling handle_cast + {ok, State}; + +handle_msg({catchup, Left, MembersStateLeft}, + State = #state { self = Self, + left = {Left, _MRefL}, + right = {Right, _MRefR}, + view = View, + members_state = undefined }) -> + ok = send_right(Right, View, {catchup, Self, MembersStateLeft}), + MembersStateLeft1 = build_members_state(MembersStateLeft), + {ok, State #state { members_state = MembersStateLeft1 }}; + +handle_msg({catchup, Left, MembersStateLeft}, + State = #state { self = Self, + left = {Left, _MRefL}, + view = View, + members_state = MembersState }) + when MembersState =/= undefined -> + MembersStateLeft1 = build_members_state(MembersStateLeft), + AllMembers = lists:usort(dict:fetch_keys(MembersState) ++ + dict:fetch_keys(MembersStateLeft1)), + {MembersState1, Activity} = + lists:foldl( + fun (Id, MembersStateActivity) -> + #member { pending_ack = PALeft, last_ack = LA } = + find_member_or_blank(Id, MembersStateLeft1), + with_member_acc( + fun (#member { 
pending_ack = PA } = Member, Activity1) -> + case is_member_alias(Id, Self, View) of + true -> + {_AcksInFlight, Pubs, _PA1} = + find_prefix_common_suffix(PALeft, PA), + {Member #member { last_ack = LA }, + activity_cons(Id, pubs_from_queue(Pubs), + [], Activity1)}; + false -> + {Acks, _Common, Pubs} = + find_prefix_common_suffix(PA, PALeft), + {Member, + activity_cons(Id, pubs_from_queue(Pubs), + acks_from_queue(Acks), + Activity1)} + end + end, Id, MembersStateActivity) + end, {MembersState, activity_nil()}, AllMembers), + handle_msg({activity, Left, activity_finalise(Activity)}, + State #state { members_state = MembersState1 }); + +handle_msg({catchup, _NotLeft, _MembersState}, State) -> + {ok, State}; + +handle_msg({activity, Left, Activity}, + State = #state { self = Self, + left = {Left, _MRefL}, + module = Module, + view = View, + members_state = MembersState, + confirms = Confirms, + callback_args = Args }) + when MembersState =/= undefined -> + {MembersState1, {Confirms1, Activity1}} = + lists:foldl( + fun ({Id, Pubs, Acks}, MembersStateConfirmsActivity) -> + with_member_acc( + fun (Member = #member { pending_ack = PA, + last_pub = LP, + last_ack = LA }, + {Confirms2, Activity2}) -> + case is_member_alias(Id, Self, View) of + true -> + {ToAck, PA1} = + find_common(queue_from_pubs(Pubs), PA, + queue:new()), + LA1 = last_ack(Acks, LA), + AckNums = acks_from_queue(ToAck), + Confirms3 = maybe_confirm( + Self, Id, Confirms2, AckNums), + {Member #member { pending_ack = PA1, + last_ack = LA1 }, + {Confirms3, + activity_cons( + Id, [], AckNums, Activity2)}}; + false -> + PA1 = apply_acks(Acks, join_pubs(PA, Pubs)), + LA1 = last_ack(Acks, LA), + LP1 = last_pub(Pubs, LP), + {Member #member { pending_ack = PA1, + last_pub = LP1, + last_ack = LA1 }, + {Confirms2, + activity_cons(Id, Pubs, Acks, Activity2)}} + end + end, Id, MembersStateConfirmsActivity) + end, {MembersState, {Confirms, activity_nil()}}, Activity), + State1 = State #state { members_state = 
MembersState1, + confirms = Confirms1 }, + Activity3 = activity_finalise(Activity1), + {Result, State2} = maybe_erase_aliases(State1), + ok = maybe_send_activity(Activity3, State2), + if_callback_success( + Result, + fun (_Result1, State3) -> {callback(Args, Module, Activity3), State3} end, + fun (Result1, State3) -> {Result1, State3} end, + State2); + +handle_msg({activity, _NotLeft, _Activity}, State) -> + {ok, State}. + + +noreply(State) -> + ok = a(State), + {noreply, State, hibernate}. + +reply(Reply, State) -> + ok = a(State), + {reply, Reply, State, hibernate}. + +a(#state { view = undefined }) -> + ok; +a(#state { self = Self, + left = {Left, _MRefL}, + view = View }) -> + #view_member { left = Left } = fetch_view_member(Self, View), + ok. + +internal_broadcast(Msg, From, State = #state { self = Self, + pub_count = PubCount, + members_state = MembersState, + module = Module, + confirms = Confirms, + callback_args = Args }) -> + PubMsg = {PubCount, Msg}, + Activity = activity_cons(Self, [PubMsg], [], activity_nil()), + ok = maybe_send_activity(activity_finalise(Activity), State), + MembersState1 = + with_member( + fun (Member = #member { pending_ack = PA }) -> + Member #member { pending_ack = queue:in(PubMsg, PA) } + end, Self, MembersState), + Confirms1 = case From of + none -> Confirms; + _ -> queue:in({PubCount, From}, Confirms) + end, + handle_callback_result({Module:handle_msg(Args, Self, Msg), + State #state { pub_count = PubCount + 1, + members_state = MembersState1, + confirms = Confirms1 }}). + + +%% --------------------------------------------------------------------------- +%% View construction and inspection +%% --------------------------------------------------------------------------- + +needs_view_update(ReqVer, {Ver, _View}) -> + Ver < ReqVer. + +view_version({Ver, _View}) -> + Ver. + +is_member_alive({dead, _Member}) -> false; +is_member_alive(_) -> true. 
+ +is_member_alias(Self, Self, _View) -> + true; +is_member_alias(Member, Self, View) -> + ?SETS:is_element(Member, + ((fetch_view_member(Self, View)) #view_member.aliases)). + +dead_member_id({dead, Member}) -> Member. + +store_view_member(VMember = #view_member { id = Id }, {Ver, View}) -> + {Ver, dict:store(Id, VMember, View)}. + +with_view_member(Fun, View, Id) -> + store_view_member(Fun(fetch_view_member(Id, View)), View). + +fetch_view_member(Id, {_Ver, View}) -> + dict:fetch(Id, View). + +find_view_member(Id, {_Ver, View}) -> + dict:find(Id, View). + +blank_view(Ver) -> + {Ver, dict:new()}. + +alive_view_members({_Ver, View}) -> + dict:fetch_keys(View). + +all_known_members({_Ver, View}) -> + dict:fold( + fun (Member, #view_member { aliases = Aliases }, Acc) -> + ?SETS:to_list(Aliases) ++ [Member | Acc] + end, [], View). + +group_to_view(#gm_group { members = Members, version = Ver }) -> + Alive = lists:filter(fun is_member_alive/1, Members), + [_|_] = Alive, %% ASSERTION - can't have all dead members + add_aliases(link_view(Alive ++ Alive ++ Alive, blank_view(Ver)), Members). + +link_view([Left, Middle, Right | Rest], View) -> + case find_view_member(Middle, View) of + error -> + link_view( + [Middle, Right | Rest], + store_view_member(#view_member { id = Middle, + aliases = ?SETS:new(), + left = Left, + right = Right }, View)); + {ok, _} -> + View + end; +link_view(_, View) -> + View. + +add_aliases(View, Members) -> + Members1 = ensure_alive_suffix(Members), + {EmptyDeadSet, View1} = + lists:foldl( + fun (Member, {DeadAcc, ViewAcc}) -> + case is_member_alive(Member) of + true -> + {?SETS:new(), + with_view_member( + fun (VMember = + #view_member { aliases = Aliases }) -> + VMember #view_member { + aliases = ?SETS:union(Aliases, DeadAcc) } + end, ViewAcc, Member)}; + false -> + {?SETS:add_element(dead_member_id(Member), DeadAcc), + ViewAcc} + end + end, {?SETS:new(), View}, Members1), + 0 = ?SETS:size(EmptyDeadSet), %% ASSERTION + View1. 
+ +ensure_alive_suffix(Members) -> + queue:to_list(ensure_alive_suffix1(queue:from_list(Members))). + +ensure_alive_suffix1(MembersQ) -> + {{value, Member}, MembersQ1} = queue:out_r(MembersQ), + case is_member_alive(Member) of + true -> MembersQ; + false -> ensure_alive_suffix1(queue:in_r(Member, MembersQ1)) + end. + + +%% --------------------------------------------------------------------------- +%% View modification +%% --------------------------------------------------------------------------- + +join_group(Self, GroupName) -> + join_group(Self, GroupName, read_group(GroupName)). + +join_group(Self, GroupName, {error, not_found}) -> + join_group(Self, GroupName, prune_or_create_group(Self, GroupName)); +join_group(Self, _GroupName, #gm_group { members = [Self] } = Group) -> + group_to_view(Group); +join_group(Self, GroupName, #gm_group { members = Members } = Group) -> + case lists:member(Self, Members) of + true -> + group_to_view(Group); + false -> + case lists:filter(fun is_member_alive/1, Members) of + [] -> + join_group(Self, GroupName, + prune_or_create_group(Self, GroupName)); + Alive -> + Left = lists:nth(random:uniform(length(Alive)), Alive), + try + case gen_server2:call( + Left, {add_on_right, Self}, infinity) of + {ok, Group1} -> group_to_view(Group1); + not_ready -> join_group(Self, GroupName) + end + catch + exit:{R, _} + when R =:= noproc; R =:= normal; R =:= shutdown -> + join_group( + Self, GroupName, + record_dead_member_in_group(Left, GroupName)) + end + end + end. + +read_group(GroupName) -> + case mnesia:dirty_read(?GROUP_TABLE, GroupName) of + [] -> {error, not_found}; + [Group] -> Group + end. 
+ +prune_or_create_group(Self, GroupName) -> + {atomic, Group} = + mnesia:sync_transaction( + fun () -> GroupNew = #gm_group { name = GroupName, + members = [Self], + version = 0 }, + case mnesia:read(?GROUP_TABLE, GroupName) of + [] -> + mnesia:write(GroupNew), + GroupNew; + [Group1 = #gm_group { members = Members }] -> + case lists:any(fun is_member_alive/1, Members) of + true -> Group1; + false -> mnesia:write(GroupNew), + GroupNew + end + end + end), + Group. + +record_dead_member_in_group(Member, GroupName) -> + {atomic, Group} = + mnesia:sync_transaction( + fun () -> [Group1 = #gm_group { members = Members, version = Ver }] = + mnesia:read(?GROUP_TABLE, GroupName), + case lists:splitwith( + fun (Member1) -> Member1 =/= Member end, Members) of + {_Members1, []} -> %% not found - already recorded dead + Group1; + {Members1, [Member | Members2]} -> + Members3 = Members1 ++ [{dead, Member} | Members2], + Group2 = Group1 #gm_group { members = Members3, + version = Ver + 1 }, + mnesia:write(Group2), + Group2 + end + end), + Group. + +record_new_member_in_group(GroupName, Left, NewMember, Fun) -> + {atomic, Group} = + mnesia:sync_transaction( + fun () -> + [#gm_group { members = Members, version = Ver } = Group1] = + mnesia:read(?GROUP_TABLE, GroupName), + {Prefix, [Left | Suffix]} = + lists:splitwith(fun (M) -> M =/= Left end, Members), + Members1 = Prefix ++ [Left, NewMember | Suffix], + Group2 = Group1 #gm_group { members = Members1, + version = Ver + 1 }, + ok = Fun(Group2), + mnesia:write(Group2), + Group2 + end), + Group. 
+ +erase_members_in_group(Members, GroupName) -> + DeadMembers = [{dead, Id} || Id <- Members], + {atomic, Group} = + mnesia:sync_transaction( + fun () -> + [Group1 = #gm_group { members = [_|_] = Members1, + version = Ver }] = + mnesia:read(?GROUP_TABLE, GroupName), + case Members1 -- DeadMembers of + Members1 -> Group1; + Members2 -> Group2 = + Group1 #gm_group { members = Members2, + version = Ver + 1 }, + mnesia:write(Group2), + Group2 + end + end), + Group. + +maybe_erase_aliases(State = #state { self = Self, + group_name = GroupName, + view = View, + members_state = MembersState, + module = Module, + callback_args = Args }) -> + #view_member { aliases = Aliases } = fetch_view_member(Self, View), + {Erasable, MembersState1} + = ?SETS:fold( + fun (Id, {ErasableAcc, MembersStateAcc} = Acc) -> + #member { last_pub = LP, last_ack = LA } = + find_member_or_blank(Id, MembersState), + case can_erase_view_member(Self, Id, LA, LP) of + true -> {[Id | ErasableAcc], + erase_member(Id, MembersStateAcc)}; + false -> Acc + end + end, {[], MembersState}, Aliases), + State1 = State #state { members_state = MembersState1 }, + case Erasable of + [] -> {ok, State1}; + _ -> View1 = group_to_view( + erase_members_in_group(Erasable, GroupName)), + {callback_view_changed(Args, Module, View, View1), + State1 #state { view = View1 }} + end. + +can_erase_view_member(Self, Self, _LA, _LP) -> false; +can_erase_view_member(_Self, _Id, N, N) -> true; +can_erase_view_member(_Self, _Id, _LA, _LP) -> false. 
+
+
+%% ---------------------------------------------------------------------------
+%% View monitoring and maintenance
+%% ---------------------------------------------------------------------------
+
+ensure_neighbour(_Ver, Self, {Self, undefined}, Self) ->
+    {Self, undefined};
+ensure_neighbour(Ver, Self, {Self, undefined}, RealNeighbour) ->
+    ok = gen_server2:cast(RealNeighbour, {?TAG, Ver, check_neighbours}),
+    {RealNeighbour, maybe_monitor(RealNeighbour, Self)};
+ensure_neighbour(_Ver, _Self, {RealNeighbour, MRef}, RealNeighbour) ->
+    {RealNeighbour, MRef};
+ensure_neighbour(Ver, Self, {RealNeighbour, MRef}, Neighbour) ->
+    true = erlang:demonitor(MRef),
+    Msg = {?TAG, Ver, check_neighbours},
+    ok = gen_server2:cast(RealNeighbour, Msg),
+    ok = case Neighbour of
+             Self -> ok;
+             _    -> gen_server2:cast(Neighbour, Msg)
+         end,
+    {Neighbour, maybe_monitor(Neighbour, Self)}.
+
+maybe_monitor(Self, Self) ->
+    undefined;
+maybe_monitor(Other, _Self) ->
+    erlang:monitor(process, Other).
+
+check_neighbours(State = #state { self = Self,
+                                  left = Left,
+                                  right = Right,
+                                  view = View }) ->
+    #view_member { left = VLeft, right = VRight }
+        = fetch_view_member(Self, View),
+    Ver = view_version(View),
+    Left1 = ensure_neighbour(Ver, Self, Left, VLeft),
+    Right1 = ensure_neighbour(Ver, Self, Right, VRight),
+    State1 = State #state { left = Left1, right = Right1 },
+    ok = maybe_send_catchup(Right, State1),
+    State1.
+
+maybe_send_catchup(Right, #state { right = Right }) ->
+    ok;
+maybe_send_catchup(_Right, #state { self = Self,
+                                    right = {Self, undefined} }) ->
+    ok;
+maybe_send_catchup(_Right, #state { members_state = undefined }) ->
+    ok;
+maybe_send_catchup(_Right, #state { self = Self,
+                                    right = {Right, _MRef},
+                                    view = View,
+                                    members_state = MembersState }) ->
+    send_right(Right, View,
+               {catchup, Self, prepare_members_state(MembersState)}).
+
+
+%% ---------------------------------------------------------------------------
+%% Catch-up delta detection
+%% ---------------------------------------------------------------------------
+
+find_prefix_common_suffix(A, B) ->
+    {Prefix, A1} = find_prefix(A, B, queue:new()),
+    {Common, Suffix} = find_common(A1, B, queue:new()),
+    {Prefix, Common, Suffix}.
+
+%% Returns the elements of A that occur before the first element of B,
+%% plus the remainder of A.
+find_prefix(A, B, Prefix) ->
+    case {queue:out(A), queue:out(B)} of
+        {{{value, Val}, _A1}, {{value, Val}, _B1}} ->
+            {Prefix, A};
+        {{empty, A1}, {{value, _A}, _B1}} ->
+            {Prefix, A1};
+        {{{value, {NumA, _MsgA} = Val}, A1},
+         {{value, {NumB, _MsgB}}, _B1}} when NumA < NumB ->
+            find_prefix(A1, B, queue:in(Val, Prefix));
+        {_, {empty, _B1}} ->
+            {A, Prefix} %% Prefix will be empty here
+    end.
+
+%% A should be a prefix of B. Returns the commonality plus the
+%% remainder of B.
+find_common(A, B, Common) ->
+    case {queue:out(A), queue:out(B)} of
+        {{{value, Val}, A1}, {{value, Val}, B1}} ->
+            find_common(A1, B1, queue:in(Val, Common));
+        {{empty, _A}, _} ->
+            {Common, B}
+    end.
+
+
+%% ---------------------------------------------------------------------------
+%% Members helpers
+%% ---------------------------------------------------------------------------
+
+with_member(Fun, Id, MembersState) ->
+    store_member(
+      Id, Fun(find_member_or_blank(Id, MembersState)), MembersState).
+
+with_member_acc(Fun, Id, {MembersState, Acc}) ->
+    {MemberState, Acc1} = Fun(find_member_or_blank(Id, MembersState), Acc),
+    {store_member(Id, MemberState, MembersState), Acc1}.
+
+find_member_or_blank(Id, MembersState) ->
+    case dict:find(Id, MembersState) of
+        {ok, Result} -> Result;
+        error        -> blank_member()
+    end.
+
+erase_member(Id, MembersState) ->
+    dict:erase(Id, MembersState).
+
+blank_member() ->
+    #member { pending_ack = queue:new(), last_pub = -1, last_ack = -1 }.
+
+blank_member_state() ->
+    dict:new().
+ +store_member(Id, MemberState, MembersState) -> + dict:store(Id, MemberState, MembersState). + +prepare_members_state(MembersState) -> + dict:to_list(MembersState). + +build_members_state(MembersStateList) -> + dict:from_list(MembersStateList). + + +%% --------------------------------------------------------------------------- +%% Activity assembly +%% --------------------------------------------------------------------------- + +activity_nil() -> + queue:new(). + +activity_cons(_Id, [], [], Tail) -> + Tail; +activity_cons(Sender, Pubs, Acks, Tail) -> + queue:in({Sender, Pubs, Acks}, Tail). + +activity_finalise(Activity) -> + queue:to_list(Activity). + +maybe_send_activity([], _State) -> + ok; +maybe_send_activity(Activity, #state { self = Self, + right = {Right, _MRefR}, + view = View }) -> + send_right(Right, View, {activity, Self, Activity}). + +send_right(Right, View, Msg) -> + ok = gen_server2:cast(Right, {?TAG, view_version(View), Msg}). + +callback(Args, Module, Activity) -> + lists:foldl( + fun ({Id, Pubs, _Acks}, ok) -> + lists:foldl(fun ({_PubNum, Pub}, ok) -> + Module:handle_msg(Args, Id, Pub); + (_, Error) -> + Error + end, ok, Pubs); + (_, Error) -> + Error + end, ok, Activity). + +callback_view_changed(Args, Module, OldView, NewView) -> + OldMembers = all_known_members(OldView), + NewMembers = all_known_members(NewView), + Births = NewMembers -- OldMembers, + Deaths = OldMembers -- NewMembers, + case {Births, Deaths} of + {[], []} -> ok; + _ -> Module:members_changed(Args, Births, Deaths) + end. + +handle_callback_result({Result, State}) -> + if_callback_success( + Result, + fun (_Result, State1) -> noreply(State1) end, + fun ({stop, Reason}, State1) -> {stop, Reason, State1} end, + State); +handle_callback_result({Result, Reply, State}) -> + if_callback_success( + Result, + fun (_Result, State1) -> reply(Reply, State1) end, + fun ({stop, Reason}, State1) -> {stop, Reason, Reply, State1} end, + State). 
+ +if_callback_success(ok, True, _False, State) -> + True(ok, State); +if_callback_success({become, Module, Args} = Result, True, _False, State) -> + True(Result, State #state { module = Module, + callback_args = Args }); +if_callback_success({stop, _Reason} = Result, _True, False, State) -> + False(Result, State). + +maybe_confirm(_Self, _Id, Confirms, []) -> + Confirms; +maybe_confirm(Self, Self, Confirms, [PubNum | PubNums]) -> + case queue:out(Confirms) of + {empty, _Confirms} -> + Confirms; + {{value, {PubNum, From}}, Confirms1} -> + gen_server2:reply(From, ok), + maybe_confirm(Self, Self, Confirms1, PubNums); + {{value, {PubNum1, _From}}, _Confirms} when PubNum1 > PubNum -> + maybe_confirm(Self, Self, Confirms, PubNums) + end; +maybe_confirm(_Self, _Id, Confirms, _PubNums) -> + Confirms. + +purge_confirms(Confirms) -> + [gen_server2:reply(From, ok) || {_PubNum, From} <- queue:to_list(Confirms)], + queue:new(). + + +%% --------------------------------------------------------------------------- +%% Msg transformation +%% --------------------------------------------------------------------------- + +acks_from_queue(Q) -> + [PubNum || {PubNum, _Msg} <- queue:to_list(Q)]. + +pubs_from_queue(Q) -> + queue:to_list(Q). + +queue_from_pubs(Pubs) -> + queue:from_list(Pubs). + +apply_acks([], Pubs) -> + Pubs; +apply_acks([PubNum | Acks], Pubs) -> + {{value, {PubNum, _Msg}}, Pubs1} = queue:out(Pubs), + apply_acks(Acks, Pubs1). + +join_pubs(Q, []) -> + Q; +join_pubs(Q, Pubs) -> + queue:join(Q, queue_from_pubs(Pubs)). + +last_ack([], LA) -> + LA; +last_ack(List, LA) -> + LA1 = lists:last(List), + true = LA1 > LA, %% ASSERTION + LA1. + +last_pub([], LP) -> + LP; +last_pub(List, LP) -> + {PubNum, _Msg} = lists:last(List), + true = PubNum > LP, %% ASSERTION + PubNum. 
diff --git a/src/gm_test.erl b/src/gm_test.erl new file mode 100644 index 00000000..aebfbb69 --- /dev/null +++ b/src/gm_test.erl @@ -0,0 +1,120 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% + +-module(gm_test). + +-export([test/0]). +-export([joined/2, members_changed/3, handle_msg/3, terminate/2]). + +-behaviour(gm). + +-include("gm_specs.hrl"). + +get_state() -> + get(state). + +with_state(Fun) -> + put(state, Fun(get_state())). + +inc() -> + case 1 + get(count) of + 100000 -> Now = os:timestamp(), + Start = put(ts, Now), + Diff = timer:now_diff(Now, Start), + Rate = 100000 / (Diff / 1000000), + io:format("~p seeing ~p msgs/sec~n", [self(), Rate]), + put(count, 0); + N -> put(count, N) + end. + +joined([], Members) -> + io:format("Joined ~p (~p members)~n", [self(), length(Members)]), + put(state, dict:from_list([{Member, empty} || Member <- Members])), + put(count, 0), + put(ts, os:timestamp()), + ok. + +members_changed([], Births, Deaths) -> + with_state( + fun (State) -> + State1 = + lists:foldl( + fun (Born, StateN) -> + false = dict:is_key(Born, StateN), + dict:store(Born, empty, StateN) + end, State, Births), + lists:foldl( + fun (Died, StateN) -> + true = dict:is_key(Died, StateN), + dict:erase(Died, StateN) + end, State1, Deaths) + end), + ok. 
+ +handle_msg([], From, {test_msg, Num}) -> + inc(), + with_state( + fun (State) -> + ok = case dict:find(From, State) of + {ok, empty} -> ok; + {ok, Num} -> ok; + {ok, Num1} when Num < Num1 -> + exit({{from, From}, + {duplicate_delivery_of, Num1}, + {expecting, Num}}); + {ok, Num1} -> + exit({{from, From}, + {missing_delivery_of, Num}, + {received_early, Num1}}) + end, + dict:store(From, Num + 1, State) + end), + ok. + +terminate([], Reason) -> + io:format("Left ~p (~p)~n", [self(), Reason]), + ok. + +spawn_member() -> + spawn_link( + fun () -> + random:seed(now()), + %% start up delay of no more than 10 seconds + timer:sleep(random:uniform(10000)), + {ok, Pid} = gm:start_link(?MODULE, ?MODULE, []), + Start = random:uniform(10000), + send_loop(Pid, Start, Start + random:uniform(10000)), + gm:leave(Pid), + spawn_more() + end). + +spawn_more() -> + [spawn_member() || _ <- lists:seq(1, 4 - random:uniform(4))]. + +send_loop(_Pid, Target, Target) -> + ok; +send_loop(Pid, Count, Target) when Target > Count -> + case random:uniform(3) of + 3 -> gm:confirmed_broadcast(Pid, {test_msg, Count}); + _ -> gm:broadcast(Pid, {test_msg, Count}) + end, + timer:sleep(random:uniform(5) - 1), %% sleep up to 4 ms + send_loop(Pid, Count + 1, Target). + +test() -> + ok = gm:create_tables(), + spawn_member(), + spawn_member(). diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl new file mode 100644 index 00000000..fb650144 --- /dev/null +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -0,0 +1,125 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_coordinator). + +-export([start_link/2, add_slave/2, get_gm/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-export([joined/2, members_changed/3, handle_msg/3]). + +-behaviour(gen_server2). +-behaviour(gm). + +-include("rabbit.hrl"). +-include("gm_specs.hrl"). + +-record(state, { q, + gm + }). + +-define(ONE_SECOND, 1000). + +start_link(Queue, GM) -> + gen_server2:start_link(?MODULE, [Queue, GM], []). + +add_slave(CPid, SlaveNode) -> + gen_server2:cast(CPid, {add_slave, SlaveNode}). + +get_gm(CPid) -> + gen_server2:call(CPid, get_gm, infinity). + +%% --------------------------------------------------------------------------- +%% gen_server +%% --------------------------------------------------------------------------- + +init([#amqqueue { name = QueueName } = Q, GM]) -> + GM1 = case GM of + undefined -> + ok = gm:create_tables(), + {ok, GM2} = gm:start_link(QueueName, ?MODULE, [self()]), + receive {joined, GM2, _Members} -> + ok + end, + GM2; + _ -> + true = link(GM), + GM + end, + {ok, _TRef} = + timer:apply_interval(?ONE_SECOND, gm, broadcast, [GM1, heartbeat]), + {ok, #state { q = Q, gm = GM1 }, hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call(get_gm, _From, State = #state { gm = GM }) -> + reply(GM, State). 
+ +handle_cast({add_slave, Node}, State = #state { q = Q }) -> + Result = rabbit_mirror_queue_slave_sup:start_child(Node, [Q]), + rabbit_log:info("Adding slave node for queue ~p: ~p~n", + [Q #amqqueue.name, Result]), + noreply(State); + +handle_cast({gm_deaths, Deaths}, + State = #state { q = #amqqueue { name = QueueName } }) -> + rabbit_log:info("Master ~p saw deaths ~p for queue ~p~n", + [self(), Deaths, QueueName]), + Node = node(), + Node = node(rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths)), + noreply(State). + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +terminate(_Reason, #state{}) -> + %% gen_server case + ok; +terminate([_CPid], _Reason) -> + %% gm case + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% --------------------------------------------------------------------------- +%% GM +%% --------------------------------------------------------------------------- + +joined([CPid], Members) -> + CPid ! {joined, self(), Members}, + ok. + +members_changed([_CPid], _Births, []) -> + ok; +members_changed([CPid], _Births, Deaths) -> + ok = gen_server2:cast(CPid, {gm_deaths, Deaths}). + +handle_msg([_CPid], _From, heartbeat) -> + ok; +handle_msg([_CPid], _From, _Msg) -> + ok. + +%% --------------------------------------------------------------------------- +%% Others +%% --------------------------------------------------------------------------- + +noreply(State) -> + {noreply, State, hibernate}. + +reply(Reply, State) -> + {reply, Reply, State, hibernate}. diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl new file mode 100644 index 00000000..2299c3d1 --- /dev/null +++ b/src/rabbit_mirror_queue_master.erl @@ -0,0 +1,221 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. 
You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_master). + +-export([init/2, terminate/1, delete_and_terminate/1, + purge/1, publish/4, publish_delivered/5, fetch/2, ack/2, + tx_publish/5, tx_ack/3, tx_rollback/2, tx_commit/4, + requeue/3, len/1, is_empty/1, dropwhile/2, + set_ram_duration_target/2, ram_duration/1, + needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, + status/1]). + +-export([start/1, stop/0]). + +-export([promote_backing_queue_state/4]). + +-behaviour(rabbit_backing_queue). + +-include("rabbit.hrl"). + +-record(state, { gm, + coordinator, + backing_queue, + backing_queue_state, + set_delivered + }). + +%% --------------------------------------------------------------------------- +%% Backing queue +%% --------------------------------------------------------------------------- + +start(_DurableQueues) -> + %% This will never get called as this module will never be + %% installed as the default BQ implementation. + exit({not_valid_for_generic_backing_queue, ?MODULE}). + +stop() -> + %% Same as start/1. + exit({not_valid_for_generic_backing_queue, ?MODULE}). 
+ +init(#amqqueue { arguments = Args, durable = false } = Q, Recover) -> + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, undefined), + GM = rabbit_mirror_queue_coordinator:get_gm(CPid), + {_Type, Nodes} = rabbit_misc:table_lookup(Args, <<"x-mirror">>), + [rabbit_mirror_queue_coordinator:add_slave(CPid, binary_to_atom(Node, utf8)) + || {longstr, Node} <- Nodes], + {ok, BQ} = application:get_env(backing_queue_module), + BQS = BQ:init(Q, Recover), + #state { gm = GM, + coordinator = CPid, + backing_queue = BQ, + backing_queue_state = BQS, + set_delivered = 0 }. + +promote_backing_queue_state(CPid, BQ, BQS, GM) -> + #state { gm = GM, + coordinator = CPid, + backing_queue = BQ, + backing_queue_state = BQS, + set_delivered = BQ:len(BQS) }. + +terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + %% Backing queue termination. The queue is going down but + %% shouldn't be deleted. Most likely safe shutdown of this + %% node. Thus just let some other slave take over. + State #state { backing_queue_state = BQ:terminate(BQS) }. + +delete_and_terminate(State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, delete_and_terminate), + State #state { backing_queue_state = BQ:delete_and_terminate(BQS), + set_delivered = 0 }. + +purge(State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {set_length, 0}), + {Count, BQS1} = BQ:purge(BQS), + {Count, State #state { backing_queue_state = BQS1, + set_delivered = 0 }}. + +publish(Msg = #basic_message { guid = Guid }, + MsgProps, ChPid, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {publish, false, Guid, MsgProps, ChPid}), + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + State #state { backing_queue_state = BQS1 }. 
+ +publish_delivered(AckRequired, Msg = #basic_message { guid = Guid }, + MsgProps, ChPid, + State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {publish, {true, AckRequired}, Guid, MsgProps, ChPid}), + {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), + {AckTag, State #state { backing_queue_state = BQS1 }}. + +dropwhile(Fun, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + set_delivered = SetDelivered }) -> + Len = BQ:len(BQS), + BQS1 = BQ:dropwhile(Fun, BQS), + Dropped = Len - BQ:len(BQS1), + SetDelivered1 = lists:max([0, SetDelivered - Dropped]), + ok = gm:broadcast(GM, {set_length, BQ:len(BQS1)}), + State #state { backing_queue_state = BQS1, + set_delivered = SetDelivered1 }. + +fetch(AckRequired, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + set_delivered = SetDelivered }) -> + {Result, BQS1} = BQ:fetch(AckRequired, BQS), + State1 = State #state { backing_queue_state = BQS1 }, + case Result of + empty -> + {Result, State1}; + {#basic_message { guid = Guid } = Message, IsDelivered, AckTag, + Remaining} -> + ok = gm:broadcast(GM, {fetch, AckRequired, Guid, Remaining}), + IsDelivered1 = IsDelivered orelse SetDelivered > 0, + SetDelivered1 = lists:max([0, SetDelivered - 1]), + {{Message, IsDelivered1, AckTag, Remaining}, + State1 #state { set_delivered = SetDelivered1 }} + end. + +ack(AckTags, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + {Guids, BQS1} = BQ:ack(AckTags, BQS), + case Guids of + [] -> ok; + _ -> ok = gm:broadcast(GM, {ack, Guids}) + end, + {Guids, State #state { backing_queue_state = BQS1 }}. + +tx_publish(Txn, Msg, MsgProps, ChPid, #state {} = State) -> + %% gm:broadcast(GM, {tx_publish, Txn, Guid, MsgProps, ChPid}) + State. + +tx_ack(Txn, AckTags, #state {} = State) -> + %% gm:broadcast(GM, {tx_ack, Txn, Guids}) + State. 
+ +tx_rollback(Txn, #state {} = State) -> + %% gm:broadcast(GM, {tx_rollback, Txn}) + {[], State}. + +tx_commit(Txn, PostCommitFun, MsgPropsFun, #state {} = State) -> + %% Maybe don't want to transmit the MsgPropsFun but what choice do + %% we have? OTOH, on the slaves, things won't be expiring on their + %% own (props are interpreted by amqqueue, not vq), so if the msg + %% props aren't quite the same, that doesn't matter. + %% + %% The PostCommitFun is actually worse - we need to prevent that + %% from being invoked until we have confirmation from all the + %% slaves that they've done everything up to there. + %% + %% In fact, transactions are going to need work seeing as it's at + %% this point that VQ mentions amqqueue, which will thus not work + %% on the slaves - we need to make sure that all the slaves do the + %% tx_commit_post_msg_store at the same point, and then when they + %% all confirm that (scatter/gather), we can finally invoke the + %% PostCommitFun. + %% + %% Another idea is that the slaves are actually driven with + %% pubacks and thus only the master needs to support txns + %% directly. + {[], State}. + +requeue(AckTags, MsgPropsFun, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + {Guids, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS), + ok = gm:broadcast(GM, {requeue, MsgPropsFun, Guids}), + {Guids, State #state { backing_queue_state = BQS1 }}. + +len(#state { backing_queue = BQ, backing_queue_state = BQS}) -> + BQ:len(BQS). + +is_empty(#state { backing_queue = BQ, backing_queue_state = BQS}) -> + BQ:is_empty(BQS). + +set_ram_duration_target(Target, State = #state { backing_queue = BQ, + backing_queue_state = BQS}) -> + State #state { backing_queue_state = + BQ:set_ram_duration_target(Target, BQS) }. + +ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> + {Result, BQS1} = BQ:ram_duration(BQS), + {Result, State #state { backing_queue_state = BQS1 }}. 
+ +needs_idle_timeout(#state { backing_queue = BQ, backing_queue_state = BQS}) -> + BQ:needs_idle_timeout(BQS). + +idle_timeout(#state { backing_queue = BQ, backing_queue_state = BQS}) -> + BQ:idle_timeout(BQS). + +handle_pre_hibernate(State = #state { backing_queue = BQ, + backing_queue_state = BQS}) -> + State #state { backing_queue_state = BQ:handle_pre_hibernate(BQS) }. + +status(#state { backing_queue = BQ, backing_queue_state = BQS}) -> + BQ:status(BQS). diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl new file mode 100644 index 00000000..d37ebe1f --- /dev/null +++ b/src/rabbit_mirror_queue_misc.erl @@ -0,0 +1,42 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_misc). + +-export([remove_from_queue/2]). + +-include("rabbit.hrl"). + +remove_from_queue(QueueName, DeadPids) -> + DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], + rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q = #amqqueue { pid = QPid, + extra_pids = EPids }] = + mnesia:read({rabbit_queue, QueueName}), + [QPid1 | EPids1] = + [Pid || Pid <- [QPid | EPids], + not lists:member(node(Pid), DeadNodes)], + case {{QPid, EPids}, {QPid1, EPids1}} of + {Same, Same} -> + QPid; + _ -> + Q1 = Q #amqqueue { pid = QPid1, + extra_pids = EPids1 }, + mnesia:write(rabbit_queue, Q1, write), + QPid1 + end + end). 
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl new file mode 100644 index 00000000..452cbd5a --- /dev/null +++ b/src/rabbit_mirror_queue_slave.erl @@ -0,0 +1,481 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_slave). + +%% We join the GM group before we add ourselves to the amqqueue +%% record. As a result: +%% 1. We can receive msgs from GM that correspond to messages we will +%% never receive from publishers. +%% 2. When we receive a message from publishers, we must receive a +%% message from the GM group for it. +%% 3. However, that instruction from the GM group can arrive either +%% before or after the actual message. We need to be able to +%% distinguish between GM instructions arriving early, and case (1) +%% above. +%% +%% All instructions from the GM group must be processed in the order +%% in which they're received. +%% +%% Thus, we need a queue per sender, and a queue for GM instructions. +%% +%% On receipt of a GM group instruction, three things are possible: +%% 1. The queue of publisher messages is empty. Thus store the GM +%% instruction to the instrQ. +%% 2. The head of the queue of publisher messages has a message that +%% matches the GUID of the GM instruction. Remove the message, and +%% route appropriately. +%% 3. 
The head of the queue of publisher messages has a message that +%% does not match the GUID of the GM instruction. Throw away the GM +%% instruction: the GM instruction must correspond to a message +%% that we'll never receive. If it did not, then before the current +%% instruction, we would have received an instruction for the +%% message at the head of this queue, thus the head of the queue +%% would have been removed and processed. +%% +%% On receipt of a publisher message, three things are possible: +%% 1. The queue of GM group instructions is empty. Add the message to +%% the relevant queue and await instructions from the GM. +%% 2. The head of the queue of GM group instructions has an +%% instruction matching the GUID of the message. Remove that +%% instruction and act on it. Attempt to process the rest of the +%% instrQ. +%% 3. The head of the queue of GM group instructions has an +%% instruction that does not match the GUID of the message. If the +%% message is from the same publisher as is referred to by the +%% instruction then throw away the GM group instruction and repeat +%% - attempt to match against the next instruction if there is one: +%% The instruction thrown away was for a message we'll never +%% receive. +%% +%% In all cases, we are relying heavily on order preserving messaging +%% both from the GM group and from the publishers. + +-export([start_link/1, set_maximum_since_use/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3, handle_pre_hibernate/1]). + +-export([joined/2, members_changed/3, handle_msg/3]). + +-behaviour(gen_server2). +-behaviour(gm). + +-include("rabbit.hrl"). +-include("gm_specs.hrl"). + +-record(state, { q, + gm, + master_node, + backing_queue, + backing_queue_state, + rate_timer_ref, + + sender_queues, %% :: Pid -> MsgQ + guid_ack, %% :: Guid -> AckTag + instructions %% :: InstrQ + }). + +-define(RAM_DURATION_UPDATE_INTERVAL, 5000). 
+ +start_link(Q) -> + gen_server2:start_link(?MODULE, [Q], []). + +set_maximum_since_use(QPid, Age) -> + gen_server2:cast(QPid, {set_maximum_since_use, Age}). + +init([#amqqueue { name = QueueName } = Q]) -> + process_flag(trap_exit, true), %% amqqueue_process traps exits too. + ok = gm:create_tables(), + {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]), + receive {joined, GM} -> + ok + end, + Self = self(), + Node = node(), + case rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q1 = #amqqueue { pid = QPid, extra_pids = EPids }] = + mnesia:read({rabbit_queue, QueueName}), + case [Pid || Pid <- [QPid | EPids], node(Pid) =:= Node] of + [] -> + EPids1 = EPids ++ [Self], + mnesia:write(rabbit_queue, + Q1 #amqqueue { extra_pids = EPids1 }, + write), + {ok, QPid}; + _ -> + {error, node_already_present} + end + end) of + {ok, MPid} -> + ok = file_handle_cache:register_callback( + rabbit_amqqueue, set_maximum_since_use, [self()]), + ok = rabbit_memory_monitor:register( + self(), {rabbit_amqqueue, set_ram_duration_target, + [self()]}), + {ok, BQ} = application:get_env(backing_queue_module), + BQS = BQ:init(Q, false), + {ok, #state { q = Q, + gm = GM, + master_node = node(MPid), + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = undefined, + + sender_queues = dict:new(), + guid_ack = dict:new(), + instructions = queue:new() + }, hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, + ?DESIRED_HIBERNATE}}; + {error, Error} -> + {stop, Error} + end. 
+ +handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) -> + %% Synchronous, "immediate" delivery mode + gen_server2:reply(From, false), %% master may deliver it, not us + handle_process_result(enqueue_message(Delivery, State)); + +handle_call({deliver, Delivery = #delivery {}}, From, State) -> + %% Synchronous, "mandatory" delivery mode + gen_server2:reply(From, true), %% amqqueue throws away the result anyway + handle_process_result(enqueue_message(Delivery, State)); + +handle_call({gm_deaths, Deaths}, From, + State = #state { q = #amqqueue { name = QueueName }, + gm = GM, + master_node = MNode }) -> + rabbit_log:info("Slave ~p saw deaths ~p for queue ~p~n", + [self(), Deaths, QueueName]), + case {node(), node(rabbit_mirror_queue_misc:remove_from_queue( + QueueName, Deaths))} of + {_Node, MNode} -> + reply(ok, State); + {Node, Node} -> + promote_me(From, State); + {_Node, MNode1} -> + gen_server2:reply(From, ok), + ok = gm:broadcast(GM, heartbeat), + noreply(State #state { master_node = MNode1 }) + end. + + +handle_cast({gm, Instruction}, State = #state { instructions = InstrQ }) -> + State1 = State #state { instructions = queue:in(Instruction, InstrQ) }, + case queue:is_empty(InstrQ) of + true -> handle_process_result(process_instructions(State1)); + false -> noreply(State1) + end; + +handle_cast({deliver, Delivery = #delivery {}}, State) -> + %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. 
+ handle_process_result(enqueue_message(Delivery, State)); + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State); + +handle_cast({set_ram_duration_target, Duration}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + BQS1 = BQ:set_ram_duration_target(Duration, BQS), + noreply(State #state { backing_queue_state = BQS1 }); + +handle_cast(update_ram_duration, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + noreply(State #state { rate_timer_ref = just_measured, + backing_queue_state = BQS2 }). + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +%% If the Reason is shutdown, or {shutdown, _}, it is not the queue +%% being deleted: it's just the node going down. Even though we're a +%% slave, we have no idea whether or not we'll be the only copy coming +%% back up. Thus we must assume we will be, and preserve anything we +%% have on disk. +terminate(Reason, #state { q = Q, + gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = RateTRef }) -> + ok = gm:leave(GM), + QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( + Q, BQ, BQS, RateTRef, [], []), + rabbit_amqqueue_process:terminate(Reason, QueueState); +terminate([_SPid], _Reason) -> + %% gm case + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
+ +handle_pre_hibernate(State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + %% mainly copied from amqqueue_process + BQS1 = BQ:handle_pre_hibernate(BQS), + %% no activity for a while == 0 egress and ingress rates + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), infinity), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS2 })}. + +%% --------------------------------------------------------------------------- +%% GM +%% --------------------------------------------------------------------------- + +joined([SPid], _Members) -> + SPid ! {joined, self()}, + ok. + +members_changed([_SPid], _Births, []) -> + ok; +members_changed([SPid], _Births, Deaths) -> + rabbit_misc:with_exit_handler( + fun () -> {stop, normal} end, + fun () -> + case gen_server2:call(SPid, {gm_deaths, Deaths}) of + ok -> + ok; + {promote, CPid} -> + {become, rabbit_mirror_queue_coordinator, [CPid]} + end + end). + +handle_msg([_SPid], _From, heartbeat) -> + ok; +handle_msg([SPid], _From, Msg) -> + ok = gen_server2:cast(SPid, {gm, Msg}). + +%% --------------------------------------------------------------------------- +%% Others +%% --------------------------------------------------------------------------- + +handle_process_result({continue, State}) -> noreply(State); +handle_process_result({stop, State}) -> {stop, normal, State}. 
+ +promote_me(From, #state { q = Q, + gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = RateTRef, + sender_queues = SQ, + guid_ack = GA }) -> + rabbit_log:info("Promoting slave ~p for queue ~p~n", + [self(), Q #amqqueue.name]), + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), + true = unlink(GM), + gen_server2:reply(From, {promote, CPid}), + ok = gm:confirmed_broadcast(GM, heartbeat), + MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( + CPid, BQ, BQS, GM), + %% We have to do the requeue via this init because otherwise we + %% don't have access to the relevent MsgPropsFun. Also, we are + %% already in mnesia as the master queue pid. Thus we cannot just + %% publish stuff by sending it to ourself - we must pass it + %% through to this init, otherwise we can violate ordering + %% constraints. + AckTags = [AckTag || {_Guid, AckTag} <- dict:to_list(GA)], + Deliveries = lists:append([queue:to_list(PubQ) + || {_ChPid, PubQ} <- dict:to_list(SQ)]), + QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( + Q, rabbit_mirror_queue_master, MasterState, RateTRef, + AckTags, Deliveries), + {become, rabbit_amqqueue_process, QueueState, hibernate}. + +noreply(State) -> + {noreply, next_state(State), hibernate}. + +reply(Reply, State) -> + {reply, Reply, next_state(State), hibernate}. + +next_state(State) -> + ensure_rate_timer(State). + +%% copied+pasted from amqqueue_process +ensure_rate_timer(State = #state { rate_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after( + ?RAM_DURATION_UPDATE_INTERVAL, + rabbit_amqqueue, update_ram_duration, + [self()]), + State #state { rate_timer_ref = TRef }; +ensure_rate_timer(State = #state { rate_timer_ref = just_measured }) -> + State #state { rate_timer_ref = undefined }; +ensure_rate_timer(State) -> + State. 
+ +stop_rate_timer(State = #state { rate_timer_ref = undefined }) -> + State; +stop_rate_timer(State = #state { rate_timer_ref = just_measured }) -> + State #state { rate_timer_ref = undefined }; +stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #state { rate_timer_ref = undefined }. + +enqueue_message(Delivery = #delivery { sender = ChPid }, + State = #state { sender_queues = SQ }) -> + Q = case dict:find(ChPid, SQ) of + {ok, Q1} -> Q1; + error -> queue:new() + end, + SQ1 = dict:store(ChPid, queue:in(Delivery, Q), SQ), + State1 = State #state { sender_queues = SQ1 }, + case queue:is_empty(Q) of + true -> process_instructions(State1); + false -> {continue, State1} + end. + +process_instructions(State = #state { instructions = InstrQ }) -> + case queue:out(InstrQ) of + {empty, _InstrQ} -> + {continue, State}; + {{value, Instr}, InstrQ1} -> + case process_instruction(Instr, State) of + {processed, State1} -> + process_instructions( + State1 #state { instructions = InstrQ1 }); + {stop, State1} -> + {stop, State1 #state { instructions = InstrQ1 }}; + blocked -> + {continue, State} + end + end. 
+ +process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, + State = #state { sender_queues = SQ, + backing_queue = BQ, + backing_queue_state = BQS, + guid_ack = GA }) -> + case dict:find(ChPid, SQ) of + error -> + blocked; + {ok, Q} -> + case queue:out(Q) of + {empty, _Q} -> + blocked; + {{value, #delivery { + message = Msg = #basic_message { guid = Guid } }}, Q1} -> + State1 = State #state { sender_queues = + dict:store(ChPid, Q1, SQ) }, + {processed, + case Deliver of + false -> + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + State1 #state {backing_queue_state = BQS1 }; + {true, AckRequired} -> + {AckTag, BQS1} = BQ:publish_delivered( + AckRequired, Msg, MsgProps, + ChPid, BQS), + GA1 = case AckRequired of + true -> dict:store(Guid, AckTag, GA); + false -> GA + end, + State1 #state { backing_queue_state = BQS1, + guid_ack = GA1 } + end}; + {{value, #delivery {}}, _Q1} -> + %% throw away the instruction: we'll never receive + %% the message to which it corresponds. + {processed, State} + end + end; +process_instruction({set_length, Length}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + QLen = BQ:len(BQS), + ToDrop = QLen - Length, + {processed, + case ToDrop > 0 of + true -> BQS1 = lists:foldl( + fun (const, BQSN) -> BQ:fetch(false, BQSN) end, + BQS, lists:duplicate(ToDrop, const)), + State #state { backing_queue_state = BQS1 }; + false -> State + end}; +process_instruction({fetch, AckRequired, Guid, Remaining}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS, + guid_ack = GA }) -> + QLen = BQ:len(BQS), + {processed, + case QLen - 1 of + Remaining -> + {{_Msg, _IsDelivered, AckTag, Remaining}, BQS1} = + BQ:fetch(AckRequired, BQS), + GA1 = case AckRequired of + true -> dict:store(Guid, AckTag, GA); + false -> GA + end, + State #state { backing_queue_state = BQS1, + guid_ack = GA1 }; + Other when Other < Remaining -> + %% we must be shorter than the master + State + end}; +process_instruction({ack, Guids}, + 
State = #state { backing_queue = BQ, + backing_queue_state = BQS, + guid_ack = GA }) -> + {AckTags, GA1} = guids_to_acktags(Guids, GA), + {Guids1, BQS1} = BQ:ack(AckTags, BQS), + [] = Guids1 -- Guids, %% ASSERTION + {processed, State #state { guid_ack = GA1, + backing_queue_state = BQS1 }}; +process_instruction({requeue, MsgPropsFun, Guids}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS, + guid_ack = GA }) -> + {AckTags, GA1} = guids_to_acktags(Guids, GA), + {processed, + case length(AckTags) =:= length(Guids) of + true -> + {Guids, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS), + State #state { guid_ack = GA1, + backing_queue_state = BQS1 }; + false -> + %% the only thing we can safely do is nuke out our BQ and + %% GA + {_Count, BQS1} = BQ:purge(BQS), + {Guids, BQS2} = ack_all(BQ, GA, BQS1), + State #state { guid_ack = dict:new(), + backing_queue_state = BQS2 } + end}; +process_instruction(delete_and_terminate, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {stop, State #state { + backing_queue_state = BQ:delete_and_terminate(BQS) }}. + +guids_to_acktags(Guids, GA) -> + {AckTags, GA1} = + lists:foldl(fun (Guid, {AckTagsN, GAN}) -> + case dict:find(Guid, GA) of + error -> {AckTagsN, GAN}; + {ok, AckTag} -> {[AckTag | AckTagsN], + dict:erase(Guid, GAN)} + end + end, {[], GA}, Guids), + {lists:reverse(AckTags), GA1}. + +ack_all(BQ, GA, BQS) -> + BQ:ack([AckTag || {_Guid, AckTag} <- dict:to_list(GA)], BQS). diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl new file mode 100644 index 00000000..6658e6c3 --- /dev/null +++ b/src/rabbit_mirror_queue_slave_sup.erl @@ -0,0 +1,54 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. 
You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% + +-module(rabbit_mirror_queue_slave_sup). + +-rabbit_boot_step({mirror_queue_slave_sup, + [{description, "mirror queue slave sup"}, + {mfa, {rabbit_mirror_queue_slave_sup, start, []}}, + {requires, core_initialized}, + {enables, queue_sup_queue_recovery}]}). + +-behaviour(supervisor2). + +-export([start/0, start_link/0, start_child/2]). + +-export([init/1]). + +-include_lib("rabbit.hrl"). + +-define(SERVER, ?MODULE). + +start() -> + {ok, _} = + supervisor:start_child( + rabbit_sup, + {rabbit_mirror_queue_slave_sup, + {rabbit_mirror_queue_slave_sup, start_link, []}, + transient, infinity, supervisor, [rabbit_mirror_queue_slave_sup]}), + ok. + +start_link() -> + supervisor2:start_link({local, ?SERVER}, ?MODULE, []). + +start_child(Node, Args) -> + supervisor2:start_child({?SERVER, Node}, Args). + +init([]) -> + {ok, {{simple_one_for_one_terminate, 10, 10}, + [{rabbit_mirror_queue_slave, + {rabbit_mirror_queue_slave, start_link, []}, + temporary, ?MAX_WAIT, worker, [rabbit_mirror_queue_slave]}]}}. -- cgit v1.2.1 From 7e7eebb1ee7c4911b2250a02373cd8ff6fca3351 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 16 Dec 2010 15:37:57 +0000 Subject: Make use of the wonderful new registry. 
--- src/gm.erl | 12 +++++++----- src/rabbit.erl | 13 ++++++------- src/rabbit_mnesia.erl | 20 ++++++++------------ src/rabbit_registry.erl | 3 ++- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/gm.erl b/src/gm.erl index 47971bd4..d41b7a1c 100644 --- a/src/gm.erl +++ b/src/gm.erl @@ -372,6 +372,7 @@ -behaviour(gen_server2). +-behaviour(rabbit_mnesia). -export([create_tables/0, start_link/3, leave/1, broadcast/2, confirmed_broadcast/2, group_members/1]). @@ -381,7 +382,7 @@ -export([behaviour_info/1]). --export([add_to_rabbit_mnesia/0]). +-export([table_definitions/0]). -define(GROUP_TABLE, gm_group). -define(HIBERNATE_AFTER_MIN, 1000). @@ -413,7 +414,9 @@ -rabbit_boot_step({gm_tables, [{description, "add GM tables to rabbit_mnesia"}, - {mfa, {?MODULE, add_to_rabbit_mnesia, []}}, + {mfa, {rabbit_registry, register, + [mnesia, <<"gm">>, ?MODULE]}}, + {requires, rabbit_registry}, {enables, database}]}). -define(TAG, '$gm'). @@ -480,10 +483,9 @@ create_tables([{Table, Attributes} | Tables]) -> Err -> Err end. -add_to_rabbit_mnesia() -> +table_definitions() -> {Name, Attributes} = ?TABLE, - ok = rabbit_mnesia:add_table_definition( - {Name, [?TABLE_MATCH | Attributes]}). + [{Name, [?TABLE_MATCH | Attributes]}]. start_link(GroupName, Module, Args) -> gen_server2:start_link(?MODULE, [GroupName, Module, Args], []). diff --git a/src/rabbit.erl b/src/rabbit.erl index 2ebfdecf..d46c62b6 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -51,6 +51,12 @@ []}}, {enables, external_infrastructure}]}). +-rabbit_boot_step({rabbit_registry, + [{description, "plugin registry"}, + {mfa, {rabbit_sup, start_child, + [rabbit_registry]}}, + {enables, external_infrastructure}]}). + -rabbit_boot_step({database, [{mfa, {rabbit_mnesia, init, []}}, {enables, external_infrastructure}]}). @@ -69,13 +75,6 @@ -rabbit_boot_step({external_infrastructure, [{description, "external infrastructure ready"}]}). 
--rabbit_boot_step({rabbit_registry, - [{description, "plugin registry"}, - {mfa, {rabbit_sup, start_child, - [rabbit_registry]}}, - {requires, external_infrastructure}, - {enables, kernel_ready}]}). - -rabbit_boot_step({rabbit_log, [{description, "logging server"}, {mfa, {rabbit_sup, start_restartable_child, diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index c97988d0..1d2c3640 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -34,11 +34,12 @@ -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, cluster/1, force_cluster/1, reset/0, force_reset/0, - is_clustered/0, empty_ram_only_tables/0, copy_db/1, - add_table_definition/1]). + is_clustered/0, empty_ram_only_tables/0, copy_db/1]). -export([table_names/0]). +-export([behaviour_info/1]). + %% create_tables/0 exported for helping embed RabbitMQ in or alongside %% other mnesia-using Erlang applications, such as ejabberd -export([create_tables/0]). @@ -72,6 +73,9 @@ %%---------------------------------------------------------------------------- +behaviour_info(callbacks) -> [{table_definitions, 0}]; +behaviour_info(_Other) -> undefined. + status() -> [{nodes, case mnesia:system_info(is_running) of yes -> [{Key, Nodes} || @@ -214,17 +218,9 @@ table_definitions() -> {match, #amqqueue{name = queue_name_match(), _='_'}}]}] ++ plugin_table_definitions(). -%% TODO: re-work this abuse of the application env as a register with -%% the generic registry that should be landing at some point. -add_table_definition(Def) -> - ok = application:set_env(rabbit, plugin_mnesia_tables, - [Def | plugin_table_definitions()], infinity). - plugin_table_definitions() -> - case application:get_env(rabbit, plugin_mnesia_tables) of - {ok, Defs} -> Defs; - undefined -> [] - end. + lists:append([Mod:table_definitions() + || {_Type, Mod} <- rabbit_registry:lookup_all(mnesia)]). 
binding_match() -> #binding{source = exchange_name_match(), diff --git a/src/rabbit_registry.erl b/src/rabbit_registry.erl index 7a3fcb51..935cf1d0 100644 --- a/src/rabbit_registry.erl +++ b/src/rabbit_registry.erl @@ -111,7 +111,8 @@ sanity_check_module(ClassModule, Module) -> end. class_module(exchange) -> rabbit_exchange_type; -class_module(auth_mechanism) -> rabbit_auth_mechanism. +class_module(auth_mechanism) -> rabbit_auth_mechanism; +class_module(mnesia) -> rabbit_mnesia. %%--------------------------------------------------------------------------- -- cgit v1.2.1 From a6781f983a5c23281a94e153901b016e3ac7fdb8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 16 Dec 2010 18:13:20 +0000 Subject: extra_pids => mirror_pids; Also rip out changes to the registry and just hard code that rabbit_mnesia takes care of gm - the problem being that we need access to the table dfns even when rabbit app is stopped, thus the registry would be down --- include/rabbit.hrl | 2 +- src/gm.erl | 8 -------- src/rabbit_amqqueue.erl | 6 +++--- src/rabbit_mirror_queue_misc.erl | 14 +++++++------- src/rabbit_mirror_queue_slave.erl | 8 ++++---- src/rabbit_mnesia.erl | 11 +---------- src/rabbit_registry.erl | 3 +-- src/rabbit_router.erl | 4 ++-- src/rabbit_types.erl | 2 +- 9 files changed, 20 insertions(+), 38 deletions(-) diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 9a74503c..421d5fba 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -54,7 +54,7 @@ -record(exchange, {name, type, durable, auto_delete, arguments}). -record(amqqueue, {name, durable, auto_delete, exclusive_owner = none, - arguments, pid, extra_pids}). + arguments, pid, mirror_pids}). %% mnesia doesn't like unary records, so we add a dummy 'value' field -record(route, {binding, value = const}). diff --git a/src/gm.erl b/src/gm.erl index d41b7a1c..0a6e346a 100644 --- a/src/gm.erl +++ b/src/gm.erl @@ -372,7 +372,6 @@ -behaviour(gen_server2). --behaviour(rabbit_mnesia). 
-export([create_tables/0, start_link/3, leave/1, broadcast/2, confirmed_broadcast/2, group_members/1]). @@ -412,13 +411,6 @@ {attributes, record_info(fields, gm_group)}]}). -define(TABLE_MATCH, {match, #gm_group { _ = '_' }}). --rabbit_boot_step({gm_tables, - [{description, "add GM tables to rabbit_mnesia"}, - {mfa, {rabbit_registry, register, - [mnesia, <<"gm">>, ?MODULE]}}, - {requires, rabbit_registry}, - {enables, database}]}). - -define(TAG, '$gm'). -ifdef(use_specs). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index e4bc9f76..731bd234 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -207,7 +207,7 @@ declare(QueueName, Durable, AutoDelete, Args, Owner) -> arguments = Args, exclusive_owner = Owner, pid = none, - extra_pids = []}), + mirror_pids = []}), case gen_server2:call(Q#amqqueue.pid, {init, false}) of not_found -> rabbit_misc:not_found(QueueName); Q1 -> Q1 @@ -488,7 +488,7 @@ on_node_down(Node) -> rabbit_misc:execute_mnesia_transaction( fun () -> qlc:e(qlc:q([delete_queue(QueueName) || #amqqueue{name = QueueName, pid = Pid, - extra_pids = []} + mirror_pids = []} <- mnesia:table(rabbit_queue), node(Pid) == Node])) end))). @@ -503,7 +503,7 @@ pseudo_queue(QueueName, Pid) -> auto_delete = false, arguments = [], pid = Pid, - extra_pids = []}. + mirror_pids = []}. 
safe_delegate_call_ok(F, Pids) -> {_, Bad} = delegate:invoke(Pids, diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index d37ebe1f..237bf080 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -24,18 +24,18 @@ remove_from_queue(QueueName, DeadPids) -> DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], rabbit_misc:execute_mnesia_transaction( fun () -> - [Q = #amqqueue { pid = QPid, - extra_pids = EPids }] = + [Q = #amqqueue { pid = QPid, + mirror_pids = MPids }] = mnesia:read({rabbit_queue, QueueName}), - [QPid1 | EPids1] = - [Pid || Pid <- [QPid | EPids], + [QPid1 | MPids1] = + [Pid || Pid <- [QPid | MPids], not lists:member(node(Pid), DeadNodes)], - case {{QPid, EPids}, {QPid1, EPids1}} of + case {{QPid, MPids}, {QPid1, MPids1}} of {Same, Same} -> QPid; _ -> - Q1 = Q #amqqueue { pid = QPid1, - extra_pids = EPids1 }, + Q1 = Q #amqqueue { pid = QPid1, + mirror_pids = MPids1 }, mnesia:write(rabbit_queue, Q1, write), QPid1 end diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 452cbd5a..a9429ab8 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -108,13 +108,13 @@ init([#amqqueue { name = QueueName } = Q]) -> Node = node(), case rabbit_misc:execute_mnesia_transaction( fun () -> - [Q1 = #amqqueue { pid = QPid, extra_pids = EPids }] = + [Q1 = #amqqueue { pid = QPid, mirror_pids = MPids }] = mnesia:read({rabbit_queue, QueueName}), - case [Pid || Pid <- [QPid | EPids], node(Pid) =:= Node] of + case [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node] of [] -> - EPids1 = EPids ++ [Self], + MPids1 = MPids ++ [Self], mnesia:write(rabbit_queue, - Q1 #amqqueue { extra_pids = EPids1 }, + Q1 #amqqueue { mirror_pids = MPids1 }, write), {ok, QPid}; _ -> diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 1d2c3640..399bf1e0 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -38,8 +38,6 @@ 
-export([table_names/0]). --export([behaviour_info/1]). - %% create_tables/0 exported for helping embed RabbitMQ in or alongside %% other mnesia-using Erlang applications, such as ejabberd -export([create_tables/0]). @@ -73,9 +71,6 @@ %%---------------------------------------------------------------------------- -behaviour_info(callbacks) -> [{table_definitions, 0}]; -behaviour_info(_Other) -> undefined. - status() -> [{nodes, case mnesia:system_info(is_running) of yes -> [{Key, Nodes} || @@ -216,11 +211,7 @@ table_definitions() -> [{record_name, amqqueue}, {attributes, record_info(fields, amqqueue)}, {match, #amqqueue{name = queue_name_match(), _='_'}}]}] - ++ plugin_table_definitions(). - -plugin_table_definitions() -> - lists:append([Mod:table_definitions() - || {_Type, Mod} <- rabbit_registry:lookup_all(mnesia)]). + ++ gm:table_definitions(). binding_match() -> #binding{source = exchange_name_match(), diff --git a/src/rabbit_registry.erl b/src/rabbit_registry.erl index 935cf1d0..7a3fcb51 100644 --- a/src/rabbit_registry.erl +++ b/src/rabbit_registry.erl @@ -111,8 +111,7 @@ sanity_check_module(ClassModule, Module) -> end. class_module(exchange) -> rabbit_exchange_type; -class_module(auth_mechanism) -> rabbit_auth_mechanism; -class_module(mnesia) -> rabbit_mnesia. +class_module(auth_mechanism) -> rabbit_auth_mechanism. %%--------------------------------------------------------------------------- diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index a4ad7fbc..66fc4070 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -117,8 +117,8 @@ check_delivery(_ , _ , {_ , Qs}) -> {routed, Qs}. 
lookup_qpids(QNames) -> lists:foldl(fun (QName, QPids) -> case mnesia:dirty_read({rabbit_queue, QName}) of - [#amqqueue{pid = QPid, extra_pids = EPids}] -> - EPids ++ [QPid | QPids]; + [#amqqueue{pid = QPid, mirror_pids = MPids}] -> + MPids ++ [QPid | QPids]; [] -> QPids end diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl index bc1f9d7e..9eca964b 100644 --- a/src/rabbit_types.erl +++ b/src/rabbit_types.erl @@ -138,7 +138,7 @@ exclusive_owner :: rabbit_types:maybe(pid()), arguments :: rabbit_framing:amqp_table(), pid :: rabbit_types:maybe(pid()), - extra_pids :: [pid()]}). + mirror_pids :: [pid()]}). -type(exchange() :: #exchange{name :: rabbit_exchange:name(), -- cgit v1.2.1 From 34b1a1eb682015345c1b5bc0d3623ac1cba27bdd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 16 Dec 2010 18:37:44 +0000 Subject: Permit dynamic inspection of the current mirror queues. This isn't particularly lovely because the master queue itself does not know, and thus has to do an mnesia read. However, for the time being it should do --- src/rabbit_amqqueue_process.erl | 6 +++++- src/rabbit_control.erl | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index f8ec4ec8..4dd48457 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -89,7 +89,8 @@ messages, consumers, memory, - backing_queue_status + backing_queue_status, + mirror_pids ]). -define(CREATION_EVENT_KEYS, @@ -761,6 +762,9 @@ i(memory, _) -> M; i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:status(BQS); +i(mirror_pids, #q{q = #amqqueue{name = Name}}) -> + {ok, #amqqueue{mirror_pids = MPids}} = rabbit_amqqueue:lookup(Name), + MPids; i(Item, _) -> throw({bad_argument, Item}). 
diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index df55d961..4a0bd25e 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -353,6 +353,12 @@ format_info_item([{TableEntryKey, TableEntryType, _TableEntryValue} | _] = Value) when is_binary(TableEntryKey) andalso is_atom(TableEntryType) -> io_lib:format("~1000000000000p", [prettify_amqp_table(Value)]); +format_info_item([T | _] = Value) + when is_tuple(T) orelse is_pid(T) orelse is_binary(T) orelse is_atom(T) orelse + is_list(T) -> + "[" ++ + lists:nthtail(2, lists:append( + [", " ++ format_info_item(E) || E <- Value])) ++ "]"; format_info_item(Value) -> io_lib:format("~w", [Value]). -- cgit v1.2.1 From 05374b4d1542334c85915a9ec27d79ba5d5c08f4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Dec 2010 00:31:15 +0000 Subject: Support maybe_run_queue_via_backing_queue in the slaves, and add some comments about where to deal with confirmations. I think. Assuming my understanding of pubacks is right. --- src/rabbit_mirror_queue_master.erl | 2 +- src/rabbit_mirror_queue_slave.erl | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 2299c3d1..0d64ab8e 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -52,7 +52,7 @@ stop() -> %% Same as start/1. exit({not_valid_for_generic_backing_queue, ?MODULE}). 
-init(#amqqueue { arguments = Args, durable = false } = Q, Recover) -> +init(#amqqueue { arguments = Args } = Q, Recover) -> {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, undefined), GM = rabbit_mirror_queue_coordinator:get_gm(CPid), {_Type, Nodes} = rabbit_misc:table_lookup(Args, <<"x-mirror">>), diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index a9429ab8..ac49b10b 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -86,7 +86,9 @@ sender_queues, %% :: Pid -> MsgQ guid_ack, %% :: Guid -> AckTag - instructions %% :: InstrQ + instructions, %% :: InstrQ + + guid_to_channel %% for confirms }). -define(RAM_DURATION_UPDATE_INTERVAL, 5000). @@ -138,7 +140,9 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), guid_ack = dict:new(), - instructions = queue:new() + instructions = queue:new(), + + guid_to_channel = dict:new() }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}; @@ -172,8 +176,14 @@ handle_call({gm_deaths, Deaths}, From, gen_server2:reply(From, ok), ok = gm:broadcast(GM, heartbeat), noreply(State #state { master_node = MNode1 }) - end. + end; +handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> + reply(ok, maybe_run_queue_via_backing_queue(Fun, State)). + + +handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> + noreply(maybe_run_queue_via_backing_queue(Fun, State)); handle_cast({gm, Instruction}, State = #state { instructions = InstrQ }) -> State1 = State #state { instructions = queue:in(Instruction, InstrQ) }, @@ -271,6 +281,12 @@ handle_msg([SPid], _From, Msg) -> %% Others %% --------------------------------------------------------------------------- +maybe_run_queue_via_backing_queue( + Fun, State = #state { backing_queue_state = BQS }) -> + %% TODO: some CONFIRM-like thing with these Guids + {_Guids, BQS1} = Fun(BQS), + State #state { backing_queue_state = BQS1 }. 
+ handle_process_result({continue, State}) -> noreply(State); handle_process_result({stop, State}) -> {stop, normal, State}. @@ -380,6 +396,7 @@ process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, {processed, case Deliver of false -> + %% RECORD CONFIRM - modify MsgProps BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), State1 #state {backing_queue_state = BQS1 }; {true, AckRequired} -> @@ -439,6 +456,7 @@ process_instruction({ack, Guids}, {AckTags, GA1} = guids_to_acktags(Guids, GA), {Guids1, BQS1} = BQ:ack(AckTags, BQS), [] = Guids1 -- Guids, %% ASSERTION + %% CONFIRM - persistent but delivered faster than disk sync {processed, State #state { guid_ack = GA1, backing_queue_state = BQS1 }}; process_instruction({requeue, MsgPropsFun, Guids}, @@ -457,6 +475,7 @@ process_instruction({requeue, MsgPropsFun, Guids}, %% GA {_Count, BQS1} = BQ:purge(BQS), {Guids, BQS2} = ack_all(BQ, GA, BQS1), + %% CONFIRM these Guids State #state { guid_ack = dict:new(), backing_queue_state = BQS2 } end}; -- cgit v1.2.1 From c0bf0c0b7d471fd20d1a6ec4cd09365a2f4f4749 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Dec 2010 00:31:55 +0000 Subject: Correct places to do confirmation stuff --- src/rabbit_mirror_queue_slave.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index ac49b10b..7fb13c5c 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -456,7 +456,6 @@ process_instruction({ack, Guids}, {AckTags, GA1} = guids_to_acktags(Guids, GA), {Guids1, BQS1} = BQ:ack(AckTags, BQS), [] = Guids1 -- Guids, %% ASSERTION - %% CONFIRM - persistent but delivered faster than disk sync {processed, State #state { guid_ack = GA1, backing_queue_state = BQS1 }}; process_instruction({requeue, MsgPropsFun, Guids}, @@ -475,7 +474,6 @@ process_instruction({requeue, MsgPropsFun, Guids}, %% GA {_Count, BQS1} = BQ:purge(BQS), {Guids, BQS2} = ack_all(BQ, GA, BQS1), - %% CONFIRM these 
Guids State #state { guid_ack = dict:new(), backing_queue_state = BQS2 } end}; -- cgit v1.2.1 From bf91d41b3684cda8c5c15bda13cf616d53116530 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Dec 2010 12:05:43 +0000 Subject: That might just be enough to support confirms --- src/rabbit_mirror_queue_slave.erl | 64 +++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 7fb13c5c..d4623bf5 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -282,10 +282,34 @@ handle_msg([SPid], _From, Msg) -> %% --------------------------------------------------------------------------- maybe_run_queue_via_backing_queue( - Fun, State = #state { backing_queue_state = BQS }) -> - %% TODO: some CONFIRM-like thing with these Guids - {_Guids, BQS1} = Fun(BQS), - State #state { backing_queue_state = BQS1 }. + Fun, State = #state { backing_queue_state = BQS, + guid_to_channel = GTC }) -> + {Guids, BQS1} = Fun(BQS), + GTC1 = lists:foldl(fun maybe_confirm_message/2, GTC, Guids), + State #state { backing_queue_state = BQS1, + guid_to_channel = GTC1 }. + +record_confirm_or_confirm(#delivery { msg_seq_no = undefined }, _Q, GTC) -> + GTC; +record_confirm_or_confirm( + #delivery { sender = ChPid, + message = #basic_message { is_persistent = true, + guid = Guid }, + msg_seq_no = MsgSeqNo }, #amqqueue { durable = true }, GTC) -> + dict:store(Guid, {ChPid, MsgSeqNo}, GTC); +record_confirm_or_confirm(#delivery { sender = ChPid, msg_seq_no = MsgSeqNo }, + _Q, GTC) -> + ok = rabbit_channel:confirm(ChPid, MsgSeqNo), + GTC. + +maybe_confirm_message(Guid, GTC) -> + case dict:find(Guid, GTC) of + {ok, {ChPid, MsgSeqNo}} when MsgSeqNo =/= undefined -> + ok = rabbit_channel:confirm(ChPid, MsgSeqNo), + dict:erase(Guid, GTC); + error -> + GTC + end. 
handle_process_result({continue, State}) -> noreply(State); handle_process_result({stop, State}) -> {stop, normal, State}. @@ -361,7 +385,7 @@ enqueue_message(Delivery = #delivery { sender = ChPid }, false -> {continue, State1} end. -process_instructions(State = #state { instructions = InstrQ }) -> +process_instructions(State = #state { instructions = InstrQ }) -> case queue:out(InstrQ) of {empty, _InstrQ} -> {continue, State}; @@ -378,10 +402,12 @@ process_instructions(State = #state { instructions = InstrQ }) -> end. process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, - State = #state { sender_queues = SQ, + State = #state { q = Q, + sender_queues = SQ, backing_queue = BQ, backing_queue_state = BQS, - guid_ack = GA }) -> + guid_ack = GA, + guid_to_channel = GTC }) -> case dict:find(ChPid, SQ) of error -> blocked; @@ -389,26 +415,32 @@ process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, case queue:out(Q) of {empty, _Q} -> blocked; - {{value, #delivery { - message = Msg = #basic_message { guid = Guid } }}, Q1} -> + {{value, Delivery = #delivery { + message = Msg = #basic_message { guid = Guid } }}, + Q1} -> State1 = State #state { sender_queues = dict:store(ChPid, Q1, SQ) }, + GTC1 = record_confirm_or_confirm(Delivery, Q, GTC), {processed, case Deliver of false -> - %% RECORD CONFIRM - modify MsgProps BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State1 #state {backing_queue_state = BQS1 }; + State1 #state { backing_queue_state = BQS1, + guid_to_channel = GTC1 }; {true, AckRequired} -> {AckTag, BQS1} = BQ:publish_delivered( AckRequired, Msg, MsgProps, ChPid, BQS), - GA1 = case AckRequired of - true -> dict:store(Guid, AckTag, GA); - false -> GA - end, + {GA1, GTC2} = + case AckRequired of + true -> + {dict:store(Guid, AckTag, GA), GTC1}; + false -> + {GA, maybe_confirm_message(Guid, GTC1)} + end, State1 #state { backing_queue_state = BQS1, - guid_ack = GA1 } + guid_ack = GA1, + guid_to_channel = GTC2 } end}; {{value, #delivery {}}, _Q1} -> 
%% throw away the instruction: we'll never receive -- cgit v1.2.1 From a8ba00e17e58ce3aa3d20d510566f9d901a072fa Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Dec 2010 13:51:02 +0000 Subject: Accidental unification going on of two things named Q; fix a bug which led to repeated calls to BQ:delete_and_terminate (which turns out not to be idempotent) --- src/rabbit_mirror_queue_slave.erl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index d4623bf5..166f473a 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -224,6 +224,10 @@ handle_info(Msg, State) -> %% slave, we have no idea whether or not we'll be the only copy coming %% back up. Thus we must assume we will be, and preserve anything we %% have on disk. +terminate(_Reason, #state { backing_queue_state = undefined }) -> + %% We've received a delete_and_terminate from gm, thus nothing to + %% do here. + ok; terminate(Reason, #state { q = Q, gm = GM, backing_queue = BQ, @@ -411,15 +415,15 @@ process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, case dict:find(ChPid, SQ) of error -> blocked; - {ok, Q} -> - case queue:out(Q) of - {empty, _Q} -> + {ok, MQ} -> + case queue:out(MQ) of + {empty, _MQ} -> blocked; {{value, Delivery = #delivery { message = Msg = #basic_message { guid = Guid } }}, - Q1} -> + MQ1} -> State1 = State #state { sender_queues = - dict:store(ChPid, Q1, SQ) }, + dict:store(ChPid, MQ1, SQ) }, GTC1 = record_confirm_or_confirm(Delivery, Q, GTC), {processed, case Deliver of @@ -442,7 +446,7 @@ process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, guid_ack = GA1, guid_to_channel = GTC2 } end}; - {{value, #delivery {}}, _Q1} -> + {{value, #delivery {}}, _MQ1} -> %% throw away the instruction: we'll never receive %% the message to which it corresponds. 
{processed, State} @@ -512,8 +516,8 @@ process_instruction({requeue, MsgPropsFun, Guids}, process_instruction(delete_and_terminate, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - {stop, State #state { - backing_queue_state = BQ:delete_and_terminate(BQS) }}. + BQ:delete_and_terminate(BQS), + {stop, State #state { backing_queue_state = undefined }}. guids_to_acktags(Guids, GA) -> {AckTags, GA1} = -- cgit v1.2.1 From 586a9cf3740489e8ef95fd0e51bf7aacda9ab8b9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Dec 2010 14:57:13 +0000 Subject: Fix a race condition that can occur on queue deletion. Also change boot order to make sure the msg_stores are started before us (and thus stopped after us) --- src/rabbit_mirror_queue_coordinator.erl | 9 ++++++--- src/rabbit_mirror_queue_misc.erl | 32 ++++++++++++++++++-------------- src/rabbit_mirror_queue_slave.erl | 14 ++++++++------ src/rabbit_mirror_queue_slave_sup.erl | 4 ++-- 4 files changed, 34 insertions(+), 25 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index fb650144..6303952d 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -79,9 +79,12 @@ handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName } }) -> rabbit_log:info("Master ~p saw deaths ~p for queue ~p~n", [self(), Deaths, QueueName]), - Node = node(), - Node = node(rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths)), - noreply(State). + case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + {ok, Pid} when node(Pid) =:= node() -> + noreply(State); + {error, not_found} -> + {stop, normal, State} + end. handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. 
diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 237bf080..05602076 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -24,19 +24,23 @@ remove_from_queue(QueueName, DeadPids) -> DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], rabbit_misc:execute_mnesia_transaction( fun () -> - [Q = #amqqueue { pid = QPid, - mirror_pids = MPids }] = - mnesia:read({rabbit_queue, QueueName}), - [QPid1 | MPids1] = - [Pid || Pid <- [QPid | MPids], - not lists:member(node(Pid), DeadNodes)], - case {{QPid, MPids}, {QPid1, MPids1}} of - {Same, Same} -> - QPid; - _ -> - Q1 = Q #amqqueue { pid = QPid1, - mirror_pids = MPids1 }, - mnesia:write(rabbit_queue, Q1, write), - QPid1 + %% Someone else could have deleted the queue before we + %% get here. + case mnesia:read({rabbit_queue, QueueName}) of + [] -> {error, not_found}; + [Q = #amqqueue { pid = QPid, + mirror_pids = MPids }] -> + [QPid1 | MPids1] = + [Pid || Pid <- [QPid | MPids], + not lists:member(node(Pid), DeadNodes)], + case {{QPid, MPids}, {QPid1, MPids1}} of + {Same, Same} -> + {ok, QPid}; + _ -> + Q1 = Q #amqqueue { pid = QPid1, + mirror_pids = MPids1 }, + mnesia:write(rabbit_queue, Q1, write), + {ok, QPid1} + end end end). 
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 166f473a..f124bc9e 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -166,16 +166,18 @@ handle_call({gm_deaths, Deaths}, From, master_node = MNode }) -> rabbit_log:info("Slave ~p saw deaths ~p for queue ~p~n", [self(), Deaths, QueueName]), - case {node(), node(rabbit_mirror_queue_misc:remove_from_queue( - QueueName, Deaths))} of - {_Node, MNode} -> + case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of + {ok, Pid} when node(Pid) =:= MNode -> reply(ok, State); - {Node, Node} -> + {ok, Pid} when node(Pid) =:= node() -> promote_me(From, State); - {_Node, MNode1} -> + {ok, Pid} -> gen_server2:reply(From, ok), ok = gm:broadcast(GM, heartbeat), - noreply(State #state { master_node = MNode1 }) + noreply(State #state { master_node = node(Pid) }); + {error, not_found} -> + gen_server2:reply(From, ok), + {stop, normal, State} end; handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl index 6658e6c3..80c0520c 100644 --- a/src/rabbit_mirror_queue_slave_sup.erl +++ b/src/rabbit_mirror_queue_slave_sup.erl @@ -19,8 +19,8 @@ -rabbit_boot_step({mirror_queue_slave_sup, [{description, "mirror queue slave sup"}, {mfa, {rabbit_mirror_queue_slave_sup, start, []}}, - {requires, core_initialized}, - {enables, queue_sup_queue_recovery}]}). + {requires, queue_sup_queue_recovery}, + {enables, routing_ready}]}). -behaviour(supervisor2). 
-- cgit v1.2.1 From af2b47438dfe451b2fcd508dbf868e599478636e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Dec 2010 16:42:34 +0000 Subject: Ensure that promoted slaves monitor the queue's exclusive owner --- src/rabbit_amqqueue_process.erl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 4dd48457..601f28e3 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -132,8 +132,13 @@ init(Q) -> guid_to_channel = dict:new()}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. -init_with_backing_queue_state(Q, BQ, BQS, RateTRef, AckTags, Deliveries) -> +init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, + RateTRef, AckTags, Deliveries) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), + case Owner of + none -> ok; + _ -> erlang:monitor(process, Owner) + end, State = requeue_and_run( AckTags, process_args( -- cgit v1.2.1 From 7b526a7941abafe79d8280d46577e215a8e1600b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 19 Dec 2010 00:54:51 +0000 Subject: Fixed a bug caused by revision 454fbb9127bd in rabbit-ha from where gm.erl came; Make gm_tests more robust; Avoid creating endless funs all the time for every message (substantial performance gain). Abstract use of dicts - expected use case is relatively small groups, thus orddict would normally be more appropriate --- src/gm.erl | 118 ++++++++++++++++++++++++++++---------------------------- src/gm_test.erl | 10 ++++- 2 files changed, 66 insertions(+), 62 deletions(-) diff --git a/src/gm.erl b/src/gm.erl index 0a6e346a..6a2c9c48 100644 --- a/src/gm.erl +++ b/src/gm.erl @@ -387,6 +387,7 @@ -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). -define(SETS, ordsets). +-define(DICT, orddict). 
-record(state, { self, @@ -574,10 +575,7 @@ handle_cast({?TAG, ReqVer, Msg}, end, handle_callback_result( if_callback_success( - Result, - fun (_Result1, State2) -> handle_msg(Msg, State2) end, - fun (Result1, State2) -> {Result1, State2} end, - State1)); + Result, fun handle_msg_true/3, fun handle_msg_false/3, Msg, State1)); handle_cast({broadcast, _Msg}, State = #state { members_state = undefined }) -> noreply(State); @@ -617,6 +615,9 @@ handle_info({'DOWN', MRef, process, _Pid, _Reason}, left = Left, right = Right, group_name = GroupName, + view = View, + module = Module, + callback_args = Args, confirms = Confirms }) -> Member = case {Left, Right} of {{Member1, MRef}, _} -> Member1; @@ -638,7 +639,11 @@ handle_info({'DOWN', MRef, process, _Pid, _Reason}, members_state = blank_member_state(), confirms = purge_confirms(Confirms) }); _ -> - {ok, State1} + %% here we won't be pointing out any deaths: + %% the concern is that there maybe births + %% which we'd otherwise miss. + {callback_view_changed(Args, Module, View, View1), + State1} end, handle_callback_result({Result, check_neighbours(State2)}) end. 
@@ -674,8 +679,8 @@ handle_msg({catchup, Left, MembersStateLeft}, members_state = MembersState }) when MembersState =/= undefined -> MembersStateLeft1 = build_members_state(MembersStateLeft), - AllMembers = lists:usort(dict:fetch_keys(MembersState) ++ - dict:fetch_keys(MembersStateLeft1)), + AllMembers = lists:usort(?DICT:fetch_keys(MembersState) ++ + ?DICT:fetch_keys(MembersStateLeft1)), {MembersState1, Activity} = lists:foldl( fun (Id, MembersStateActivity) -> @@ -709,11 +714,9 @@ handle_msg({catchup, _NotLeft, _MembersState}, State) -> handle_msg({activity, Left, Activity}, State = #state { self = Self, left = {Left, _MRefL}, - module = Module, view = View, members_state = MembersState, - confirms = Confirms, - callback_args = Args }) + confirms = Confirms }) when MembersState =/= undefined -> {MembersState1, {Confirms1, Activity1}} = lists:foldl( @@ -755,31 +758,18 @@ handle_msg({activity, Left, Activity}, {Result, State2} = maybe_erase_aliases(State1), ok = maybe_send_activity(Activity3, State2), if_callback_success( - Result, - fun (_Result1, State3) -> {callback(Args, Module, Activity3), State3} end, - fun (Result1, State3) -> {Result1, State3} end, - State2); + Result, fun activity_true/3, fun activity_false/3, Activity3, State2); handle_msg({activity, _NotLeft, _Activity}, State) -> {ok, State}. noreply(State) -> - ok = a(State), {noreply, State, hibernate}. reply(Reply, State) -> - ok = a(State), {reply, Reply, State, hibernate}. -a(#state { view = undefined }) -> - ok; -a(#state { self = Self, - left = {Left, _MRefL}, - view = View }) -> - #view_member { left = Left } = fetch_view_member(Self, View), - ok. - internal_broadcast(Msg, From, State = #state { self = Self, pub_count = PubCount, members_state = MembersState, @@ -826,25 +816,25 @@ is_member_alias(Member, Self, View) -> dead_member_id({dead, Member}) -> Member. store_view_member(VMember = #view_member { id = Id }, {Ver, View}) -> - {Ver, dict:store(Id, VMember, View)}. 
+ {Ver, ?DICT:store(Id, VMember, View)}. with_view_member(Fun, View, Id) -> store_view_member(Fun(fetch_view_member(Id, View)), View). fetch_view_member(Id, {_Ver, View}) -> - dict:fetch(Id, View). + ?DICT:fetch(Id, View). find_view_member(Id, {_Ver, View}) -> - dict:find(Id, View). + ?DICT:find(Id, View). blank_view(Ver) -> - {Ver, dict:new()}. + {Ver, ?DICT:new()}. alive_view_members({_Ver, View}) -> - dict:fetch_keys(View). + ?DICT:fetch_keys(View). all_known_members({_Ver, View}) -> - dict:fold( + ?DICT:fold( fun (Member, #view_member { aliases = Aliases }, Acc) -> ?SETS:to_list(Aliases) ++ [Member | Acc] end, [], View). @@ -1155,28 +1145,28 @@ with_member_acc(Fun, Id, {MembersState, Acc}) -> {store_member(Id, MemberState, MembersState), Acc1}. find_member_or_blank(Id, MembersState) -> - case dict:find(Id, MembersState) of + case ?DICT:find(Id, MembersState) of {ok, Result} -> Result; error -> blank_member() end. erase_member(Id, MembersState) -> - dict:erase(Id, MembersState). + ?DICT:erase(Id, MembersState). blank_member() -> #member { pending_ack = queue:new(), last_pub = -1, last_ack = -1 }. blank_member_state() -> - dict:new(). + ?DICT:new(). store_member(Id, MemberState, MembersState) -> - dict:store(Id, MemberState, MembersState). + ?DICT:store(Id, MemberState, MembersState). prepare_members_state(MembersState) -> - dict:to_list(MembersState). + ?DICT:to_list(MembersState). build_members_state(MembersStateList) -> - dict:from_list(MembersStateList). + ?DICT:from_list(MembersStateList). 
%% --------------------------------------------------------------------------- @@ -1228,24 +1218,34 @@ callback_view_changed(Args, Module, OldView, NewView) -> handle_callback_result({Result, State}) -> if_callback_success( - Result, - fun (_Result, State1) -> noreply(State1) end, - fun ({stop, Reason}, State1) -> {stop, Reason, State1} end, - State); + Result, fun no_reply_true/3, fun no_reply_false/3, undefined, State); handle_callback_result({Result, Reply, State}) -> if_callback_success( - Result, - fun (_Result, State1) -> reply(Reply, State1) end, - fun ({stop, Reason}, State1) -> {stop, Reason, Reply, State1} end, - State). - -if_callback_success(ok, True, _False, State) -> - True(ok, State); -if_callback_success({become, Module, Args} = Result, True, _False, State) -> - True(Result, State #state { module = Module, - callback_args = Args }); -if_callback_success({stop, _Reason} = Result, _True, False, State) -> - False(Result, State). + Result, fun reply_true/3, fun reply_false/3, Reply, State). + +no_reply_true (_Result, _Undefined, State) -> noreply(State). +no_reply_false({stop, Reason}, _Undefined, State) -> {stop, Reason, State}. + +reply_true (_Result, Reply, State) -> reply(Reply, State). +reply_false({stop, Reason}, Reply, State) -> {stop, Reason, Reply, State}. + +handle_msg_true (_Result, Msg, State) -> handle_msg(Msg, State). +handle_msg_false(Result, _Msg, State) -> {Result, State}. + +activity_true(_Result, Activity, State = #state { module = Module, + callback_args = Args }) -> + {callback(Args, Module, Activity), State}. +activity_false(Result, _Activity, State) -> + {Result, State}. + +if_callback_success(ok, True, _False, Arg, State) -> + True(ok, Arg, State); +if_callback_success( + {become, Module, Args} = Result, True, _False, Arg, State) -> + True(Result, Arg, State #state { module = Module, + callback_args = Args }); +if_callback_success({stop, _Reason} = Result, _True, False, Arg, State) -> + False(Result, Arg, State). 
maybe_confirm(_Self, _Id, Confirms, []) -> Confirms; @@ -1282,14 +1282,12 @@ queue_from_pubs(Pubs) -> apply_acks([], Pubs) -> Pubs; -apply_acks([PubNum | Acks], Pubs) -> - {{value, {PubNum, _Msg}}, Pubs1} = queue:out(Pubs), - apply_acks(Acks, Pubs1). - -join_pubs(Q, []) -> - Q; -join_pubs(Q, Pubs) -> - queue:join(Q, queue_from_pubs(Pubs)). +apply_acks(List, Pubs) -> + {_, Pubs1} = queue:split(length(List), Pubs), + Pubs1. + +join_pubs(Q, []) -> Q; +join_pubs(Q, Pubs) -> queue:join(Q, queue_from_pubs(Pubs)). last_ack([], LA) -> LA; diff --git a/src/gm_test.erl b/src/gm_test.erl index aebfbb69..e8f28598 100644 --- a/src/gm_test.erl +++ b/src/gm_test.erl @@ -59,7 +59,7 @@ members_changed([], Births, Deaths) -> lists:foldl( fun (Died, StateN) -> true = dict:is_key(Died, StateN), - dict:erase(Died, StateN) + dict:store(Died, died, StateN) end, State1, Deaths) end), ok. @@ -69,6 +69,9 @@ handle_msg([], From, {test_msg, Num}) -> with_state( fun (State) -> ok = case dict:find(From, State) of + {ok, died} -> + exit({{from, From}, + {received_posthumous_delivery, Num}}); {ok, empty} -> ok; {ok, Num} -> ok; {ok, Num1} when Num < Num1 -> @@ -78,7 +81,10 @@ handle_msg([], From, {test_msg, Num}) -> {ok, Num1} -> exit({{from, From}, {missing_delivery_of, Num}, - {received_early, Num1}}) + {received_early, Num1}}); + error -> + exit({{from, From}, + {received_premature_delivery, Num}}) end, dict:store(From, Num + 1, State) end), -- cgit v1.2.1 From dfd985400ac482349797f430978b773292eaea0f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 19 Dec 2010 01:10:14 +0000 Subject: When you send a msg to another node, if the local node doesn't already have a connection to the destination node, it has to contact epmd and try and resolve the remote node. This takes time. Thus, in the event of a distributed gm group, it's very important that we record asap when a member dies as that member might be our downstream, to which we're sending, and it might be on another node. 
Thus promote the DOWN messages. Because of the inherent races going on, gm is built to cope with this anyway. This has the nice benefit that promotion of slaves to master in the event of failure of master is now pretty much instantaneous --- src/gm.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gm.erl b/src/gm.erl index 6a2c9c48..baf46471 100644 --- a/src/gm.erl +++ b/src/gm.erl @@ -377,7 +377,7 @@ confirmed_broadcast/2, group_members/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3]). + code_change/3, prioritise_info/2]). -export([behaviour_info/1]). @@ -658,6 +658,10 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. +prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _State) -> 1; +prioritise_info(_ , _State) -> 0. + + handle_msg(check_neighbours, State) -> %% no-op - it's already been done by the calling handle_cast {ok, State}; -- cgit v1.2.1 From b73f2e5f8dd433fce76e1a8dee20596d6dbfd144 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Dec 2010 00:26:33 +0000 Subject: Right, well the fake handling code in master is fine. The fake handling code in slave is utterly wrong. However, I need to sleep --- src/rabbit_mirror_queue_master.erl | 63 +++++++++++++------ src/rabbit_mirror_queue_slave.erl | 125 ++++++++++++++++++++++++++----------- 2 files changed, 135 insertions(+), 53 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 0d64ab8e..4628796f 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -26,7 +26,7 @@ -export([start/1, stop/0]). --export([promote_backing_queue_state/4]). +-export([promote_backing_queue_state/5]). -behaviour(rabbit_backing_queue). @@ -36,7 +36,8 @@ coordinator, backing_queue, backing_queue_state, - set_delivered + set_delivered, + fakes }). 
%% --------------------------------------------------------------------------- @@ -64,14 +65,16 @@ init(#amqqueue { arguments = Args } = Q, Recover) -> coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = 0 }. + set_delivered = 0, + fakes = sets:new() }. -promote_backing_queue_state(CPid, BQ, BQS, GM) -> +promote_backing_queue_state(CPid, BQ, BQS, GM, Fakes) -> #state { gm = GM, coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = BQ:len(BQS) }. + set_delivered = BQ:len(BQS), + fakes = Fakes }. terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. The queue is going down but @@ -126,30 +129,54 @@ dropwhile(Fun, State = #state { gm = GM, fetch(AckRequired, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = SetDelivered }) -> + set_delivered = SetDelivered, + fakes = Fakes }) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), - State1 = State #state { backing_queue_state = BQS1 }, case Result of empty -> - {Result, State1}; + {Result, State #state { backing_queue_state = BQS1 }}; {#basic_message { guid = Guid } = Message, IsDelivered, AckTag, Remaining} -> - ok = gm:broadcast(GM, {fetch, AckRequired, Guid, Remaining}), - IsDelivered1 = IsDelivered orelse SetDelivered > 0, SetDelivered1 = lists:max([0, SetDelivered - 1]), - {{Message, IsDelivered1, AckTag, Remaining}, - State1 #state { set_delivered = SetDelivered1 }} + case sets:is_element(Guid, Fakes) of + true -> + {BQS2, Fakes1} = + case AckRequired of + true -> {[Guid], BQS3} = BQ:ack([AckTag], BQS1), + {BQS3, Fakes}; + false -> {BQS1, sets:del_element(Guid, Fakes)} + end, + ok = gm:broadcast(GM, {fetch, false, Guid, Remaining}), + fetch(AckRequired, + State #state { backing_queue_state = BQS2, + set_delivered = SetDelivered1, + fakes = Fakes1 }); + false -> + ok = gm:broadcast(GM, + {fetch, AckRequired, Guid, Remaining}), + IsDelivered1 = IsDelivered orelse 
SetDelivered > 0, + Fakes1 = case SetDelivered + SetDelivered1 of + 1 -> sets:new(); %% transition to 0 + _ -> Fakes + end, + {{Message, IsDelivered1, AckTag, Remaining}, + State #state { backing_queue_state = BQS1, + set_delivered = SetDelivered1, + fakes = Fakes1 }} + end end. ack(AckTags, State = #state { gm = GM, backing_queue = BQ, - backing_queue_state = BQS }) -> + backing_queue_state = BQS, + fakes = Fakes }) -> {Guids, BQS1} = BQ:ack(AckTags, BQS), - case Guids of - [] -> ok; - _ -> ok = gm:broadcast(GM, {ack, Guids}) - end, - {Guids, State #state { backing_queue_state = BQS1 }}. + Fakes1 = case Guids of + [] -> Fakes; + _ -> ok = gm:broadcast(GM, {ack, Guids}), + sets:difference(Fakes, sets:from_list(Guids)) + end, + {Guids, State #state { backing_queue_state = BQS1, fakes = Fakes1 }}. tx_publish(Txn, Msg, MsgProps, ChPid, #state {} = State) -> %% gm:broadcast(GM, {tx_publish, Txn, Guid, MsgProps, ChPid}) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index f124bc9e..0134787c 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -75,6 +75,7 @@ -behaviour(gm). -include("rabbit.hrl"). +-include("rabbit_framing.hrl"). -include("gm_specs.hrl"). -record(state, { q, @@ -87,6 +88,7 @@ sender_queues, %% :: Pid -> MsgQ guid_ack, %% :: Guid -> AckTag instructions, %% :: InstrQ + fakes, %% :: Set Guid guid_to_channel %% for confirms }). 
@@ -141,6 +143,7 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), guid_ack = dict:new(), instructions = queue:new(), + fakes = sets:new(), guid_to_channel = dict:new() }, hibernate, @@ -190,7 +193,7 @@ handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> handle_cast({gm, Instruction}, State = #state { instructions = InstrQ }) -> State1 = State #state { instructions = queue:in(Instruction, InstrQ) }, case queue:is_empty(InstrQ) of - true -> handle_process_result(process_instructions(State1)); + true -> handle_process_result(process_instructions(false, State1)); false -> noreply(State1) end; @@ -320,21 +323,25 @@ maybe_confirm_message(Guid, GTC) -> handle_process_result({continue, State}) -> noreply(State); handle_process_result({stop, State}) -> {stop, normal, State}. -promote_me(From, #state { q = Q, - gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - rate_timer_ref = RateTRef, - sender_queues = SQ, - guid_ack = GA }) -> +promote_me(From, State = #state { q = Q }) -> rabbit_log:info("Promoting slave ~p for queue ~p~n", [self(), Q #amqqueue.name]), + #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = RateTRef, + sender_queues = SQ, + guid_ack = GA, + instructions = Instr, + fakes = Fakes } = + process_instructions(true, State), + true = queue:is_empty(Instr), %% ASSERTION {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM), + CPid, BQ, BQS, GM, Fakes), %% We have to do the requeue via this init because otherwise we %% don't have access to the relevent MsgPropsFun. Also, we are %% already in mnesia as the master queue pid. 
Thus we cannot just @@ -387,19 +394,19 @@ enqueue_message(Delivery = #delivery { sender = ChPid }, SQ1 = dict:store(ChPid, queue:in(Delivery, Q), SQ), State1 = State #state { sender_queues = SQ1 }, case queue:is_empty(Q) of - true -> process_instructions(State1); + true -> process_instructions(false, State1); false -> {continue, State1} end. -process_instructions(State = #state { instructions = InstrQ }) -> +process_instructions(Flush, State = #state { instructions = InstrQ }) -> case queue:out(InstrQ) of {empty, _InstrQ} -> {continue, State}; {{value, Instr}, InstrQ1} -> - case process_instruction(Instr, State) of + case process_instruction(Flush, Instr, State) of {processed, State1} -> process_instructions( - State1 #state { instructions = InstrQ1 }); + Flush, State1 #state { instructions = InstrQ1 }); {stop, State1} -> {stop, State1 #state { instructions = InstrQ1 }}; blocked -> @@ -407,20 +414,38 @@ process_instructions(State = #state { instructions = InstrQ }) -> end end. -process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, +process_instruction(Flush, {publish, Deliver, Guid, MsgProps, ChPid} = Instr, State = #state { q = Q, sender_queues = SQ, backing_queue = BQ, backing_queue_state = BQS, guid_ack = GA, - guid_to_channel = GTC }) -> + guid_to_channel = GTC, + fakes = Fakes }) -> case dict:find(ChPid, SQ) of error -> - blocked; + case Flush of + true -> MQ = queue:from_list([fake_delivery(Q, Guid, ChPid)]), + State1 = State #state { + sender_queues = dict:store(ChPid, MQ, SQ), + fakes = sets:add_element(Guid, Fakes) }, + process_instruction(Flush, Instr, State1); + false -> blocked + end; {ok, MQ} -> case queue:out(MQ) of {empty, _MQ} -> - blocked; + case Flush of + true -> + MQ1 = queue:in_r(fake_delivery(Q, Guid, ChPid), MQ), + SQ1 = dict:store(ChPid, MQ1, SQ), + State1 = State #state { + sender_queues = SQ1, + fakes = sets:add_element(Guid, Fakes) }, + process_instruction(Flush, Instr, State1); + false -> + blocked + end; {{value, Delivery = 
#delivery { message = Msg = #basic_message { guid = Guid } }}, MQ1} -> @@ -449,28 +474,41 @@ process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, guid_to_channel = GTC2 } end}; {{value, #delivery {}}, _MQ1} -> - %% throw away the instruction: we'll never receive - %% the message to which it corresponds. - {processed, State} + MQ1 = queue:in_r(fake_delivery(Q, Guid, ChPid), MQ), + State1 = State #state { + sender_queues = dict:store(ChPid, MQ1, SQ), + fakes = sets:add_element(Guid, Fakes) }, + process_instruction(Flush, Instr, State1) end end; -process_instruction({set_length, Length}, +process_instruction(_Flush, {set_length, Length}, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> + backing_queue_state = BQS, + fakes = Fakes }) -> QLen = BQ:len(BQS), ToDrop = QLen - Length, {processed, case ToDrop > 0 of - true -> BQS1 = lists:foldl( - fun (const, BQSN) -> BQ:fetch(false, BQSN) end, - BQS, lists:duplicate(ToDrop, const)), - State #state { backing_queue_state = BQS1 }; - false -> State + true -> + {Guids, BQS1} = + lists:foldl( + fun (const, {GuidsN, BQSN}) -> + {{#basic_message { guid = Guid }, _IsDelivered, + _AckTag, _Remaining}, BQSN1} = + BQ:fetch(false, BQSN), + {[Guid | GuidsN], BQSN1} + end, BQS, lists:duplicate(ToDrop, const)), + Fakes1 = sets:difference(Fakes, sets:from_list(Guids)), + State #state { backing_queue_state = BQS1, + fakes = Fakes1 }; + false -> + State end}; -process_instruction({fetch, AckRequired, Guid, Remaining}, +process_instruction(_Flush, {fetch, AckRequired, Guid, Remaining}, State = #state { backing_queue = BQ, backing_queue_state = BQS, - guid_ack = GA }) -> + guid_ack = GA, + fakes = Fakes }) -> QLen = BQ:len(BQS), {processed, case QLen - 1 of @@ -481,22 +519,28 @@ process_instruction({fetch, AckRequired, Guid, Remaining}, true -> dict:store(Guid, AckTag, GA); false -> GA end, + Fakes1 = sets:del_element(Guid, Fakes), State #state { backing_queue_state = BQS1, - guid_ack = GA1 }; + guid_ack = GA1, 
+ fakes = Fakes1 }; Other when Other < Remaining -> %% we must be shorter than the master + false = sets:is_element(Guid, Fakes), %% ASSERTION State end}; -process_instruction({ack, Guids}, +process_instruction(_Flush, {ack, Guids}, State = #state { backing_queue = BQ, backing_queue_state = BQS, - guid_ack = GA }) -> + guid_ack = GA, + fakes = Fakes }) -> {AckTags, GA1} = guids_to_acktags(Guids, GA), {Guids1, BQS1} = BQ:ack(AckTags, BQS), [] = Guids1 -- Guids, %% ASSERTION + Fakes1 = sets:difference(Fakes, sets:from_list(Guids)), {processed, State #state { guid_ack = GA1, - backing_queue_state = BQS1 }}; -process_instruction({requeue, MsgPropsFun, Guids}, + backing_queue_state = BQS1, + fakes = Fakes1 }}; +process_instruction(_Flush, {requeue, MsgPropsFun, Guids}, State = #state { backing_queue = BQ, backing_queue_state = BQS, guid_ack = GA }) -> @@ -515,7 +559,7 @@ process_instruction({requeue, MsgPropsFun, Guids}, State #state { guid_ack = dict:new(), backing_queue_state = BQS2 } end}; -process_instruction(delete_and_terminate, +process_instruction(_Flush, delete_and_terminate, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:delete_and_terminate(BQS), @@ -534,3 +578,14 @@ guids_to_acktags(Guids, GA) -> ack_all(BQ, GA, BQS) -> BQ:ack([AckTag || {_Guid, AckTag} <- dict:to_list(GA)], BQS). + +fake_delivery(#amqqueue { name = QueueName }, Guid, ChPid) -> + ExchangeName = rabbit_misc:r(QueueName, exchange, <<>>), + Msg = (rabbit_basic:message(ExchangeName, <<>>, #'P_basic'{}, <<>>)) + #basic_message { guid = Guid }, + #delivery { mandatory = false, + immediate = false, + txn = none, + sender = ChPid, + message = Msg, + msg_seq_no = undefined }. 
-- cgit v1.2.1 From ac7bba488a805bf0e8248c42f861ccd7cb3aba63 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Dec 2010 13:56:21 +0000 Subject: Revert the previous changeset as I've decided to solve this differently --- src/rabbit_mirror_queue_master.erl | 63 ++++++------------- src/rabbit_mirror_queue_slave.erl | 125 +++++++++++-------------------------- 2 files changed, 53 insertions(+), 135 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 4628796f..0d64ab8e 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -26,7 +26,7 @@ -export([start/1, stop/0]). --export([promote_backing_queue_state/5]). +-export([promote_backing_queue_state/4]). -behaviour(rabbit_backing_queue). @@ -36,8 +36,7 @@ coordinator, backing_queue, backing_queue_state, - set_delivered, - fakes + set_delivered }). %% --------------------------------------------------------------------------- @@ -65,16 +64,14 @@ init(#amqqueue { arguments = Args } = Q, Recover) -> coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = 0, - fakes = sets:new() }. + set_delivered = 0 }. -promote_backing_queue_state(CPid, BQ, BQS, GM, Fakes) -> +promote_backing_queue_state(CPid, BQ, BQS, GM) -> #state { gm = GM, coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = BQ:len(BQS), - fakes = Fakes }. + set_delivered = BQ:len(BQS) }. terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. 
The queue is going down but @@ -129,54 +126,30 @@ dropwhile(Fun, State = #state { gm = GM, fetch(AckRequired, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = SetDelivered, - fakes = Fakes }) -> + set_delivered = SetDelivered }) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), + State1 = State #state { backing_queue_state = BQS1 }, case Result of empty -> - {Result, State #state { backing_queue_state = BQS1 }}; + {Result, State1}; {#basic_message { guid = Guid } = Message, IsDelivered, AckTag, Remaining} -> + ok = gm:broadcast(GM, {fetch, AckRequired, Guid, Remaining}), + IsDelivered1 = IsDelivered orelse SetDelivered > 0, SetDelivered1 = lists:max([0, SetDelivered - 1]), - case sets:is_element(Guid, Fakes) of - true -> - {BQS2, Fakes1} = - case AckRequired of - true -> {[Guid], BQS3} = BQ:ack([AckTag], BQS1), - {BQS3, Fakes}; - false -> {BQS1, sets:del_element(Guid, Fakes)} - end, - ok = gm:broadcast(GM, {fetch, false, Guid, Remaining}), - fetch(AckRequired, - State #state { backing_queue_state = BQS2, - set_delivered = SetDelivered1, - fakes = Fakes1 }); - false -> - ok = gm:broadcast(GM, - {fetch, AckRequired, Guid, Remaining}), - IsDelivered1 = IsDelivered orelse SetDelivered > 0, - Fakes1 = case SetDelivered + SetDelivered1 of - 1 -> sets:new(); %% transition to 0 - _ -> Fakes - end, - {{Message, IsDelivered1, AckTag, Remaining}, - State #state { backing_queue_state = BQS1, - set_delivered = SetDelivered1, - fakes = Fakes1 }} - end + {{Message, IsDelivered1, AckTag, Remaining}, + State1 #state { set_delivered = SetDelivered1 }} end. ack(AckTags, State = #state { gm = GM, backing_queue = BQ, - backing_queue_state = BQS, - fakes = Fakes }) -> + backing_queue_state = BQS }) -> {Guids, BQS1} = BQ:ack(AckTags, BQS), - Fakes1 = case Guids of - [] -> Fakes; - _ -> ok = gm:broadcast(GM, {ack, Guids}), - sets:difference(Fakes, sets:from_list(Guids)) - end, - {Guids, State #state { backing_queue_state = BQS1, fakes = Fakes1 }}. 
+ case Guids of + [] -> ok; + _ -> ok = gm:broadcast(GM, {ack, Guids}) + end, + {Guids, State #state { backing_queue_state = BQS1 }}. tx_publish(Txn, Msg, MsgProps, ChPid, #state {} = State) -> %% gm:broadcast(GM, {tx_publish, Txn, Guid, MsgProps, ChPid}) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 0134787c..f124bc9e 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -75,7 +75,6 @@ -behaviour(gm). -include("rabbit.hrl"). --include("rabbit_framing.hrl"). -include("gm_specs.hrl"). -record(state, { q, @@ -88,7 +87,6 @@ sender_queues, %% :: Pid -> MsgQ guid_ack, %% :: Guid -> AckTag instructions, %% :: InstrQ - fakes, %% :: Set Guid guid_to_channel %% for confirms }). @@ -143,7 +141,6 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), guid_ack = dict:new(), instructions = queue:new(), - fakes = sets:new(), guid_to_channel = dict:new() }, hibernate, @@ -193,7 +190,7 @@ handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> handle_cast({gm, Instruction}, State = #state { instructions = InstrQ }) -> State1 = State #state { instructions = queue:in(Instruction, InstrQ) }, case queue:is_empty(InstrQ) of - true -> handle_process_result(process_instructions(false, State1)); + true -> handle_process_result(process_instructions(State1)); false -> noreply(State1) end; @@ -323,25 +320,21 @@ maybe_confirm_message(Guid, GTC) -> handle_process_result({continue, State}) -> noreply(State); handle_process_result({stop, State}) -> {stop, normal, State}. 
-promote_me(From, State = #state { q = Q }) -> +promote_me(From, #state { q = Q, + gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = RateTRef, + sender_queues = SQ, + guid_ack = GA }) -> rabbit_log:info("Promoting slave ~p for queue ~p~n", [self(), Q #amqqueue.name]), - #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - rate_timer_ref = RateTRef, - sender_queues = SQ, - guid_ack = GA, - instructions = Instr, - fakes = Fakes } = - process_instructions(true, State), - true = queue:is_empty(Instr), %% ASSERTION {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, Fakes), + CPid, BQ, BQS, GM), %% We have to do the requeue via this init because otherwise we %% don't have access to the relevent MsgPropsFun. Also, we are %% already in mnesia as the master queue pid. Thus we cannot just @@ -394,19 +387,19 @@ enqueue_message(Delivery = #delivery { sender = ChPid }, SQ1 = dict:store(ChPid, queue:in(Delivery, Q), SQ), State1 = State #state { sender_queues = SQ1 }, case queue:is_empty(Q) of - true -> process_instructions(false, State1); + true -> process_instructions(State1); false -> {continue, State1} end. 
-process_instructions(Flush, State = #state { instructions = InstrQ }) -> +process_instructions(State = #state { instructions = InstrQ }) -> case queue:out(InstrQ) of {empty, _InstrQ} -> {continue, State}; {{value, Instr}, InstrQ1} -> - case process_instruction(Flush, Instr, State) of + case process_instruction(Instr, State) of {processed, State1} -> process_instructions( - Flush, State1 #state { instructions = InstrQ1 }); + State1 #state { instructions = InstrQ1 }); {stop, State1} -> {stop, State1 #state { instructions = InstrQ1 }}; blocked -> @@ -414,38 +407,20 @@ process_instructions(Flush, State = #state { instructions = InstrQ }) -> end end. -process_instruction(Flush, {publish, Deliver, Guid, MsgProps, ChPid} = Instr, +process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, State = #state { q = Q, sender_queues = SQ, backing_queue = BQ, backing_queue_state = BQS, guid_ack = GA, - guid_to_channel = GTC, - fakes = Fakes }) -> + guid_to_channel = GTC }) -> case dict:find(ChPid, SQ) of error -> - case Flush of - true -> MQ = queue:from_list([fake_delivery(Q, Guid, ChPid)]), - State1 = State #state { - sender_queues = dict:store(ChPid, MQ, SQ), - fakes = sets:add_element(Guid, Fakes) }, - process_instruction(Flush, Instr, State1); - false -> blocked - end; + blocked; {ok, MQ} -> case queue:out(MQ) of {empty, _MQ} -> - case Flush of - true -> - MQ1 = queue:in_r(fake_delivery(Q, Guid, ChPid), MQ), - SQ1 = dict:store(ChPid, MQ1, SQ), - State1 = State #state { - sender_queues = SQ1, - fakes = sets:add_element(Guid, Fakes) }, - process_instruction(Flush, Instr, State1); - false -> - blocked - end; + blocked; {{value, Delivery = #delivery { message = Msg = #basic_message { guid = Guid } }}, MQ1} -> @@ -474,41 +449,28 @@ process_instruction(Flush, {publish, Deliver, Guid, MsgProps, ChPid} = Instr, guid_to_channel = GTC2 } end}; {{value, #delivery {}}, _MQ1} -> - MQ1 = queue:in_r(fake_delivery(Q, Guid, ChPid), MQ), - State1 = State #state { - sender_queues = 
dict:store(ChPid, MQ1, SQ), - fakes = sets:add_element(Guid, Fakes) }, - process_instruction(Flush, Instr, State1) + %% throw away the instruction: we'll never receive + %% the message to which it corresponds. + {processed, State} end end; -process_instruction(_Flush, {set_length, Length}, +process_instruction({set_length, Length}, State = #state { backing_queue = BQ, - backing_queue_state = BQS, - fakes = Fakes }) -> + backing_queue_state = BQS }) -> QLen = BQ:len(BQS), ToDrop = QLen - Length, {processed, case ToDrop > 0 of - true -> - {Guids, BQS1} = - lists:foldl( - fun (const, {GuidsN, BQSN}) -> - {{#basic_message { guid = Guid }, _IsDelivered, - _AckTag, _Remaining}, BQSN1} = - BQ:fetch(false, BQSN), - {[Guid | GuidsN], BQSN1} - end, BQS, lists:duplicate(ToDrop, const)), - Fakes1 = sets:difference(Fakes, sets:from_list(Guids)), - State #state { backing_queue_state = BQS1, - fakes = Fakes1 }; - false -> - State + true -> BQS1 = lists:foldl( + fun (const, BQSN) -> BQ:fetch(false, BQSN) end, + BQS, lists:duplicate(ToDrop, const)), + State #state { backing_queue_state = BQS1 }; + false -> State end}; -process_instruction(_Flush, {fetch, AckRequired, Guid, Remaining}, +process_instruction({fetch, AckRequired, Guid, Remaining}, State = #state { backing_queue = BQ, backing_queue_state = BQS, - guid_ack = GA, - fakes = Fakes }) -> + guid_ack = GA }) -> QLen = BQ:len(BQS), {processed, case QLen - 1 of @@ -519,28 +481,22 @@ process_instruction(_Flush, {fetch, AckRequired, Guid, Remaining}, true -> dict:store(Guid, AckTag, GA); false -> GA end, - Fakes1 = sets:del_element(Guid, Fakes), State #state { backing_queue_state = BQS1, - guid_ack = GA1, - fakes = Fakes1 }; + guid_ack = GA1 }; Other when Other < Remaining -> %% we must be shorter than the master - false = sets:is_element(Guid, Fakes), %% ASSERTION State end}; -process_instruction(_Flush, {ack, Guids}, +process_instruction({ack, Guids}, State = #state { backing_queue = BQ, backing_queue_state = BQS, - guid_ack = 
GA, - fakes = Fakes }) -> + guid_ack = GA }) -> {AckTags, GA1} = guids_to_acktags(Guids, GA), {Guids1, BQS1} = BQ:ack(AckTags, BQS), [] = Guids1 -- Guids, %% ASSERTION - Fakes1 = sets:difference(Fakes, sets:from_list(Guids)), {processed, State #state { guid_ack = GA1, - backing_queue_state = BQS1, - fakes = Fakes1 }}; -process_instruction(_Flush, {requeue, MsgPropsFun, Guids}, + backing_queue_state = BQS1 }}; +process_instruction({requeue, MsgPropsFun, Guids}, State = #state { backing_queue = BQ, backing_queue_state = BQS, guid_ack = GA }) -> @@ -559,7 +515,7 @@ process_instruction(_Flush, {requeue, MsgPropsFun, Guids}, State #state { guid_ack = dict:new(), backing_queue_state = BQS2 } end}; -process_instruction(_Flush, delete_and_terminate, +process_instruction(delete_and_terminate, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:delete_and_terminate(BQS), @@ -578,14 +534,3 @@ guids_to_acktags(Guids, GA) -> ack_all(BQ, GA, BQS) -> BQ:ack([AckTag || {_Guid, AckTag} <- dict:to_list(GA)], BQS). - -fake_delivery(#amqqueue { name = QueueName }, Guid, ChPid) -> - ExchangeName = rabbit_misc:r(QueueName, exchange, <<>>), - Msg = (rabbit_basic:message(ExchangeName, <<>>, #'P_basic'{}, <<>>)) - #basic_message { guid = Guid }, - #delivery { mandatory = false, - immediate = false, - txn = none, - sender = ChPid, - message = Msg, - msg_seq_no = undefined }. -- cgit v1.2.1 From 43236b06f80d58380a109ac2d2c8325bf8385004 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Dec 2010 15:48:31 +0000 Subject: Give in and have the master put the pub msgs themselves on the gm. Avoiding this proves far too complex in all the failure cases (the worst being when the publishing node crashes - the master can receive the msg, but not the slaves. Worse, because of complexities like delegates, it's not even straightforward to monitor the publishers in order to be sure we're not going to receive more messages from them). 
We continue to have all msgs directly routed to all queues. Yes, this means that normally every slave receives every message twice, but this is genuinely the simplest and most secure route and protects against failures the best. --- src/rabbit_mirror_queue_master.erl | 62 ++++++---- src/rabbit_mirror_queue_slave.erl | 246 ++++++++++++++++++------------------- 2 files changed, 159 insertions(+), 149 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 0d64ab8e..94e93b3e 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -26,7 +26,7 @@ -export([start/1, stop/0]). --export([promote_backing_queue_state/4]). +-export([promote_backing_queue_state/5]). -behaviour(rabbit_backing_queue). @@ -36,7 +36,8 @@ coordinator, backing_queue, backing_queue_state, - set_delivered + set_delivered, + seen }). %% --------------------------------------------------------------------------- @@ -64,14 +65,16 @@ init(#amqqueue { arguments = Args } = Q, Recover) -> coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = 0 }. + set_delivered = 0, + seen = sets:new() }. -promote_backing_queue_state(CPid, BQ, BQS, GM) -> +promote_backing_queue_state(CPid, BQ, BQS, GM, Seen) -> #state { gm = GM, coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = BQ:len(BQS) }. + set_delivered = BQ:len(BQS), + seen = Seen }. terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. The queue is going down but @@ -94,22 +97,31 @@ purge(State = #state { gm = GM, {Count, State #state { backing_queue_state = BQS1, set_delivered = 0 }}. 
-publish(Msg = #basic_message { guid = Guid }, - MsgProps, ChPid, State = #state { gm = GM, +publish(Msg = #basic_message { guid = Guid }, MsgProps, ChPid, + State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + seen = Seen }) -> + case sets:is_element(Guid, Seen) of + true -> State #state { seen = sets:del_element(Guid, Seen) }; + false -> ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + State #state { backing_queue_state = BQS1 } + end. + +publish_delivered(AckRequired, Msg = #basic_message { guid = Guid }, MsgProps, + ChPid, State = #state { gm = GM, backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {publish, false, Guid, MsgProps, ChPid}), - BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State #state { backing_queue_state = BQS1 }. - -publish_delivered(AckRequired, Msg = #basic_message { guid = Guid }, - MsgProps, ChPid, - State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {publish, {true, AckRequired}, Guid, MsgProps, ChPid}), - {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), - {AckTag, State #state { backing_queue_state = BQS1 }}. + backing_queue_state = BQS, + seen = Seen }) -> + case sets:is_element(Guid, Seen) of + true -> State #state { seen = sets:del_element(Guid, Seen) }; + false -> ok = gm:broadcast(GM, {publish, {true, AckRequired}, ChPid, + MsgProps, Msg}), + {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, + MsgProps, ChPid, BQS), + {AckTag, State #state { backing_queue_state = BQS1 }} + end. 
dropwhile(Fun, State = #state { gm = GM, backing_queue = BQ, @@ -126,7 +138,8 @@ dropwhile(Fun, State = #state { gm = GM, fetch(AckRequired, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = SetDelivered }) -> + set_delivered = SetDelivered, + seen = Seen }) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), State1 = State #state { backing_queue_state = BQS1 }, case Result of @@ -137,8 +150,13 @@ fetch(AckRequired, State = #state { gm = GM, ok = gm:broadcast(GM, {fetch, AckRequired, Guid, Remaining}), IsDelivered1 = IsDelivered orelse SetDelivered > 0, SetDelivered1 = lists:max([0, SetDelivered - 1]), + Seen1 = case SetDelivered + SetDelivered1 of + 1 -> sets:new(); %% transition to empty + _ -> Seen + end, {{Message, IsDelivered1, AckTag, Remaining}, - State1 #state { set_delivered = SetDelivered1 }} + State1 #state { set_delivered = SetDelivered1, + seen = Seen1 }} end. ack(AckTags, State = #state { gm = GM, diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index f124bc9e..deb1cc66 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -86,7 +86,7 @@ sender_queues, %% :: Pid -> MsgQ guid_ack, %% :: Guid -> AckTag - instructions, %% :: InstrQ + seen, %% Set Guid guid_to_channel %% for confirms }). 
@@ -140,7 +140,7 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), guid_ack = dict:new(), - instructions = queue:new(), + seen = sets:new(), guid_to_channel = dict:new() }, hibernate, @@ -153,12 +153,12 @@ init([#amqqueue { name = QueueName } = Q]) -> handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) -> %% Synchronous, "immediate" delivery mode gen_server2:reply(From, false), %% master may deliver it, not us - handle_process_result(enqueue_message(Delivery, State)); + noreply(maybe_enqueue_message(Delivery, State)); handle_call({deliver, Delivery = #delivery {}}, From, State) -> %% Synchronous, "mandatory" delivery mode gen_server2:reply(From, true), %% amqqueue throws away the result anyway - handle_process_result(enqueue_message(Delivery, State)); + noreply(maybe_enqueue_message(Delivery, State)); handle_call({gm_deaths, Deaths}, From, State = #state { q = #amqqueue { name = QueueName }, @@ -187,16 +187,12 @@ handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> noreply(maybe_run_queue_via_backing_queue(Fun, State)); -handle_cast({gm, Instruction}, State = #state { instructions = InstrQ }) -> - State1 = State #state { instructions = queue:in(Instruction, InstrQ) }, - case queue:is_empty(InstrQ) of - true -> handle_process_result(process_instructions(State1)); - false -> noreply(State1) - end; +handle_cast({gm, Instruction}, State) -> + handle_process_result(process_instruction(Instruction, State)); handle_cast({deliver, Delivery = #delivery {}}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. - handle_process_result(enqueue_message(Delivery, State)); + noreply(maybe_enqueue_message(Delivery, State)); handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -317,8 +313,8 @@ maybe_confirm_message(Guid, GTC) -> GTC end. 
-handle_process_result({continue, State}) -> noreply(State); -handle_process_result({stop, State}) -> {stop, normal, State}. +handle_process_result({ok, State}) -> noreply(State); +handle_process_result({stop, State}) -> {stop, normal, State}. promote_me(From, #state { q = Q, gm = GM, @@ -326,6 +322,7 @@ promote_me(From, #state { q = Q, backing_queue_state = BQS, rate_timer_ref = RateTRef, sender_queues = SQ, + seen = Seen, guid_ack = GA }) -> rabbit_log:info("Promoting slave ~p for queue ~p~n", [self(), Q #amqqueue.name]), @@ -334,7 +331,7 @@ promote_me(From, #state { q = Q, gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM), + CPid, BQ, BQS, GM, Seen), %% We have to do the requeue via this init because otherwise we %% don't have access to the relevent MsgPropsFun. Also, we are %% already in mnesia as the master queue pid. Thus we cannot just @@ -378,115 +375,111 @@ stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), State #state { rate_timer_ref = undefined }. -enqueue_message(Delivery = #delivery { sender = ChPid }, - State = #state { sender_queues = SQ }) -> - Q = case dict:find(ChPid, SQ) of - {ok, Q1} -> Q1; - error -> queue:new() - end, - SQ1 = dict:store(ChPid, queue:in(Delivery, Q), SQ), - State1 = State #state { sender_queues = SQ1 }, - case queue:is_empty(Q) of - true -> process_instructions(State1); - false -> {continue, State1} - end. 
- -process_instructions(State = #state { instructions = InstrQ }) -> - case queue:out(InstrQ) of - {empty, _InstrQ} -> - {continue, State}; - {{value, Instr}, InstrQ1} -> - case process_instruction(Instr, State) of - {processed, State1} -> - process_instructions( - State1 #state { instructions = InstrQ1 }); - {stop, State1} -> - {stop, State1 #state { instructions = InstrQ1 }}; - blocked -> - {continue, State} - end +maybe_enqueue_message( + Delivery = #delivery { message = #basic_message { guid = Guid }, + sender = ChPid }, + State = #state { q = Q, + sender_queues = SQ, + seen = Seen, + guid_to_channel = GTC }) -> + case sets:is_element(Guid, Seen) of + true -> + GTC1 = record_confirm_or_confirm(Delivery, Q, GTC), + State #state { guid_to_channel = GTC1, + seen = sets:del_element(Guid, Seen) }; + false -> + MQ = case dict:find(ChPid, SQ) of + {ok, MQ1} -> MQ1; + error -> queue:new() + end, + SQ1 = dict:store(ChPid, queue:in(Delivery, MQ), SQ), + State #state { sender_queues = SQ1 } end. 
-process_instruction({publish, Deliver, Guid, MsgProps, ChPid}, - State = #state { q = Q, - sender_queues = SQ, - backing_queue = BQ, - backing_queue_state = BQS, - guid_ack = GA, - guid_to_channel = GTC }) -> - case dict:find(ChPid, SQ) of - error -> - blocked; - {ok, MQ} -> - case queue:out(MQ) of - {empty, _MQ} -> - blocked; - {{value, Delivery = #delivery { - message = Msg = #basic_message { guid = Guid } }}, - MQ1} -> - State1 = State #state { sender_queues = - dict:store(ChPid, MQ1, SQ) }, - GTC1 = record_confirm_or_confirm(Delivery, Q, GTC), - {processed, - case Deliver of - false -> - BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State1 #state { backing_queue_state = BQS1, - guid_to_channel = GTC1 }; - {true, AckRequired} -> - {AckTag, BQS1} = BQ:publish_delivered( - AckRequired, Msg, MsgProps, - ChPid, BQS), - {GA1, GTC2} = - case AckRequired of - true -> - {dict:store(Guid, AckTag, GA), GTC1}; - false -> - {GA, maybe_confirm_message(Guid, GTC1)} - end, - State1 #state { backing_queue_state = BQS1, - guid_ack = GA1, - guid_to_channel = GTC2 } - end}; - {{value, #delivery {}}, _MQ1} -> - %% throw away the instruction: we'll never receive - %% the message to which it corresponds. 
- {processed, State} - end - end; +process_instruction( + {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { guid = Guid }}, + State = #state { q = Q, + sender_queues = SQ, + backing_queue = BQ, + backing_queue_state = BQS, + guid_ack = GA, + seen = Seen, + guid_to_channel = GTC }) -> + {SQ1, Seen1, GTC1} = + case dict:find(ChPid, SQ) of + error -> + {SQ, sets:add_element(Guid, Seen), GTC}; + {ok, MQ} -> + case queue:out(MQ) of + {empty, _MQ} -> + {SQ, sets:add_element(Guid, Seen), GTC}; + {{value, Delivery = #delivery { + message = #basic_message { guid = Guid } }}, + MQ1} -> + GTC2 = record_confirm_or_confirm(Delivery, Q, GTC), + {dict:store(ChPid, MQ1, SQ), Seen, GTC2}; + {{value, #delivery {}}, _MQ1} -> + %% The instruction was sent to us before we + %% were within the mirror_pids within the + %% amqqueue record. We'll never receive the + %% message directly. + {SQ, Seen, GTC} + end + end, + State1 = State #state { sender_queues = SQ1, + seen = Seen1, + guid_to_channel = GTC1 }, + {ok, + case Deliver of + false -> + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + State1 #state { backing_queue_state = BQS1 }; + {true, AckRequired} -> + {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, + ChPid, BQS), + {GA1, GTC3} = case AckRequired of + true -> {dict:store(Guid, AckTag, GA), GTC1}; + false -> {GA, maybe_confirm_message(Guid, GTC1)} + end, + State1 #state { backing_queue_state = BQS1, + guid_ack = GA1, + guid_to_channel = GTC3 } + end}; process_instruction({set_length, Length}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> QLen = BQ:len(BQS), ToDrop = QLen - Length, - {processed, - case ToDrop > 0 of - true -> BQS1 = lists:foldl( - fun (const, BQSN) -> BQ:fetch(false, BQSN) end, - BQS, lists:duplicate(ToDrop, const)), - State #state { backing_queue_state = BQS1 }; - false -> State - end}; + {ok, case ToDrop > 0 of + true -> BQS1 = + lists:foldl( + fun (const, BQSN) -> + {{_Msg, _IsDelivered, _AckTag, _Remaining}, + 
BQSN1} = BQ:fetch(false, BQSN), + BQSN1 + end, BQS, lists:duplicate(ToDrop, const)), + State #state { backing_queue_state = BQS1 }; + false -> State + end}; process_instruction({fetch, AckRequired, Guid, Remaining}, State = #state { backing_queue = BQ, backing_queue_state = BQS, guid_ack = GA }) -> QLen = BQ:len(BQS), - {processed, - case QLen - 1 of - Remaining -> - {{_Msg, _IsDelivered, AckTag, Remaining}, BQS1} = - BQ:fetch(AckRequired, BQS), - GA1 = case AckRequired of - true -> dict:store(Guid, AckTag, GA); - false -> GA - end, - State #state { backing_queue_state = BQS1, - guid_ack = GA1 }; - Other when Other < Remaining -> - %% we must be shorter than the master - State - end}; + {ok, case QLen - 1 of + Remaining -> + {{_Msg, _IsDelivered, AckTag, Remaining}, BQS1} = + BQ:fetch(AckRequired, BQS), + GA1 = case AckRequired of + true -> dict:store(Guid, AckTag, GA); + false -> GA + end, + State #state { backing_queue_state = BQS1, + guid_ack = GA1 }; + Other when Other < Remaining -> + %% we must be shorter than the master + State + end}; process_instruction({ack, Guids}, State = #state { backing_queue = BQ, backing_queue_state = BQS, @@ -494,27 +487,26 @@ process_instruction({ack, Guids}, {AckTags, GA1} = guids_to_acktags(Guids, GA), {Guids1, BQS1} = BQ:ack(AckTags, BQS), [] = Guids1 -- Guids, %% ASSERTION - {processed, State #state { guid_ack = GA1, - backing_queue_state = BQS1 }}; + {ok, State #state { guid_ack = GA1, + backing_queue_state = BQS1 }}; process_instruction({requeue, MsgPropsFun, Guids}, State = #state { backing_queue = BQ, backing_queue_state = BQS, guid_ack = GA }) -> {AckTags, GA1} = guids_to_acktags(Guids, GA), - {processed, - case length(AckTags) =:= length(Guids) of - true -> - {Guids, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS), - State #state { guid_ack = GA1, - backing_queue_state = BQS1 }; - false -> - %% the only thing we can safely do is nuke out our BQ and - %% GA - {_Count, BQS1} = BQ:purge(BQS), - {Guids, BQS2} = ack_all(BQ, GA, 
BQS1), - State #state { guid_ack = dict:new(), - backing_queue_state = BQS2 } - end}; + {ok, case length(AckTags) =:= length(Guids) of + true -> + {Guids, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS), + State #state { guid_ack = GA1, + backing_queue_state = BQS1 }; + false -> + %% the only thing we can safely do is nuke out our BQ + %% and GA + {_Count, BQS1} = BQ:purge(BQS), + {Guids, BQS2} = ack_all(BQ, GA, BQS1), + State #state { guid_ack = dict:new(), + backing_queue_state = BQS2 } + end}; process_instruction(delete_and_terminate, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> -- cgit v1.2.1 From 51d312ef31f3ec74e16750f1c206b6d36933c7be Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 20 Dec 2010 16:17:04 +0000 Subject: Remove R14-ism. Also, cute feature - if you specify no nodes by name, you mean all known nodes --- src/rabbit_mirror_queue_master.erl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 94e93b3e..4f1bcc9d 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -57,8 +57,12 @@ init(#amqqueue { arguments = Args } = Q, Recover) -> {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, undefined), GM = rabbit_mirror_queue_coordinator:get_gm(CPid), {_Type, Nodes} = rabbit_misc:table_lookup(Args, <<"x-mirror">>), - [rabbit_mirror_queue_coordinator:add_slave(CPid, binary_to_atom(Node, utf8)) - || {longstr, Node} <- Nodes], + Nodes1 = case Nodes of + [] -> nodes(); + _ -> [list_to_atom(binary_to_list(Node)) || + {longstr, Node} <- Nodes] + end, + [rabbit_mirror_queue_coordinator:add_slave(CPid, Node) || Node <- Nodes1], {ok, BQ} = application:get_env(backing_queue_module), BQS = BQ:init(Q, Recover), #state { gm = GM, -- cgit v1.2.1 From a1a8b18d66376ef14b55ae1ff2b4b8736d97c623 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Dec 2010 13:23:06 +0000 Subject: Correct all 
manner of crazy wrongness with maybe_run_queue_via_backing_queue. The principle problem is that the state with which the function is to be run is dependent on the caller. Sometimes it's the 'top level' BQ state (eg invocations called by amqqueue_process). Sometimes it's the 'bottom level' state (eg invocations called by vq or qi or msg_store). We didn't used to have multiple layers, but now we do. Consequently, the invocation must indicate 'where' the fun is to be run. The clearest explanation is in master:invoke/3 --- include/rabbit_backing_queue_spec.hrl | 2 ++ src/rabbit_amqqueue.erl | 21 +++++++------- src/rabbit_amqqueue_process.erl | 38 +++++++++++++------------ src/rabbit_backing_queue.erl | 7 ++++- src/rabbit_mirror_queue_master.erl | 9 +++++- src/rabbit_mirror_queue_slave.erl | 15 +++++----- src/rabbit_variable_queue.erl | 52 +++++++++++++++++++---------------- 7 files changed, 84 insertions(+), 60 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index a330fe1e..fdb144d6 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -78,3 +78,5 @@ -spec(idle_timeout/1 :: (state()) -> state()). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). +-spec(invoke/3 :: (atom(), fun ((A) -> A), state()) -> + {[rabbit_guid:guid()], state()}). diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 52a41e71..0abe1198 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -33,8 +33,8 @@ -export([start/0, stop/0, declare/5, delete_immediately/1, delete/3, purge/1]). -export([internal_declare/2, internal_delete/1, - maybe_run_queue_via_backing_queue/2, - maybe_run_queue_via_backing_queue_async/2, + maybe_run_queue_via_backing_queue/3, + maybe_run_queue_via_backing_queue_async/3, update_ram_duration/1, set_ram_duration_target/2, set_maximum_since_use/2, maybe_expire/1, drop_expired/1]). 
-export([pseudo_queue/2]). @@ -151,10 +151,10 @@ -spec(internal_delete/1 :: (name()) -> rabbit_types:ok_or_error('not_found') | rabbit_types:connection_exit()). --spec(maybe_run_queue_via_backing_queue/2 :: - (pid(), (fun ((A) -> {[rabbit_guid:guid()], A}))) -> 'ok'). --spec(maybe_run_queue_via_backing_queue_async/2 :: - (pid(), (fun ((A) -> {[rabbit_guid:guid()], A}))) -> 'ok'). +-spec(maybe_run_queue_via_backing_queue/3 :: + (pid(), atom(), (fun ((A) -> {[rabbit_guid:guid()], A}))) -> 'ok'). +-spec(maybe_run_queue_via_backing_queue_async/3 :: + (pid(), atom(), (fun ((A) -> {[rabbit_guid:guid()], A}))) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). -spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). @@ -459,11 +459,12 @@ internal_delete(QueueName) -> Deletions -> ok = rabbit_binding:process_deletions(Deletions) end. -maybe_run_queue_via_backing_queue(QPid, Fun) -> - gen_server2:call(QPid, {maybe_run_queue_via_backing_queue, Fun}, infinity). +maybe_run_queue_via_backing_queue(QPid, Mod, Fun) -> + gen_server2:call(QPid, {maybe_run_queue_via_backing_queue, Mod, Fun}, + infinity). -maybe_run_queue_via_backing_queue_async(QPid, Fun) -> - gen_server2:cast(QPid, {maybe_run_queue_via_backing_queue, Fun}). +maybe_run_queue_via_backing_queue_async(QPid, Mod, Fun) -> + gen_server2:cast(QPid, {maybe_run_queue_via_backing_queue, Mod, Fun}). update_ram_duration(QPid) -> gen_server2:cast(QPid, update_ram_duration). 
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 516f7b00..fedfd03a 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -266,7 +266,7 @@ ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, rabbit_amqqueue, maybe_run_queue_via_backing_queue, - [self(), fun (BQS) -> {[], BQ:idle_timeout(BQS)} end]), + [self(), BQ, fun (BQS) -> {[], BQ:idle_timeout(BQS)} end]), State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> State. @@ -559,11 +559,11 @@ deliver_or_enqueue(Delivery, State) -> requeue_and_run(AckTags, State = #q{backing_queue = BQ, ttl = TTL}) -> maybe_run_queue_via_backing_queue( - fun (BQS) -> - {_Guids, BQS1} = - BQ:requeue(AckTags, reset_msg_expiry_fun(TTL), BQS), - {[], BQS1} - end, State). + BQ, fun (BQS) -> + {_Guids, BQS1} = + BQ:requeue(AckTags, reset_msg_expiry_fun(TTL), BQS), + {[], BQS1} + end, State). fetch(AckRequired, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> @@ -665,8 +665,10 @@ maybe_send_reply(ChPid, Msg) -> ok = rabbit_channel:send_command(ChPid, Msg). qname(#q{q = #amqqueue{name = QName}}) -> QName. -maybe_run_queue_via_backing_queue(Fun, State = #q{backing_queue_state = BQS}) -> - {Guids, BQS1} = Fun(BQS), +maybe_run_queue_via_backing_queue(Mod, Fun, + State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {Guids, BQS1} = BQ:invoke(Mod, Fun, BQS), run_message_queue( confirm_messages(Guids, State#q{backing_queue_state = BQS1})). @@ -805,11 +807,11 @@ emit_consumer_deleted(ChPid, ConsumerTag) -> prioritise_call(Msg, _From, _State) -> case Msg of - info -> 9; - {info, _Items} -> 9; - consumers -> 9; - {maybe_run_queue_via_backing_queue, _Fun} -> 6; - _ -> 0 + info -> 9; + {info, _Items} -> 9; + consumers -> 9; + {maybe_run_queue_via_backing_queue, _Mod, _Fun} -> 6; + _ -> 0 end. 
prioritise_cast(Msg, _State) -> @@ -1040,12 +1042,12 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> noreply(requeue_and_run(AckTags, State)) end; -handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> - reply(ok, maybe_run_queue_via_backing_queue(Fun, State)). +handle_call({maybe_run_queue_via_backing_queue, Mod, Fun}, _From, State) -> + reply(ok, maybe_run_queue_via_backing_queue(Mod, Fun, State)). -handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> - noreply(maybe_run_queue_via_backing_queue(Fun, State)); +handle_cast({maybe_run_queue_via_backing_queue, Mod, Fun}, State) -> + noreply(maybe_run_queue_via_backing_queue(Mod, Fun, State)); handle_cast({deliver, Delivery}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. @@ -1175,7 +1177,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_info(timeout, State = #q{backing_queue = BQ}) -> noreply(maybe_run_queue_via_backing_queue( - fun (BQS) -> {[], BQ:idle_timeout(BQS)} end, State)); + BQ, fun (BQS) -> {[], BQ:idle_timeout(BQS)} end, State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 7a728498..ffa6982a 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -137,7 +137,12 @@ behaviour_info(callbacks) -> %% Exists for debugging purposes, to be able to expose state via %% rabbitmqctl list_queues backing_queue_status - {status, 1} + {status, 1}, + + %% Passed a function to be invoked with the relevant backing + %% queue's state. Useful for when the backing queue or other + %% components need to pass functions into the backing queue. + {invoke, 3} ]; behaviour_info(_Other) -> undefined. 
diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 4f1bcc9d..11831a29 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -22,7 +22,7 @@ requeue/3, len/1, is_empty/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1]). + status/1, invoke/3]). -export([start/1, stop/0]). @@ -241,3 +241,10 @@ handle_pre_hibernate(State = #state { backing_queue = BQ, status(#state { backing_queue = BQ, backing_queue_state = BQS}) -> BQ:status(BQS). + +invoke(?MODULE, Fun, State) -> + Fun(State); +invoke(Mod, Fun, State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {Guids, BQS1} = BQ:invoke(Mod, Fun, BQS), + {Guids, State #state { backing_queue_state = BQS1 }}. diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index deb1cc66..a58e1579 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -180,12 +180,12 @@ handle_call({gm_deaths, Deaths}, From, {stop, normal, State} end; -handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> - reply(ok, maybe_run_queue_via_backing_queue(Fun, State)). +handle_call({maybe_run_queue_via_backing_queue, Mod, Fun}, _From, State) -> + reply(ok, maybe_run_queue_via_backing_queue(Mod, Fun, State)). 
-handle_cast({maybe_run_queue_via_backing_queue, Fun}, State) -> - noreply(maybe_run_queue_via_backing_queue(Fun, State)); +handle_cast({maybe_run_queue_via_backing_queue, Mod, Fun}, State) -> + noreply(maybe_run_queue_via_backing_queue(Mod, Fun, State)); handle_cast({gm, Instruction}, State) -> handle_process_result(process_instruction(Instruction, State)); @@ -284,9 +284,10 @@ handle_msg([SPid], _From, Msg) -> %% --------------------------------------------------------------------------- maybe_run_queue_via_backing_queue( - Fun, State = #state { backing_queue_state = BQS, - guid_to_channel = GTC }) -> - {Guids, BQS1} = Fun(BQS), + Mod, Fun, State = #state { backing_queue = BQ, + backing_queue_state = BQS, + guid_to_channel = GTC }) -> + {Guids, BQS1} = BQ:invoke(Mod, Fun, BQS), GTC1 = lists:foldl(fun maybe_confirm_message/2, GTC, Guids), State #state { backing_queue_state = BQS1, guid_to_channel = GTC1 }. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 09ead22b..ba77d185 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -37,7 +37,7 @@ requeue/3, len/1, is_empty/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1]). + status/1, invoke/3]). -export([start/1, stop/0]). @@ -865,6 +865,9 @@ status(#vqstate { {avg_ack_ingress_rate, AvgAckIngressRate}, {avg_ack_egress_rate , AvgAckEgressRate} ]. +invoke(?MODULE, Fun, State) -> + Fun(State). 
+ %%---------------------------------------------------------------------------- %% Minor helpers %%---------------------------------------------------------------------------- @@ -1100,10 +1103,11 @@ blank_rate(Timestamp, IngressLength) -> msg_store_callback(PersistentGuids, Pubs, AckTags, Fun, MsgPropsFun) -> Self = self(), F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( - Self, fun (StateN) -> {[], tx_commit_post_msg_store( - true, Pubs, AckTags, - Fun, MsgPropsFun, StateN)} - end) + Self, ?MODULE, + fun (StateN) -> {[], tx_commit_post_msg_store( + true, Pubs, AckTags, + Fun, MsgPropsFun, StateN)} + end) end, fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( fun () -> remove_persistent_messages( @@ -1409,27 +1413,29 @@ msgs_confirmed(GuidSet, State) -> msgs_written_to_disk(QPid, GuidSet) -> rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( - QPid, fun (State = #vqstate { msgs_on_disk = MOD, - msg_indices_on_disk = MIOD, - unconfirmed = UC }) -> - msgs_confirmed(gb_sets:intersection(GuidSet, MIOD), - State #vqstate { - msgs_on_disk = - gb_sets:intersection( - gb_sets:union(MOD, GuidSet), UC) }) - end). + QPid, ?MODULE, + fun (State = #vqstate { msgs_on_disk = MOD, + msg_indices_on_disk = MIOD, + unconfirmed = UC }) -> + msgs_confirmed(gb_sets:intersection(GuidSet, MIOD), + State #vqstate { + msgs_on_disk = + gb_sets:intersection( + gb_sets:union(MOD, GuidSet), UC) }) + end). msg_indices_written_to_disk(QPid, GuidSet) -> rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( - QPid, fun (State = #vqstate { msgs_on_disk = MOD, - msg_indices_on_disk = MIOD, - unconfirmed = UC }) -> - msgs_confirmed(gb_sets:intersection(GuidSet, MOD), - State #vqstate { - msg_indices_on_disk = - gb_sets:intersection( - gb_sets:union(MIOD, GuidSet), UC) }) - end). 
+ QPid, ?MODULE, + fun (State = #vqstate { msgs_on_disk = MOD, + msg_indices_on_disk = MIOD, + unconfirmed = UC }) -> + msgs_confirmed(gb_sets:intersection(GuidSet, MOD), + State #vqstate { + msg_indices_on_disk = + gb_sets:intersection( + gb_sets:union(MIOD, GuidSet), UC) }) + end). %%---------------------------------------------------------------------------- %% Phase changes -- cgit v1.2.1 From 96529337fddf02316bdbd78ad3d1c6f152972ca5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Dec 2010 13:31:42 +0000 Subject: Use amqqueue:store_queue/1 so that we modify durable queues durably --- src/rabbit_amqqueue.erl | 1 + src/rabbit_mirror_queue_misc.erl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 0abe1198..f04c5fec 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -48,6 +48,7 @@ -export([notify_sent/2, unblock/2, flush_all/2]). -export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). +-export([store_queue/1]). -include("rabbit.hrl"). -include_lib("stdlib/include/qlc.hrl"). 
diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 05602076..090cb812 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -39,7 +39,7 @@ remove_from_queue(QueueName, DeadPids) -> _ -> Q1 = Q #amqqueue { pid = QPid1, mirror_pids = MPids1 }, - mnesia:write(rabbit_queue, Q1, write), + ok = rabbit_amqqueue:store_queue(Q1), {ok, QPid1} end end -- cgit v1.2.1 From e8915154deb02dfb64c2b71a336af39c44d835d0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Dec 2010 13:36:43 +0000 Subject: idiot --- src/rabbit_mirror_queue_slave.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index a58e1579..a61cea0d 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -266,7 +266,7 @@ members_changed([SPid], _Births, Deaths) -> rabbit_misc:with_exit_handler( fun () -> {stop, normal} end, fun () -> - case gen_server2:call(SPid, {gm_deaths, Deaths}) of + case gen_server2:call(SPid, {gm_deaths, Deaths}, infinity) of ok -> ok; {promote, CPid} -> -- cgit v1.2.1 From e32b21bc5a8fd1a167e540f09b9d92bcd87056dc Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 21 Dec 2010 15:40:11 +0000 Subject: Make the addition of slave nodes more robust. This is especially important for recovery of durable queues --- src/rabbit_mirror_queue_coordinator.erl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 6303952d..608148b5 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -70,9 +70,17 @@ handle_call(get_gm, _From, State = #state { gm = GM }) -> reply(GM, State). 
handle_cast({add_slave, Node}, State = #state { q = Q }) -> - Result = rabbit_mirror_queue_slave_sup:start_child(Node, [Q]), - rabbit_log:info("Adding slave node for queue ~p: ~p~n", - [Q #amqqueue.name, Result]), + Nodes = nodes(), + case lists:member(Node, Nodes) of + true -> + Result = rabbit_mirror_queue_slave_sup:start_child(Node, [Q]), + rabbit_log:info("Adding slave node for queue ~p: ~p~n", + [Q #amqqueue.name, Result]); + false -> + rabbit_log:info( + "Ignoring request to add slave on node ~p for queue ~p~n", + [Q #amqqueue.name, Node]) + end, noreply(State); handle_cast({gm_deaths, Deaths}, -- cgit v1.2.1 From 7d69905fb40fcfa8a9657b951c1a1cde058be2d4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Jan 2011 14:18:38 +0000 Subject: Correct formatting of resources in log messages --- src/rabbit_mirror_queue_coordinator.erl | 12 ++++++------ src/rabbit_mirror_queue_slave.erl | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 608148b5..d853a3e9 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -74,19 +74,19 @@ handle_cast({add_slave, Node}, State = #state { q = Q }) -> case lists:member(Node, Nodes) of true -> Result = rabbit_mirror_queue_slave_sup:start_child(Node, [Q]), - rabbit_log:info("Adding slave node for queue ~p: ~p~n", - [Q #amqqueue.name, Result]); + rabbit_log:info("Adding slave node for ~p: ~p~n", + [rabbit_misc:rs(Q #amqqueue.name), Result]); false -> rabbit_log:info( - "Ignoring request to add slave on node ~p for queue ~p~n", - [Q #amqqueue.name, Node]) + "Ignoring request to add slave on node ~p for ~p~n", + [Node, rabbit_misc:rs(Q #amqqueue.name)]) end, noreply(State); handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName } }) -> - rabbit_log:info("Master ~p saw deaths ~p for queue ~p~n", - [self(), Deaths, QueueName]), + 
rabbit_log:info("Master ~p saw deaths ~p for ~p~n", + [self(), Deaths, rabbit_misc:rs(QueueName)]), case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of {ok, Pid} when node(Pid) =:= node() -> noreply(State); diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index a61cea0d..483c849a 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -164,8 +164,8 @@ handle_call({gm_deaths, Deaths}, From, State = #state { q = #amqqueue { name = QueueName }, gm = GM, master_node = MNode }) -> - rabbit_log:info("Slave ~p saw deaths ~p for queue ~p~n", - [self(), Deaths, QueueName]), + rabbit_log:info("Slave ~p saw deaths ~p for ~p~n", + [self(), Deaths, rabbit_misc:rs(QueueName)]), case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of {ok, Pid} when node(Pid) =:= MNode -> reply(ok, State); @@ -325,8 +325,8 @@ promote_me(From, #state { q = Q, sender_queues = SQ, seen = Seen, guid_ack = GA }) -> - rabbit_log:info("Promoting slave ~p for queue ~p~n", - [self(), Q #amqqueue.name]), + rabbit_log:info("Promoting slave ~p for ~p~n", + [self(), rabbit_misc:rs(Q #amqqueue.name)]), {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), -- cgit v1.2.1 From 844093df9d5a32552b70dd51640551bfcd19190e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Jan 2011 14:24:43 +0000 Subject: And the result of misc:rs is a string, so format it as such --- src/rabbit_mirror_queue_coordinator.erl | 6 +++--- src/rabbit_mirror_queue_slave.erl | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index d853a3e9..30fd6ed3 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -74,18 +74,18 @@ handle_cast({add_slave, Node}, State = #state { q = Q }) -> case lists:member(Node, Nodes) of true 
-> Result = rabbit_mirror_queue_slave_sup:start_child(Node, [Q]), - rabbit_log:info("Adding slave node for ~p: ~p~n", + rabbit_log:info("Adding slave node for ~s: ~p~n", [rabbit_misc:rs(Q #amqqueue.name), Result]); false -> rabbit_log:info( - "Ignoring request to add slave on node ~p for ~p~n", + "Ignoring request to add slave on node ~p for ~s~n", [Node, rabbit_misc:rs(Q #amqqueue.name)]) end, noreply(State); handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName } }) -> - rabbit_log:info("Master ~p saw deaths ~p for ~p~n", + rabbit_log:info("Master ~p saw deaths ~p for ~s~n", [self(), Deaths, rabbit_misc:rs(QueueName)]), case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of {ok, Pid} when node(Pid) =:= node() -> diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 483c849a..4f9d2066 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -164,7 +164,7 @@ handle_call({gm_deaths, Deaths}, From, State = #state { q = #amqqueue { name = QueueName }, gm = GM, master_node = MNode }) -> - rabbit_log:info("Slave ~p saw deaths ~p for ~p~n", + rabbit_log:info("Slave ~p saw deaths ~p for ~s~n", [self(), Deaths, rabbit_misc:rs(QueueName)]), case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of {ok, Pid} when node(Pid) =:= MNode -> @@ -325,7 +325,7 @@ promote_me(From, #state { q = Q, sender_queues = SQ, seen = Seen, guid_ack = GA }) -> - rabbit_log:info("Promoting slave ~p for ~p~n", + rabbit_log:info("Promoting slave ~p for ~s~n", [self(), rabbit_misc:rs(Q #amqqueue.name)]), {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), true = unlink(GM), -- cgit v1.2.1 From 926d3b66b33dd75f87a8bc903a60e00ecbbea96f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 28 Feb 2011 12:55:44 +0000 Subject: Removing gm_test from bug23554 which has in bug23727 become gm_soak_test --- src/gm_test.erl | 126 
-------------------------------------------------------- 1 file changed, 126 deletions(-) delete mode 100644 src/gm_test.erl diff --git a/src/gm_test.erl b/src/gm_test.erl deleted file mode 100644 index e0a92a0c..00000000 --- a/src/gm_test.erl +++ /dev/null @@ -1,126 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2011 VMware, Inc. All rights reserved. -%% - --module(gm_test). - --export([test/0]). --export([joined/2, members_changed/3, handle_msg/3, terminate/2]). - --behaviour(gm). - --include("gm_specs.hrl"). - -get_state() -> - get(state). - -with_state(Fun) -> - put(state, Fun(get_state())). - -inc() -> - case 1 + get(count) of - 100000 -> Now = os:timestamp(), - Start = put(ts, Now), - Diff = timer:now_diff(Now, Start), - Rate = 100000 / (Diff / 1000000), - io:format("~p seeing ~p msgs/sec~n", [self(), Rate]), - put(count, 0); - N -> put(count, N) - end. - -joined([], Members) -> - io:format("Joined ~p (~p members)~n", [self(), length(Members)]), - put(state, dict:from_list([{Member, empty} || Member <- Members])), - put(count, 0), - put(ts, os:timestamp()), - ok. 
- -members_changed([], Births, Deaths) -> - with_state( - fun (State) -> - State1 = - lists:foldl( - fun (Born, StateN) -> - false = dict:is_key(Born, StateN), - dict:store(Born, empty, StateN) - end, State, Births), - lists:foldl( - fun (Died, StateN) -> - true = dict:is_key(Died, StateN), - dict:store(Died, died, StateN) - end, State1, Deaths) - end), - ok. - -handle_msg([], From, {test_msg, Num}) -> - inc(), - with_state( - fun (State) -> - ok = case dict:find(From, State) of - {ok, died} -> - exit({{from, From}, - {received_posthumous_delivery, Num}}); - {ok, empty} -> ok; - {ok, Num} -> ok; - {ok, Num1} when Num < Num1 -> - exit({{from, From}, - {duplicate_delivery_of, Num1}, - {expecting, Num}}); - {ok, Num1} -> - exit({{from, From}, - {missing_delivery_of, Num}, - {received_early, Num1}}); - error -> - exit({{from, From}, - {received_premature_delivery, Num}}) - end, - dict:store(From, Num + 1, State) - end), - ok. - -terminate([], Reason) -> - io:format("Left ~p (~p)~n", [self(), Reason]), - ok. - -spawn_member() -> - spawn_link( - fun () -> - random:seed(now()), - %% start up delay of no more than 10 seconds - timer:sleep(random:uniform(10000)), - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, []), - Start = random:uniform(10000), - send_loop(Pid, Start, Start + random:uniform(10000)), - gm:leave(Pid), - spawn_more() - end). - -spawn_more() -> - [spawn_member() || _ <- lists:seq(1, 4 - random:uniform(4))]. - -send_loop(_Pid, Target, Target) -> - ok; -send_loop(Pid, Count, Target) when Target > Count -> - case random:uniform(3) of - 3 -> gm:confirmed_broadcast(Pid, {test_msg, Count}); - _ -> gm:broadcast(Pid, {test_msg, Count}) - end, - timer:sleep(random:uniform(5) - 1), %% sleep up to 4 ms - send_loop(Pid, Count + 1, Target). - -test() -> - ok = gm:create_tables(), - spawn_member(), - spawn_member(). 
-- cgit v1.2.1 From edb6f73c32720660b1705642bc5192226a9cca30 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 28 Feb 2011 17:11:58 +0000 Subject: Large amounts of debitrotting due to changes to confirms api and such like. Sadly mirrored confirms aren't working again yet... not really sure why --- src/rabbit_mirror_queue_master.erl | 4 +- src/rabbit_mirror_queue_slave.erl | 115 +++++++++++++++++++++++++++++-------- 2 files changed, 92 insertions(+), 27 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 11831a29..e2f9b020 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -232,8 +232,8 @@ ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> needs_idle_timeout(#state { backing_queue = BQ, backing_queue_state = BQS}) -> BQ:needs_idle_timeout(BQS). -idle_timeout(#state { backing_queue = BQ, backing_queue_state = BQS}) -> - BQ:idle_timeout(BQS). +idle_timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> + State #state { backing_queue_state = BQ:idle_timeout(BQS) }. handle_pre_hibernate(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 4f9d2066..396e3c35 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -67,7 +67,8 @@ -export([start_link/1, set_maximum_since_use/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, handle_pre_hibernate/1]). + code_change/3, handle_pre_hibernate/1, prioritise_call/3, + prioritise_cast/2]). -export([joined/2, members_changed/3, handle_msg/3]). @@ -82,6 +83,7 @@ master_node, backing_queue, backing_queue_state, + sync_timer_ref, rate_timer_ref, sender_queues, %% :: Pid -> MsgQ @@ -91,6 +93,7 @@ guid_to_channel %% for confirms }). +-define(SYNC_INTERVAL, 25). 
%% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). start_link(Q) -> @@ -137,6 +140,7 @@ init([#amqqueue { name = QueueName } = Q]) -> backing_queue = BQ, backing_queue_state = BQS, rate_timer_ref = undefined, + sync_timer_ref = undefined, sender_queues = dict:new(), guid_ack = dict:new(), @@ -212,7 +216,14 @@ handle_cast(update_ram_duration, rabbit_memory_monitor:report_ram_duration(self(), RamDuration), BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), noreply(State #state { rate_timer_ref = just_measured, - backing_queue_state = BQS2 }). + backing_queue_state = BQS2 }); + +handle_cast(sync_timeout, State) -> + noreply(backing_queue_idle_timeout( + State #state { sync_timer_ref = undefined })). + +handle_info(timeout, State) -> + noreply(backing_queue_idle_timeout(State)); handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. @@ -245,12 +256,30 @@ code_change(_OldVsn, State, _Extra) -> handle_pre_hibernate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% mainly copied from amqqueue_process - BQS1 = BQ:handle_pre_hibernate(BQS), - %% no activity for a while == 0 egress and ingress rates + {RamDuration, BQS1} = BQ:ram_duration(BQS), DesiredDuration = - rabbit_memory_monitor:report_ram_duration(self(), infinity), + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS2 })}. + BQS3 = BQ:handle_pre_hibernate(BQS2), + {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS3 })}. + +prioritise_call(Msg, _From, _State) -> + case Msg of + {maybe_run_queue_via_backing_queue, _Mod, _Fun} -> 6; + {gm_deaths, _Deaths} -> 5; + _ -> 0 + end. 
+ +prioritise_cast(Msg, _State) -> + case Msg of + update_ram_duration -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + {maybe_run_queue_via_backing_queue, _Mod, _Fun} -> 6; + sync_timeout -> 6; + {gm, _Msg} -> 5; + _ -> 0 + end. %% --------------------------------------------------------------------------- %% GM @@ -285,12 +314,9 @@ handle_msg([SPid], _From, Msg) -> maybe_run_queue_via_backing_queue( Mod, Fun, State = #state { backing_queue = BQ, - backing_queue_state = BQS, - guid_to_channel = GTC }) -> + backing_queue_state = BQS }) -> {Guids, BQS1} = BQ:invoke(Mod, Fun, BQS), - GTC1 = lists:foldl(fun maybe_confirm_message/2, GTC, Guids), - State #state { backing_queue_state = BQS1, - guid_to_channel = GTC1 }. + confirm_messages(Guids, State #state { backing_queue_state = BQS1 }). record_confirm_or_confirm(#delivery { msg_seq_no = undefined }, _Q, GTC) -> GTC; @@ -305,13 +331,27 @@ record_confirm_or_confirm(#delivery { sender = ChPid, msg_seq_no = MsgSeqNo }, ok = rabbit_channel:confirm(ChPid, MsgSeqNo), GTC. -maybe_confirm_message(Guid, GTC) -> - case dict:find(Guid, GTC) of - {ok, {ChPid, MsgSeqNo}} when MsgSeqNo =/= undefined -> - ok = rabbit_channel:confirm(ChPid, MsgSeqNo), - dict:erase(Guid, GTC); - error -> - GTC +confirm_messages(Guids, State = #state { guid_to_channel = GTC }) -> + {CMs, GTC1} = + lists:foldl( + fun(Guid, {CMs, GTC0}) -> + case dict:find(Guid, GTC0) of + {ok, {ChPid, MsgSeqNo}} -> + {gb_trees_cons(ChPid, MsgSeqNo, CMs), + dict:erase(Guid, GTC0)}; + _ -> + {CMs, GTC0} + end + end, {gb_trees:empty(), GTC}, Guids), + gb_trees:map(fun(ChPid, MsgSeqNos) -> + rabbit_channel:confirm(ChPid, MsgSeqNos) + end, CMs), + State #state { guid_to_channel = GTC1 }. + +gb_trees_cons(Key, Value, Tree) -> + case gb_trees:lookup(Key, Tree) of + {value, Values} -> gb_trees:update(Key, [Value | Values], Tree); + none -> gb_trees:insert(Key, [Value], Tree) end. 
handle_process_result({ok, State}) -> noreply(State); @@ -348,15 +388,39 @@ promote_me(From, #state { q = Q, {become, rabbit_amqqueue_process, QueueState, hibernate}. noreply(State) -> - {noreply, next_state(State), hibernate}. + {NewState, Timeout} = next_state(State), + {noreply, NewState, Timeout}. reply(Reply, State) -> - {reply, Reply, next_state(State), hibernate}. + {NewState, Timeout} = next_state(State), + {reply, Reply, NewState, Timeout}. next_state(State) -> - ensure_rate_timer(State). + State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = + ensure_rate_timer(State), + case BQ:needs_idle_timeout(BQS) of + true -> {ensure_sync_timer(State1), 0}; + false -> {stop_sync_timer(State1), hibernate} + end. %% copied+pasted from amqqueue_process +backing_queue_idle_timeout(State = #state { backing_queue = BQ }) -> + maybe_run_queue_via_backing_queue( + BQ, fun (BQS) -> {[], BQ:idle_timeout(BQS)} end, State). + +ensure_sync_timer(State = #state { sync_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after( + ?SYNC_INTERVAL, rabbit_amqqueue, sync_timeout, [self()]), + State #state { sync_timer_ref = TRef }; +ensure_sync_timer(State) -> + State. + +stop_sync_timer(State = #state { sync_timer_ref = undefined }) -> + State; +stop_sync_timer(State = #state { sync_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #state { sync_timer_ref = undefined }. 
+ ensure_rate_timer(State = #state { rate_timer_ref = undefined }) -> {ok, TRef} = timer:apply_after( ?RAM_DURATION_UPDATE_INTERVAL, @@ -438,10 +502,11 @@ process_instruction( {true, AckRequired} -> {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), - {GA1, GTC3} = case AckRequired of - true -> {dict:store(Guid, AckTag, GA), GTC1}; - false -> {GA, maybe_confirm_message(Guid, GTC1)} - end, + {GA1, GTC3} = + case AckRequired of + true -> {dict:store(Guid, AckTag, GA), GTC1}; + false -> {GA, confirm_messages([Guid], GTC1)} + end, State1 #state { backing_queue_state = BQS1, guid_ack = GA1, guid_to_channel = GTC3 } -- cgit v1.2.1 From ab52e4b4f9e7632eccd4ea769b28a744272d595f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 28 Feb 2011 18:09:01 +0000 Subject: confirms still don't work... but it needs sleep to fix this. --- src/rabbit_mirror_queue_slave.erl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 396e3c35..df9a28f4 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -328,7 +328,7 @@ record_confirm_or_confirm( dict:store(Guid, {ChPid, MsgSeqNo}, GTC); record_confirm_or_confirm(#delivery { sender = ChPid, msg_seq_no = MsgSeqNo }, _Q, GTC) -> - ok = rabbit_channel:confirm(ChPid, MsgSeqNo), + ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), GTC. 
confirm_messages(Guids, State = #state { guid_to_channel = GTC }) -> @@ -502,14 +502,13 @@ process_instruction( {true, AckRequired} -> {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), - {GA1, GTC3} = - case AckRequired of - true -> {dict:store(Guid, AckTag, GA), GTC1}; - false -> {GA, confirm_messages([Guid], GTC1)} - end, + GA1 = case AckRequired of + true -> dict:store(Guid, AckTag, GA); + false -> GA + end, State1 #state { backing_queue_state = BQS1, guid_ack = GA1, - guid_to_channel = GTC3 } + guid_to_channel = GTC1 } end}; process_instruction({set_length, Length}, State = #state { backing_queue = BQ, -- cgit v1.2.1 From 5e3b5b3b898fffcda5f897687a90dfe95669e989 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Mar 2011 13:01:30 +0000 Subject: Fixed confirms in HA queues. Broke slave promotion. Will fix --- src/rabbit_mirror_queue_slave.erl | 168 +++++++++++++++++++++++--------------- 1 file changed, 104 insertions(+), 64 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index df9a28f4..5c101ee2 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -88,9 +88,8 @@ sender_queues, %% :: Pid -> MsgQ guid_ack, %% :: Guid -> AckTag - seen, %% Set Guid - guid_to_channel %% for confirms + guid_status }). -define(SYNC_INTERVAL, 25). %% milliseconds @@ -144,9 +143,7 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), guid_ack = dict:new(), - seen = sets:new(), - - guid_to_channel = dict:new() + guid_status = dict:new() }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}; @@ -318,35 +315,41 @@ maybe_run_queue_via_backing_queue( {Guids, BQS1} = BQ:invoke(Mod, Fun, BQS), confirm_messages(Guids, State #state { backing_queue_state = BQS1 }). 
-record_confirm_or_confirm(#delivery { msg_seq_no = undefined }, _Q, GTC) -> - GTC; -record_confirm_or_confirm( - #delivery { sender = ChPid, - message = #basic_message { is_persistent = true, - guid = Guid }, - msg_seq_no = MsgSeqNo }, #amqqueue { durable = true }, GTC) -> - dict:store(Guid, {ChPid, MsgSeqNo}, GTC); -record_confirm_or_confirm(#delivery { sender = ChPid, msg_seq_no = MsgSeqNo }, - _Q, GTC) -> - ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), - GTC. - -confirm_messages(Guids, State = #state { guid_to_channel = GTC }) -> - {CMs, GTC1} = + +needs_confirming(#delivery{ msg_seq_no = undefined }, _State) -> + never; +needs_confirming(#delivery { message = #basic_message { + is_persistent = true } }, + #state { q = #amqqueue { durable = true } }) -> + eventually; +needs_confirming(_Delivery, _State) -> + immediately. + +confirm_messages(Guids, State = #state { guid_status = GS }) -> + {GS1, CMs} = lists:foldl( - fun(Guid, {CMs, GTC0}) -> - case dict:find(Guid, GTC0) of - {ok, {ChPid, MsgSeqNo}} -> - {gb_trees_cons(ChPid, MsgSeqNo, CMs), - dict:erase(Guid, GTC0)}; - _ -> - {CMs, GTC0} + fun (Guid, {GSN, CMsN} = Acc) -> + %% We will never see {confirmed, ChPid} here. + case dict:find(Guid, GSN) of + error -> + %% If it needed confirming, it'll have + %% already been done. + Acc; + {ok, {published, ChPid}} -> + %% Still not seen it from the channel, just + %% record that it's been confirmed. + {dict:store(Guid, {confirmed, ChPid}, GSN), CMsN}; + {ok, {published, ChPid, MsgSeqNo}} -> + %% Seen from both GM and Channel. Can now + %% confirm. + {dict:erase(Guid, GSN), + gb_trees_cons(ChPid, MsgSeqNo, CMsN)} end - end, {gb_trees:empty(), GTC}, Guids), - gb_trees:map(fun(ChPid, MsgSeqNos) -> - rabbit_channel:confirm(ChPid, MsgSeqNos) + end, {GS, gb_trees:empty()}, Guids), + gb_trees:map(fun (ChPid, MsgSeqNos) -> + ok = rabbit_channel:confirm(ChPid, MsgSeqNos) end, CMs), - State #state { guid_to_channel = GTC1 }. + State #state { guid_status = GS1 }. 
gb_trees_cons(Key, Value, Tree) -> case gb_trees:lookup(Key, Tree) of @@ -363,7 +366,6 @@ promote_me(From, #state { q = Q, backing_queue_state = BQS, rate_timer_ref = RateTRef, sender_queues = SQ, - seen = Seen, guid_ack = GA }) -> rabbit_log:info("Promoting slave ~p for ~s~n", [self(), rabbit_misc:rs(Q #amqqueue.name)]), @@ -371,8 +373,9 @@ promote_me(From, #state { q = Q, true = unlink(GM), gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), + %% TODO fix up seen MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, Seen), + CPid, BQ, BQS, GM, sets:new()), %% We have to do the requeue via this init because otherwise we %% don't have access to the relevent MsgPropsFun. Also, we are %% already in mnesia as the master queue pid. Thus we cannot just @@ -441,59 +444,97 @@ stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> State #state { rate_timer_ref = undefined }. maybe_enqueue_message( - Delivery = #delivery { message = #basic_message { guid = Guid }, - sender = ChPid }, - State = #state { q = Q, - sender_queues = SQ, - seen = Seen, - guid_to_channel = GTC }) -> - case sets:is_element(Guid, Seen) of - true -> - GTC1 = record_confirm_or_confirm(Delivery, Q, GTC), - State #state { guid_to_channel = GTC1, - seen = sets:del_element(Guid, Seen) }; - false -> + Delivery = #delivery { message = #basic_message { guid = Guid }, + msg_seq_no = MsgSeqNo, + sender = ChPid }, + State = #state { sender_queues = SQ, + guid_status = GS }) -> + %% We will never see {published, ChPid, MsgSeqNo} here. + case dict:find(Guid, GS) of + error -> MQ = case dict:find(ChPid, SQ) of {ok, MQ1} -> MQ1; error -> queue:new() end, SQ1 = dict:store(ChPid, queue:in(Delivery, MQ), SQ), - State #state { sender_queues = SQ1 } + State #state { sender_queues = SQ1 }; + {ok, {confirmed, ChPid}} -> + %% BQ has confirmed it but we didn't know what the + %% msg_seq_no was at the time. We do now! 
+ ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), + State #state { guid_status = dict:erase(Guid, GS) }; + {ok, {published, ChPid}} -> + %% It was published to the BQ and we didn't know the + %% msg_seq_no so couldn't confirm it at the time. + case needs_confirming(Delivery, State) of + never -> + State #state { guid_status = dict:erase(Guid, GS) }; + eventually -> + State #state { + guid_status = dict:store( + Guid, {published, ChPid, MsgSeqNo}, GS) }; + immediately -> + ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), + State #state { guid_status = dict:erase(Guid, GS) } + end end. process_instruction( {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { guid = Guid }}, - State = #state { q = Q, - sender_queues = SQ, + State = #state { sender_queues = SQ, backing_queue = BQ, backing_queue_state = BQS, guid_ack = GA, - seen = Seen, - guid_to_channel = GTC }) -> - {SQ1, Seen1, GTC1} = + guid_status = GS }) -> + + %% We really are going to do the publish right now, even though we + %% may not have seen it directly from the channel. As a result, we + %% may know that it needs confirming without knowing its + %% msg_seq_no, which means that we can see the confirmation come + %% back from the backing queue without knowing the msg_seq_no, + %% which means that we're going to have to hang on to the fact + %% that we've seen the guid confirmed until we can associate it + %% with a msg_seq_no. + GS1 = dict:store(Guid, {published, ChPid}, GS), + {SQ1, GS2} = case dict:find(ChPid, SQ) of error -> - {SQ, sets:add_element(Guid, Seen), GTC}; + {SQ, GS1}; {ok, MQ} -> case queue:out(MQ) of {empty, _MQ} -> - {SQ, sets:add_element(Guid, Seen), GTC}; + {SQ, GS1}; {{value, Delivery = #delivery { - message = #basic_message { guid = Guid } }}, + msg_seq_no = MsgSeqNo, + message = #basic_message { guid = Guid } }}, MQ1} -> - GTC2 = record_confirm_or_confirm(Delivery, Q, GTC), - {dict:store(ChPid, MQ1, SQ), Seen, GTC2}; + %% We received the msg from the channel + %% first. 
Thus we need to deal with confirms + %% here. + {dict:store(ChPid, MQ1, SQ), + case needs_confirming(Delivery, State) of + never -> + GS; + eventually -> + dict:store( + Guid, {published, ChPid, MsgSeqNo}, GS); + immediately -> + ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), + GS + end}; {{value, #delivery {}}, _MQ1} -> %% The instruction was sent to us before we %% were within the mirror_pids within the - %% amqqueue record. We'll never receive the - %% message directly. - {SQ, Seen, GTC} + %% #amqqueue{} record. We'll never receive the + %% message directly from the channel. And the + %% channel will not be expecting any confirms + %% from us. + {SQ, GS} end end, - State1 = State #state { sender_queues = SQ1, - seen = Seen1, - guid_to_channel = GTC1 }, + + State1 = State #state { sender_queues = SQ1, + guid_status = GS2 }, {ok, case Deliver of false -> @@ -507,8 +548,7 @@ process_instruction( false -> GA end, State1 #state { backing_queue_state = BQS1, - guid_ack = GA1, - guid_to_channel = GTC1 } + guid_ack = GA1 } end}; process_instruction({set_length, Length}, State = #state { backing_queue = BQ, -- cgit v1.2.1 From 00b3d0d64d64fbdad5c053fb8ab07932d8eb7341 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Mar 2011 13:48:58 +0000 Subject: Remove erroneous documentation --- src/rabbit_mirror_queue_slave.erl | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 5c101ee2..93f885ba 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -29,40 +29,6 @@ %% %% All instructions from the GM group must be processed in the order %% in which they're received. -%% -%% Thus, we need a queue per sender, and a queue for GM instructions. -%% -%% On receipt of a GM group instruction, three things are possible: -%% 1. The queue of publisher messages is empty. Thus store the GM -%% instruction to the instrQ. -%% 2. 
The head of the queue of publisher messages has a message that -%% matches the GUID of the GM instruction. Remove the message, and -%% route appropriately. -%% 3. The head of the queue of publisher messages has a message that -%% does not match the GUID of the GM instruction. Throw away the GM -%% instruction: the GM instruction must correspond to a message -%% that we'll never receive. If it did not, then before the current -%% instruction, we would have received an instruction for the -%% message at the head of this queue, thus the head of the queue -%% would have been removed and processed. -%% -%% On receipt of a publisher message, three things are possible: -%% 1. The queue of GM group instructions is empty. Add the message to -%% the relevant queue and await instructions from the GM. -%% 2. The head of the queue of GM group instructions has an -%% instruction matching the GUID of the message. Remove that -%% instruction and act on it. Attempt to process the rest of the -%% instrQ. -%% 3. The head of the queue of GM group instructions has an -%% instruction that does not match the GUID of the message. If the -%% message is from the same publisher as is referred to by the -%% instruction then throw away the GM group instruction and repeat -%% - attempt to match against the next instruction if there is one: -%% The instruction thrown away was for a message we'll never -%% receive. -%% -%% In all cases, we are relying heavily on order preserving messaging -%% both from the GM group and from the publishers. -export([start_link/1, set_maximum_since_use/2]). -- cgit v1.2.1 From 974119b73e9f1ff36fcc304928ba39e232c2801c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 2 Mar 2011 16:12:49 +0000 Subject: It's very substantially wrong. 
--- src/rabbit_amqqueue_process.erl | 6 +-- src/rabbit_mirror_queue_master.erl | 94 +++++++++++++++++++++++++------------- src/rabbit_mirror_queue_slave.erl | 13 ++++-- 3 files changed, 75 insertions(+), 38 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 6aed2f87..207f6bab 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -33,7 +33,7 @@ handle_info/2, handle_pre_hibernate/1, prioritise_call/3, prioritise_cast/2, prioritise_info/2]). --export([init_with_backing_queue_state/6]). +-export([init_with_backing_queue_state/7]). % Queue's state -record(q, {q, @@ -118,7 +118,7 @@ init(Q) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, - RateTRef, AckTags, Deliveries) -> + RateTRef, AckTags, Deliveries, GTC) -> ?LOGDEBUG("Queue starting - ~p~n", [Q]), case Owner of none -> ok; @@ -140,7 +140,7 @@ init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, expiry_timer_ref = undefined, ttl = undefined, stats_timer = rabbit_event:init_stats_timer(), - guid_to_channel = dict:new()})), + guid_to_channel = GTC})), lists:foldl( fun (Delivery, StateN) -> {_Delivered, StateN1} = deliver_or_enqueue(Delivery, StateN), diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index e2f9b020..b05d6973 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -37,7 +37,7 @@ backing_queue, backing_queue_state, set_delivered, - seen + seen_status }). %% --------------------------------------------------------------------------- @@ -70,15 +70,15 @@ init(#amqqueue { arguments = Args } = Q, Recover) -> backing_queue = BQ, backing_queue_state = BQS, set_delivered = 0, - seen = sets:new() }. + seen_status = dict:new() }. 
-promote_backing_queue_state(CPid, BQ, BQS, GM, Seen) -> +promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus) -> #state { gm = GM, coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, set_delivered = BQ:len(BQS), - seen = Seen }. + seen_status = SeenStatus }. terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. The queue is going down but @@ -102,29 +102,61 @@ purge(State = #state { gm = GM, set_delivered = 0 }}. publish(Msg = #basic_message { guid = Guid }, MsgProps, ChPid, - State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - seen = Seen }) -> - case sets:is_element(Guid, Seen) of - true -> State #state { seen = sets:del_element(Guid, Seen) }; - false -> ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), - BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State #state { backing_queue_state = BQS1 } - end. + State = #state { gm = GM, + backing_queue = BQ }) -> + {ok, State1} = + maybe_publish( + fun (BQS) -> + ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), + {ok, BQ:publish(Msg, MsgProps, ChPid, BQS)} + end, State), + State1. 
publish_delivered(AckRequired, Msg = #basic_message { guid = Guid }, MsgProps, - ChPid, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - seen = Seen }) -> - case sets:is_element(Guid, Seen) of - true -> State #state { seen = sets:del_element(Guid, Seen) }; - false -> ok = gm:broadcast(GM, {publish, {true, AckRequired}, ChPid, - MsgProps, Msg}), - {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, - MsgProps, ChPid, BQS), - {AckTag, State #state { backing_queue_state = BQS1 }} + ChPid, State = #state { gm = GM, + backing_queue = BQ }) -> + case maybe_publish( + fun (BQS) -> + ok = gm:broadcast(GM, {publish, {true, AckRequired}, ChPid, + MsgProps, Msg}), + BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS) + end, State) of + {ok, State1} -> + %% publish_delivered but we've already published this + %% message. This means that we received the msg when we + %% were a slave but only via GM, not from the + %% channel. + %% + %% If AckRequired then we would have requeued the message + %% upon our promotion to master. Astonishingly, we think + %% we're empty, which means that someone else has already + %% consumed the message post requeue, and now we're about + %% to send it to another consumer. This could not be more + %% wrong. + +maybe_publish(Fun, State = #state { seen_status = SS, + backing_queue_state = BQS }) -> + %% We will never see {published, ChPid, MsgSeqNo} here. + case dict:find(Guid, SS) of + error -> + {Result, BQS1} = Fun(BQS), + {Result, State #state { backing_queue_state = BQS1 }}; + {ok, {published, ChPid}} -> + %% It already got published when we were a slave and no + %% confirmation is waiting. amqqueue_process will have + %% recorded if there's a confirm due to arrive, so can + %% delete entry. 
+ {ok, State #state { seen_status = dict:erase(Guid, SS) }}; + {ok, {confirmed, ChPid}} -> + %% It got confirmed before we became master, but we've + %% only just received the publish from the channel, so + %% couldn't previously know what the msg_seq_no was. Thus + %% confirm now. amqqueue_process will have recorded a + %% confirm is due immediately prior to here (and thus _it_ + %% knows the guid -> msg_seq_no mapping). + ok = rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( + self(), ?MODULE, fun (State1) -> {[Guid], State1} end), + {ok, State #state { seen_status = dict:erase(Guid, SS) }} end. dropwhile(Fun, State = #state { gm = GM, @@ -143,7 +175,7 @@ fetch(AckRequired, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS, set_delivered = SetDelivered, - seen = Seen }) -> + seen_status = SS }) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), State1 = State #state { backing_queue_state = BQS1 }, case Result of @@ -154,13 +186,13 @@ fetch(AckRequired, State = #state { gm = GM, ok = gm:broadcast(GM, {fetch, AckRequired, Guid, Remaining}), IsDelivered1 = IsDelivered orelse SetDelivered > 0, SetDelivered1 = lists:max([0, SetDelivered - 1]), - Seen1 = case SetDelivered + SetDelivered1 of - 1 -> sets:new(); %% transition to empty - _ -> Seen - end, + SS1 = case SetDelivered + SetDelivered1 of + 1 -> dict:new(); %% transition to empty + _ -> SS + end, {{Message, IsDelivered1, AckTag, Remaining}, State1 #state { set_delivered = SetDelivered1, - seen = Seen1 }} + seen_status = SS1 }} end. 
ack(AckTags, State = #state { gm = GM, diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 93f885ba..5cdae16c 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -133,6 +133,8 @@ handle_call({gm_deaths, Deaths}, From, master_node = MNode }) -> rabbit_log:info("Slave ~p saw deaths ~p for ~s~n", [self(), Deaths, rabbit_misc:rs(QueueName)]), + %% The GM has told us about deaths, which means we're not going to + %% receive any more messages from GM case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of {ok, Pid} when node(Pid) =:= MNode -> reply(ok, State); @@ -332,28 +334,31 @@ promote_me(From, #state { q = Q, backing_queue_state = BQS, rate_timer_ref = RateTRef, sender_queues = SQ, - guid_ack = GA }) -> + guid_ack = GA, + guid_status = GS }) -> rabbit_log:info("Promoting slave ~p for ~s~n", [self(), rabbit_misc:rs(Q #amqqueue.name)]), {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), - %% TODO fix up seen MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, sets:new()), + CPid, BQ, BQS, GM, GS), %% We have to do the requeue via this init because otherwise we %% don't have access to the relevent MsgPropsFun. Also, we are %% already in mnesia as the master queue pid. Thus we cannot just %% publish stuff by sending it to ourself - we must pass it %% through to this init, otherwise we can violate ordering %% constraints. 
+ GTC = dict:from_list( + [{Guid, {ChPid, MsgSeqNo}} || + {Guid, {published, ChPid, MsgSeqNo}} <- dict:to_list(GS)]), AckTags = [AckTag || {_Guid, AckTag} <- dict:to_list(GA)], Deliveries = lists:append([queue:to_list(PubQ) || {_ChPid, PubQ} <- dict:to_list(SQ)]), QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( Q, rabbit_mirror_queue_master, MasterState, RateTRef, - AckTags, Deliveries), + AckTags, Deliveries, GTC), {become, rabbit_amqqueue_process, QueueState, hibernate}. noreply(State) -> -- cgit v1.2.1 From 5592ce2dbf48b7d3171e875c775667d4dd9f8fa3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 8 Mar 2011 13:01:14 +0000 Subject: add BQ:validate_message --- include/rabbit_backing_queue_spec.hrl | 2 + src/rabbit_amqqueue_process.erl | 72 ++++++++++++++++++++++------------- src/rabbit_backing_queue.erl | 5 ++- src/rabbit_mirror_queue_master.erl | 5 +++ src/rabbit_mirror_queue_slave.erl | 13 ++++++- src/rabbit_variable_queue.erl | 4 +- 6 files changed, 71 insertions(+), 30 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 9f4f76ca..4ef13cb3 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -68,3 +68,5 @@ -spec(status/1 :: (state()) -> [{atom(), any()}]). -spec(invoke/3 :: (atom(), fun ((A) -> A), state()) -> {[rabbit_guid:guid()], state()}). +-spec(validate_message/2 :: + (rabbit_types:basic_message(), state()) -> boolean()). 
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 8c19aa16..0c9eba9d 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -487,46 +487,64 @@ attempt_delivery(#delivery{txn = none, sender = ChPid, message = Message, msg_seq_no = MsgSeqNo}, - {NeedsConfirming, State = #q{backing_queue = BQ}}) -> + {NeedsConfirming, State = #q{backing_queue = BQ, + backing_queue_state = BQS}}) -> %% must confirm immediately if it has a MsgSeqNo and not NeedsConfirming case {NeedsConfirming, MsgSeqNo} of {_, undefined} -> ok; {no_confirm, _} -> rabbit_channel:confirm(ChPid, [MsgSeqNo]); {confirm, _} -> ok end, - PredFun = fun (IsEmpty, _State) -> not IsEmpty end, - DeliverFun = - fun (AckRequired, false, State1 = #q{backing_queue_state = BQS}) -> - %% we don't need an expiry here because messages are - %% not being enqueued, so we use an empty - %% message_properties. - {AckTag, BQS1} = - BQ:publish_delivered( - AckRequired, Message, - (?BASE_MESSAGE_PROPERTIES)#message_properties{ - needs_confirming = (NeedsConfirming =:= confirm)}, - ChPid, BQS), - {{Message, false, AckTag}, true, - State1#q{backing_queue_state = BQS1}} - end, - {Delivered, State1} = - deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State), - {Delivered, NeedsConfirming, State1}; + case BQ:validate_message(Message, BQS) of + {invalid, _Bool} = Invalid -> + {Invalid, NeedsConfirming, State}; + valid -> + PredFun = fun (IsEmpty, _State) -> not IsEmpty end, + DeliverFun = + fun (AckRequired, false, + State1 = #q{backing_queue_state = BQS1}) -> + %% we don't need an expiry here because + %% messages are not being enqueued, so we use + %% an empty message_properties. 
+ {AckTag, BQS2} = + BQ:publish_delivered( + AckRequired, Message, + (?BASE_MESSAGE_PROPERTIES)#message_properties{ + needs_confirming = + (NeedsConfirming =:= confirm)}, + ChPid, BQS1), + {{Message, false, AckTag}, true, + State1#q{backing_queue_state = BQS2}} + end, + {Delivered, State1} = + deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, + State), + {{valid, Delivered}, NeedsConfirming, State1} + end; attempt_delivery(#delivery{txn = Txn, sender = ChPid, message = Message}, {NeedsConfirming, State = #q{backing_queue = BQ, backing_queue_state = BQS}}) -> - store_ch_record((ch_record(ChPid))#cr{txn = Txn}), - BQS1 = BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, ChPid, BQS), - {true, NeedsConfirming, State#q{backing_queue_state = BQS1}}. + case BQ:validate_message(Message, BQS) of + {invalid, _Reason} = Invalid -> + {Invalid, NeedsConfirming, State}; + valid -> + store_ch_record((ch_record(ChPid))#cr{txn = Txn}), + BQS1 = BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, ChPid, + BQS), + {{valid, true}, NeedsConfirming, + State#q{backing_queue_state = BQS1}} + end. deliver_or_enqueue(Delivery, State) -> case attempt_delivery(Delivery, record_confirm_message(Delivery, State)) of - {true, _, State1} -> + {{invalid, seen}, _, State1} -> + {true, State1}; + {{valid, true}, _, State1} -> {true, State1}; - {false, NeedsConfirming, State1 = #q{backing_queue = BQ, - backing_queue_state = BQS}} -> + {{valid, false}, NeedsConfirming, + State1 = #q{backing_queue = BQ, backing_queue_state = BQS}} -> #delivery{message = Message} = Delivery, BQS1 = BQ:publish(Message, (message_properties(State)) #message_properties{ @@ -863,9 +881,9 @@ handle_call({deliver_immediately, Delivery}, _From, State) -> %% just all ready-to-consume queues get the message, with unready %% queues discarding the message? 
%% - {Delivered, _NeedsConfirming, State1} = + {{_Valid, Bool}, _NeedsConfirming, State1} = attempt_delivery(Delivery, record_confirm_message(Delivery, State)), - reply(Delivered, State1); + reply(Bool, State1); handle_call({deliver, Delivery}, From, State) -> %% Synchronous, "mandatory" delivery mode. Reply asap. diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index d42fe140..726b9bef 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -127,7 +127,10 @@ behaviour_info(callbacks) -> %% Passed a function to be invoked with the relevant backing %% queue's state. Useful for when the backing queue or other %% components need to pass functions into the backing queue. - {invoke, 3} + {invoke, 3}, + + %% TODO: document me + {validate_message, 2} ]; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 800d9453..513a8bb5 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -280,3 +280,8 @@ invoke(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> {MsgIds, BQS1} = BQ:invoke(Mod, Fun, BQS), {MsgIds, State #state { backing_queue_state = BQS1 }}. + +validate_message(Message, #state { backing_queue = BQ, + backing_queue_state = BSQ }) -> + %% this will definitely change. + BQ:validate_message(Message, BQS). diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index d9ad7120..0aedff59 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -119,6 +119,16 @@ init([#amqqueue { name = QueueName } = Q]) -> handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) -> %% Synchronous, "immediate" delivery mode + %% + %% TODO: we cannot reply here because we may not have received + %% this from gm, and indeed the master might die before it + %% receives it. 
Thus if we are promoted to master at that point + %% then we must reply appropriately. So we're going to have to + %% enqueue it, record that it needs a reply, and then reply either + %% when we get the nod via gm, or, if we're promoted, in the mean + %% time we'll have to figure out something else... Of course, if + %% we've already seen it from gm then we're going to have to reply + %% now. gen_server2:reply(From, false), %% master may deliver it, not us noreply(maybe_enqueue_message(Delivery, State)); @@ -419,7 +429,7 @@ maybe_enqueue_message( msg_seq_no = MsgSeqNo, sender = ChPid }, State = #state { sender_queues = SQ, - msg_id_status = MS }) -> + msg_id_status = MS }) -> %% We will never see {published, ChPid, MsgSeqNo} here. case dict:find(MsgId, MS) of error -> @@ -506,6 +516,7 @@ process_instruction( State1 = State #state { sender_queues = SQ1, msg_id_status = MS2 }, + %% we probably want to work in BQ:validate_message here {ok, case Deliver of false -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7019efbb..4ad46f1a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -22,7 +22,7 @@ requeue/3, len/1, is_empty/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, multiple_routing_keys/0]). + status/1, invoke/3, validate_message/2, multiple_routing_keys/0]). -export([start/1, stop/0]). @@ -868,6 +868,8 @@ status(#vqstate { invoke(?MODULE, Fun, State) -> Fun(State). +validate_message(_Msg, _State) -> true. 
+ %%---------------------------------------------------------------------------- %% Minor helpers %%---------------------------------------------------------------------------- -- cgit v1.2.1 From c8edb5712e80bcf0829ee03ceccb56fac898e4f6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 8 Mar 2011 13:13:24 +0000 Subject: oops --- include/rabbit_backing_queue_spec.hrl | 4 ++-- src/rabbit_variable_queue.erl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 4ef13cb3..8f010e58 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -68,5 +68,5 @@ -spec(status/1 :: (state()) -> [{atom(), any()}]). -spec(invoke/3 :: (atom(), fun ((A) -> A), state()) -> {[rabbit_guid:guid()], state()}). --spec(validate_message/2 :: - (rabbit_types:basic_message(), state()) -> boolean()). +-spec(validate_message/2 :: (rabbit_types:basic_message(), state()) -> + {'valid'|'invalid', boolean()}). diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 4ad46f1a..a3f397cc 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -868,7 +868,7 @@ status(#vqstate { invoke(?MODULE, Fun, State) -> Fun(State). -validate_message(_Msg, _State) -> true. +validate_message(_Msg, _State) -> valid. 
%%---------------------------------------------------------------------------- %% Minor helpers -- cgit v1.2.1 From 4debf471dc5ba63dfb7ad50db24835844eb61939 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 8 Mar 2011 13:24:18 +0000 Subject: Hmm, I'd forgotten than deliver_immediately == false => the msg _isn't_ enqueued --- src/rabbit_mirror_queue_slave.erl | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 0aedff59..e17eef01 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -119,16 +119,15 @@ init([#amqqueue { name = QueueName } = Q]) -> handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) -> %% Synchronous, "immediate" delivery mode - %% - %% TODO: we cannot reply here because we may not have received - %% this from gm, and indeed the master might die before it - %% receives it. Thus if we are promoted to master at that point - %% then we must reply appropriately. So we're going to have to - %% enqueue it, record that it needs a reply, and then reply either - %% when we get the nod via gm, or, if we're promoted, in the mean - %% time we'll have to figure out something else... Of course, if - %% we've already seen it from gm then we're going to have to reply - %% now. + + %% It is safe to reply 'false' here even if a) we've not seen the + %% msg via gm, or b) the master dies before we receive the msg via + %% gm. In the case of (a), we will eventually receive the msg via + %% gm, and it's only the master's result to the channel that is + %% important. In the case of (b), if the master does die and we do + %% get promoted then at that point we have no consumers, thus + %% 'false' is precisely the correct answer. However, we must be + %% careful to _not_ enqueue the message in this case. 
gen_server2:reply(From, false), %% master may deliver it, not us noreply(maybe_enqueue_message(Delivery, State)); -- cgit v1.2.1 From a0da6633d6c511267d14cdc79771dfdc73e5d17f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 8 Mar 2011 15:44:58 +0000 Subject: Just some more notes --- src/rabbit_mirror_queue_slave.erl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index e17eef01..87ce31d8 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -359,7 +359,18 @@ promote_me(From, #state { q = Q, %% publish stuff by sending it to ourself - we must pass it %% through to this init, otherwise we can violate ordering %% constraints. - GTC = dict:from_list( + + %% MTC should contain only entries for which we are still + %% expecting confirms to come back to use from the underlying BQ. + + %% TODO: what do we do with entries in MS that are 'confirmed' + %% already? Well they should end up in the master queue's state, + %% and the confirms should be issued either by the + %% amqqueue_process if 'immediately', or otherwise by the master + %% queue on validate_message?! That's disgusting. There's no way + %% validate_message should be side-effecting... though we could at + %% least ensure it's idempotent. Hmm. + MTC = dict:from_list( [{MsgId, {ChPid, MsgSeqNo}} || {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]), AckTags = [AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], @@ -367,7 +378,7 @@ promote_me(From, #state { q = Q, || {_ChPid, PubQ} <- dict:to_list(SQ)]), QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( Q, rabbit_mirror_queue_master, MasterState, RateTRef, - AckTags, Deliveries, GTC), + AckTags, Deliveries, MTC), {become, rabbit_amqqueue_process, QueueState, hibernate}. 
noreply(State) -> -- cgit v1.2.1 From fc071687a13dcde2b3431cad21e7ca270c6f2898 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 9 Mar 2011 13:19:10 +0000 Subject: just stashing this whilst I fix something else --- src/rabbit_amqqueue_process.erl | 2 +- src/rabbit_mirror_queue_master.erl | 112 ++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 58 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 0c9eba9d..b3e04337 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -539,7 +539,7 @@ attempt_delivery(#delivery{txn = Txn, deliver_or_enqueue(Delivery, State) -> case attempt_delivery(Delivery, record_confirm_message(Delivery, State)) of - {{invalid, seen}, _, State1} -> + {{invalid, _Bool}, _, State1} -> {true, State1}; {{valid, true}, _, State1} -> {true, State1}; diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 513a8bb5..a1e2a49a 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -102,62 +102,30 @@ purge(State = #state { gm = GM, set_delivered = 0 }}. publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, - State = #state { gm = GM, - backing_queue = BQ }) -> - {ok, State1} = - maybe_publish( - fun (BQS) -> - ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), - {ok, BQ:publish(Msg, MsgProps, ChPid, BQS)} - end, MsgId, State), - State1. + State = #state { gm = GM, + seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS }) -> + false = dict:is_key(MsgId, SS), %% ASSERTION + ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + State #state { backing_queue_state = BQS1 }. 
publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, - ChPid, State = #state { gm = GM, - backing_queue = BQ }) -> - case maybe_publish( - fun (BQS) -> - ok = gm:broadcast(GM, {publish, {true, AckRequired}, ChPid, - MsgProps, Msg}), - BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS) - end, MsgId, State) of - {ok, State1} -> - %% publish_delivered but we've already published this - %% message. This means that we received the msg when we - %% were a slave but only via GM, not from the - %% channel. - %% - %% If AckRequired then we would have requeued the message - %% upon our promotion to master. Astonishingly, we think - %% we're empty, which means that someone else has already - %% consumed the message post requeue, and now we're about - %% to send it to another consumer. This could not be more - %% wrong. - -maybe_publish(Fun, MsgId, State = #state { seen_status = SS, - backing_queue_state = BQS }) -> - %% We will never see {published, ChPid, MsgSeqNo} here. - case dict:find(MsgId, SS) of - error -> - {Result, BQS1} = Fun(BQS), - {Result, State #state { backing_queue_state = BQS1 }}; - {ok, {published, ChPid}} -> - %% It already got published when we were a slave and no - %% confirmation is waiting. amqqueue_process will have - %% recorded if there's a confirm due to arrive, so can - %% delete entry. - {ok, State #state { seen_status = dict:erase(MsgId, SS) }}; - {ok, {confirmed, ChPid}} -> - %% It got confirmed before we became master, but we've - %% only just received the publish from the channel, so - %% couldn't previously know what the msg_seq_no was. Thus - %% confirm now. amqqueue_process will have recorded a - %% confirm is due immediately prior to here (and thus _it_ - %% knows the msg_id -> msg_seq_no mapping). - ok = rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( - self(), ?MODULE, fun (State1) -> {[MsgId], State1} end), - {ok, State #state { seen_status = dict:erase(MsgId, SS) }} - end. 
+ ChPid, State = #state { gm = GM, + backing_queue = BQ, + seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS }) -> + false = dict:is_key(MsgId, SS), %% ASSERTION + %% Must use confirmed_broadcast here in order to guarantee that + %% all slaves are forced to interpret this publish_delivered at + %% the same point, especially if we die and a slave is promoted. + BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), + ok = gm:confirmed_broadcast( + GM, {publish, {true, AckRequired}, ChPid, MsgProps, Msg}), + BQS1 = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), + State #state { backing_queue_state = BQS1 }. dropwhile(Fun, State = #state { gm = GM, backing_queue = BQ, @@ -281,7 +249,37 @@ invoke(Mod, Fun, State = #state { backing_queue = BQ, {MsgIds, BQS1} = BQ:invoke(Mod, Fun, BQS), {MsgIds, State #state { backing_queue_state = BQS1 }}. -validate_message(Message, #state { backing_queue = BQ, +validate_message(Message, #state { seen_status = SS, + backing_queue = BQ, backing_queue_state = BSQ }) -> - %% this will definitely change. - BQ:validate_message(Message, BQS). + %% Here, we need to deal with the possibility that we're about to + %% receive a message that we've already seen when we were a slave + %% (we received it via gm). Thus if we do receive such message now + %% via the channel, there may be a confirm waiting to issue for + %% it. + + %% We will never see {published, ChPid, MsgSeqNo} here. + case dict:find(MsgId, SS) of + error -> + %% We permit the underlying BQ to have a peek at it, but + %% only if we ourselves are not filtering out the msg. + BQ:validate_message(Message, BQS); + {ok, {published, ChPid}} -> + %% It already got published when we were a slave and no + %% confirmation is waiting. amqqueue_process will have, in + %% its msg_id_to_channel mapping, the entry for dealing + %% with the confirm when that comes back in, so the msg is + %% invalid, and we don't need to do anything further here. 
+ {invalid, false}; + {ok, {confirmed, ChPid}} -> + %% It got confirmed before we became master, but we've + %% only just received the publish from the channel, so + %% couldn't previously know what the msg_seq_no was. Thus + %% confirm now. amqqueue_process will have recorded a + %% confirm is due immediately prior to here (and thus _it_ + %% knows the msg_id -> msg_seq_no mapping). + ok = rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( + self(), ?MODULE, fun (State1) -> {[MsgId], State1} end), + {ok, State #state { seen_status = dict:erase(MsgId, SS) }} + end. + -- cgit v1.2.1 From a58c4974332cd71c0ed59a710f0dda67c5002c83 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 9 Mar 2011 15:08:41 +0000 Subject: Sort out and clean the validate_message stuff. It compiles again now... --- src/rabbit_amqqueue_process.erl | 24 ++++++++++-------------- src/rabbit_mirror_queue_master.erl | 12 ++++++------ src/rabbit_mirror_queue_slave.erl | 14 ++++++++++---- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index eb3b13cc..81e260bd 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -494,7 +494,9 @@ attempt_delivery(#delivery{txn = none, end, case BQ:validate_message(Message, BQS) of {invalid, BQS1} -> - {invalid, NeedsConfirming, State#q{backing_queue_state = BQS1}}; + %% if the message is invalid, we pretend it was delivered + %% fine + {true, NeedsConfirming, State#q{backing_queue_state = BQS1}}; {valid, BQS1} -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = @@ -516,7 +518,7 @@ attempt_delivery(#delivery{txn = none, {Delivered, State2} = deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State#q{backing_queue_state = BQS1}), - {{valid, Delivered}, NeedsConfirming, State2} + {Delivered, NeedsConfirming, State2} end; attempt_delivery(#delivery{txn = Txn, sender = ChPid, @@ -525,22 +527,19 @@ 
attempt_delivery(#delivery{txn = Txn, backing_queue_state = BQS}}) -> case BQ:validate_message(Message, BQS) of {invalid, BQS1} -> - {invalid, NeedsConfirming, State#q{backing_queue_state = BQS1}}; + {true, NeedsConfirming, State#q{backing_queue_state = BQS1}}; {valid, BQS1} -> store_ch_record((ch_record(ChPid))#cr{txn = Txn}), BQS2 = BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, ChPid, BQS1), - {{valid, true}, NeedsConfirming, - State#q{backing_queue_state = BQS2}} + {true, NeedsConfirming, State#q{backing_queue_state = BQS2}} end. deliver_or_enqueue(Delivery, State) -> case attempt_delivery(Delivery, record_confirm_message(Delivery, State)) of - {invalid, _, State1} -> + {true, _, State1} -> State1; - {{valid, true}, _, State1} -> - State1; - {{valid, false}, NeedsConfirming, + {false, NeedsConfirming, State1 = #q{backing_queue = BQ, backing_queue_state = BQS}} -> #delivery{message = Message} = Delivery, BQS1 = BQ:publish(Message, @@ -878,12 +877,9 @@ handle_call({deliver_immediately, Delivery}, _From, State) -> %% just all ready-to-consume queues get the message, with unready %% queues discarding the message? %% - {Valid, _NeedsConfirming, State1} = + {Delivered, _NeedsConfirming, State1} = attempt_delivery(Delivery, record_confirm_message(Delivery, State)), - reply(case Valid of - valid -> true; - invalid -> false - end, State1); + reply(Delivered, State1); handle_call({deliver, Delivery}, From, State) -> %% Synchronous, "mandatory" delivery mode. Reply asap. diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index dd2357bb..704e62c1 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -22,7 +22,7 @@ requeue/3, len/1, is_empty/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1, invoke/3]). + status/1, invoke/3, validate_message/2]). -export([start/1, stop/0]). 
@@ -113,7 +113,6 @@ publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, ChPid, State = #state { gm = GM, - backing_queue = BQ, seen_status = SS, backing_queue = BQ, backing_queue_state = BQS }) -> @@ -252,7 +251,7 @@ invoke(Mod, Fun, State = #state { backing_queue = BQ, validate_message(Message = #basic_message { id = MsgId }, State = #state { seen_status = SS, backing_queue = BQ, - backing_queue_state = BSQ }) -> + backing_queue_state = BQS }) -> %% Here, we need to deal with the possibility that we're about to %% receive a message that we've already seen when we were a slave %% (we received it via gm). Thus if we do receive such message now @@ -266,14 +265,15 @@ validate_message(Message = #basic_message { id = MsgId }, %% only if we ourselves are not filtering out the msg. {Result, BQS1} = BQ:validate_message(Message, BQS), {Result, State #state { backing_queue_state = BQS1 }}; - {ok, {published, ChPid}} -> + {ok, {published, _ChPid}} -> %% It already got published when we were a slave and no %% confirmation is waiting. amqqueue_process will have, in %% its msg_id_to_channel mapping, the entry for dealing %% with the confirm when that comes back in. The msg is - %% invalid. We will not see this again, so erase. + %% invalid. We will not see this again, nor will we be + %% further involved in confirming this message, so erase. {invalid, State #state { seen_status = dict:erase(MsgId, SS) }}; - {ok, {confirmed, ChPid}} -> + {ok, {confirmed, _ChPid}} -> %% It got confirmed before we became master, but we've %% only just received the publish from the channel, so %% couldn't previously know what the msg_seq_no was. 
Thus diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 87ce31d8..68dd50e2 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -128,13 +128,17 @@ handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) -> %% get promoted then at that point we have no consumers, thus %% 'false' is precisely the correct answer. However, we must be %% careful to _not_ enqueue the message in this case. + + %% Note this is distinct from the case where we receive the msg + %% via gm first, then we're promoted to master, and only then do + %% we receive the msg from the channel. gen_server2:reply(From, false), %% master may deliver it, not us - noreply(maybe_enqueue_message(Delivery, State)); + noreply(maybe_enqueue_message(Delivery, false, State)); handle_call({deliver, Delivery = #delivery {}}, From, State) -> %% Synchronous, "mandatory" delivery mode gen_server2:reply(From, true), %% amqqueue throws away the result anyway - noreply(maybe_enqueue_message(Delivery, State)); + noreply(maybe_enqueue_message(Delivery, true, State)); handle_call({gm_deaths, Deaths}, From, State = #state { q = #amqqueue { name = QueueName }, @@ -170,7 +174,7 @@ handle_cast({gm, Instruction}, State) -> handle_cast({deliver, Delivery = #delivery {}}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. - noreply(maybe_enqueue_message(Delivery, State)); + noreply(maybe_enqueue_message(Delivery, true, State)); handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), @@ -438,6 +442,7 @@ maybe_enqueue_message( Delivery = #delivery { message = #basic_message { id = MsgId }, msg_seq_no = MsgSeqNo, sender = ChPid }, + EnqueueOnPromotion, State = #state { sender_queues = SQ, msg_id_status = MS }) -> %% We will never see {published, ChPid, MsgSeqNo} here. 
@@ -447,7 +452,8 @@ maybe_enqueue_message( {ok, MQ1} -> MQ1; error -> queue:new() end, - SQ1 = dict:store(ChPid, queue:in(Delivery, MQ), SQ), + SQ1 = dict:store(ChPid, + queue:in({Delivery, EnqueueOnPromotion}, MQ), SQ), State #state { sender_queues = SQ1 }; {ok, {confirmed, ChPid}} -> %% BQ has confirmed it but we didn't know what the -- cgit v1.2.1 From e52e2d3b2aa322e28bc5e1e37246548de2d3c8a6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 9 Mar 2011 15:12:48 +0000 Subject: fix --- src/rabbit_mirror_queue_slave.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 68dd50e2..1a239880 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -501,9 +501,10 @@ process_instruction( case queue:out(MQ) of {empty, _MQ} -> {SQ, MS1}; - {{value, Delivery = #delivery { - msg_seq_no = MsgSeqNo, - message = #basic_message { id = MsgId } }}, + {{value, {Delivery = #delivery { + msg_seq_no = MsgSeqNo, + message = #basic_message { id = MsgId } }, + _EnqueueOnPromotion}}, MQ1} -> %% We received the msg from the channel %% first. Thus we need to deal with confirms @@ -519,7 +520,7 @@ process_instruction( ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), MS end}; - {{value, #delivery {}}, _MQ1} -> + {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ1} -> %% The instruction was sent to us before we %% were within the mirror_pids within the %% #amqqueue{} record. We'll never receive the -- cgit v1.2.1 From 64fbfeb070380fe31a4242d8f3c0c2384adafd2d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 9 Mar 2011 15:26:30 +0000 Subject: Correct documentation. 
Code is still wrong though --- src/rabbit_mirror_queue_slave.erl | 46 ++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 1a239880..481c9dd4 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -357,23 +357,35 @@ promote_me(From, #state { q = Q, ok = gm:confirmed_broadcast(GM, heartbeat), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( CPid, BQ, BQS, GM, MS), - %% We have to do the requeue via this init because otherwise we - %% don't have access to the relevent MsgPropsFun. Also, we are - %% already in mnesia as the master queue pid. Thus we cannot just - %% publish stuff by sending it to ourself - we must pass it - %% through to this init, otherwise we can violate ordering - %% constraints. - - %% MTC should contain only entries for which we are still - %% expecting confirms to come back to use from the underlying BQ. - - %% TODO: what do we do with entries in MS that are 'confirmed' - %% already? Well they should end up in the master queue's state, - %% and the confirms should be issued either by the - %% amqqueue_process if 'immediately', or otherwise by the master - %% queue on validate_message?! That's disgusting. There's no way - %% validate_message should be side-effecting... though we could at - %% least ensure it's idempotent. Hmm. + + %% We find all the messages that we've received from channels but + %% not from gm, and if they're due to be enqueued on promotion + %% then we pass them to the + %% queue_process:init_with_backing_queue_state to be enqueued. + + %% We also have to requeue messages which are pending acks: the + %% consumers from the master queue have been lost and so these + %% messages need requeuing. 
They might also be pending + %% confirmation, and indeed they might also be pending arrival of + %% the publication from the channel itself, if we received both + %% the publication and the fetch via gm first! Requeuing doesn't + %% affect confirmations: if the message was previously pending a + %% confirmation then it still will be, under the same msg_id. So + %% as a master, we need to be prepared to filter out the + %% publication of said messages from the channel (validate_message + %% (thus such requeued messages must remain in the msg_id_status + %% which becomes seen_status in the master)). + + %% Then there are messages we already have in the queue, which are + %% not currently pending acknowledgement: + %% 1. Messages we've only received via gm: + %% Filter out subsequent publication from channel through + %% validate_message. Might have to issue confirms then or + %% later, thus queue_process state will have to know that + %% there's a pending confirm. + %% 2. Messages received via both gm and channel: + %% Queue will have to deal with issuing confirms if necessary. 
+ MTC = dict:from_list( [{MsgId, {ChPid, MsgSeqNo}} || {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]), -- cgit v1.2.1 From 34024504caa97ec6f85f4d375537d76d7554f350 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 9 Mar 2011 16:28:52 +0000 Subject: Further improvement of documentation --- src/rabbit_mirror_queue_slave.erl | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 481c9dd4..8c765d3c 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -355,14 +355,12 @@ promote_me(From, #state { q = Q, true = unlink(GM), gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), - MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, MS), %% We find all the messages that we've received from channels but %% not from gm, and if they're due to be enqueued on promotion %% then we pass them to the %% queue_process:init_with_backing_queue_state to be enqueued. - + %% %% We also have to requeue messages which are pending acks: the %% consumers from the master queue have been lost and so these %% messages need requeuing. They might also be pending @@ -374,8 +372,8 @@ promote_me(From, #state { q = Q, %% as a master, we need to be prepared to filter out the %% publication of said messages from the channel (validate_message %% (thus such requeued messages must remain in the msg_id_status - %% which becomes seen_status in the master)). - + %% (MS) which becomes seen_status (SS) in the master)). + %% %% Then there are messages we already have in the queue, which are %% not currently pending acknowledgement: %% 1. Messages we've only received via gm: @@ -385,6 +383,36 @@ promote_me(From, #state { q = Q, %% there's a pending confirm. %% 2. 
Messages received via both gm and channel: %% Queue will have to deal with issuing confirms if necessary. + %% + %% MS contains the following three entry types: + %% + %% {published, ChPid}: + %% published via gm only; pending arrival of publication from + %% channel, maybe pending confirm. + %% + %% {published, ChPid, MsgSeqNo}: + %% published via gm and channel; pending confirm. + %% + %% {confirmed, ChPid}: + %% published via gm only, and confirmed; pending publication + %% from channel. + %% + %% The middle form only, needs to go through to the queue_process + %% state to form the msg_id_to_channel mapping (MTC). + %% + %% The two outer forms only, need to go to the master state + %% seen_status (SS). + %% + %% No messages that are enqueued from SQ at this point will have + %% entries in MS. + %% + %% Messages that are extracted from MA may have entries in MS, and + %% those messages are then requeued. However, as discussed above, + %% this does not affect MS, nor which bits go through to SS in + %% Master, or MTC in queue_process. + + MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( + CPid, BQ, BQS, GM, MS), MTC = dict:from_list( [{MsgId, {ChPid, MsgSeqNo}} || -- cgit v1.2.1 From 4bb562f335972ab764433103286ae1fb04dffb9c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 9 Mar 2011 16:39:59 +0000 Subject: There's a chance that might be it --- src/rabbit_mirror_queue_slave.erl | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 8c765d3c..6369e114 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -397,12 +397,12 @@ promote_me(From, #state { q = Q, %% published via gm only, and confirmed; pending publication %% from channel. %% - %% The middle form only, needs to go through to the queue_process - %% state to form the msg_id_to_channel mapping (MTC). 
- %% %% The two outer forms only, need to go to the master state %% seen_status (SS). %% + %% The middle form only, needs to go through to the queue_process + %% state to form the msg_id_to_channel mapping (MTC). + %% %% No messages that are enqueued from SQ at this point will have %% entries in MS. %% @@ -411,15 +411,21 @@ promote_me(From, #state { q = Q, %% this does not affect MS, nor which bits go through to SS in %% Master, or MTC in queue_process. + SS = dict:filter(fun ({published, _ChPid}) -> true; + ({published, _ChPid, _MsgSeqNo}) -> false; + ({confirmed, _ChPid}) -> true + end, MS), + MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, MS), + CPid, BQ, BQS, GM, SS), - MTC = dict:from_list( - [{MsgId, {ChPid, MsgSeqNo}} || - {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]), + MTC = dict:filter(fun ({published, _ChPid}) -> false; + ({published, _ChPid, _MsgSeqNo}) -> true; + ({confirmed, _ChPid}) -> false + end, MS), AckTags = [AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], - Deliveries = lists:append([queue:to_list(PubQ) - || {_ChPid, PubQ} <- dict:to_list(SQ)]), + Deliveries = [Delivery || {_ChPid, PubQ} <- dict:to_list(SQ), + {Delivery, true} <- queue:to_list(PubQ)], QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( Q, rabbit_mirror_queue_master, MasterState, RateTRef, AckTags, Deliveries, MTC), -- cgit v1.2.1 From da854e15d58fe71ea5478dccc61f88b2eab9d2a1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 9 Mar 2011 17:03:07 +0000 Subject: or that might be a bit better... 
--- src/rabbit_amqqueue_process.erl | 6 ++---- src/rabbit_mirror_queue_slave.erl | 14 +++++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 81e260bd..d8cd510b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -142,10 +142,8 @@ init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, stats_timer = rabbit_event:init_stats_timer(), msg_id_to_channel = MTC})), lists:foldl( - fun (Delivery, StateN) -> - {_Delivered, StateN1} = deliver_or_enqueue(Delivery, StateN), - StateN1 - end, State, Deliveries). + fun (Delivery, StateN) -> deliver_or_enqueue(Delivery, StateN) end, + State, Deliveries). terminate(shutdown, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 6369e114..57ddf8db 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -222,7 +222,7 @@ terminate(Reason, #state { q = Q, rate_timer_ref = RateTRef }) -> ok = gm:leave(GM), QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( - Q, BQ, BQS, RateTRef, [], []), + Q, BQ, BQS, RateTRef, [], [], dict:new()), rabbit_amqqueue_process:terminate(Reason, QueueState); terminate([_SPid], _Reason) -> %% gm case @@ -411,17 +411,17 @@ promote_me(From, #state { q = Q, %% this does not affect MS, nor which bits go through to SS in %% Master, or MTC in queue_process. 
- SS = dict:filter(fun ({published, _ChPid}) -> true; - ({published, _ChPid, _MsgSeqNo}) -> false; - ({confirmed, _ChPid}) -> true + SS = dict:filter(fun (_MsgId, {published, _ChPid}) -> true; + (_MsgId, {published, _ChPid, _MsgSeqNo}) -> false; + (_MsgId, {confirmed, _ChPid}) -> true end, MS), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( CPid, BQ, BQS, GM, SS), - MTC = dict:filter(fun ({published, _ChPid}) -> false; - ({published, _ChPid, _MsgSeqNo}) -> true; - ({confirmed, _ChPid}) -> false + MTC = dict:filter(fun (_MsgId, {published, _ChPid}) -> false; + (_MsgId, {published, _ChPid, _MsgSeqNo}) -> true; + (_MsgId, {confirmed, _ChPid}) -> false end, MS), AckTags = [AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], Deliveries = [Delivery || {_ChPid, PubQ} <- dict:to_list(SQ), -- cgit v1.2.1 From 2ca5a6c3ce467184008067dd5f53876327ce9f01 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 10 Mar 2011 12:51:49 +0000 Subject: It was right before... --- src/rabbit_mirror_queue_slave.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 57ddf8db..950df509 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -419,10 +419,10 @@ promote_me(From, #state { q = Q, MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( CPid, BQ, BQS, GM, SS), - MTC = dict:filter(fun (_MsgId, {published, _ChPid}) -> false; - (_MsgId, {published, _ChPid, _MsgSeqNo}) -> true; - (_MsgId, {confirmed, _ChPid}) -> false - end, MS), + + MTC = dict:from_list( + [{MsgId, {ChPid, MsgSeqNo}} || + {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]), AckTags = [AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], Deliveries = [Delivery || {_ChPid, PubQ} <- dict:to_list(SQ), {Delivery, true} <- queue:to_list(PubQ)], -- cgit v1.2.1 From 5f11273b7379510d20b3792d30b8a0c9481353ef Mon Sep 17 00:00:00 2001 From: Matthew Sackman 
Date: Thu, 10 Mar 2011 13:50:11 +0000 Subject: Make creation of slaves synchronous. This means we are guaranteed on queue.declare to have all slaves up by the time of the _ok --- src/rabbit_mirror_queue_coordinator.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 30fd6ed3..bd77c976 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -39,7 +39,7 @@ start_link(Queue, GM) -> gen_server2:start_link(?MODULE, [Queue, GM], []). add_slave(CPid, SlaveNode) -> - gen_server2:cast(CPid, {add_slave, SlaveNode}). + gen_server2:call(CPid, {add_slave, SlaveNode}, infinity). get_gm(CPid) -> gen_server2:call(CPid, get_gm, infinity). @@ -67,9 +67,9 @@ init([#amqqueue { name = QueueName } = Q, GM]) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call(get_gm, _From, State = #state { gm = GM }) -> - reply(GM, State). + reply(GM, State); -handle_cast({add_slave, Node}, State = #state { q = Q }) -> +handle_call({add_slave, Node}, _From, State = #state { q = Q }) -> Nodes = nodes(), case lists:member(Node, Nodes) of true -> @@ -81,7 +81,7 @@ handle_cast({add_slave, Node}, State = #state { q = Q }) -> "Ignoring request to add slave on node ~p for ~s~n", [Node, rabbit_misc:rs(Q #amqqueue.name)]) end, - noreply(State); + reply(ok, State). handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName } }) -> -- cgit v1.2.1 From 4013400d6787ac306c1c1c354a6db8d5a283bfba Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 10 Mar 2011 16:26:37 +0000 Subject: discovered another entire class of interleaving opportunities I've not considered. Fortunately, the fix turned out to be simple. ish. 
--- src/rabbit_mirror_queue_master.erl | 48 ++++++++++++++++++++++++++++---------- src/rabbit_mirror_queue_slave.erl | 7 +++--- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 704e62c1..388f5ce3 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -243,10 +243,30 @@ status(#state { backing_queue = BQ, backing_queue_state = BQS}) -> invoke(?MODULE, Fun, State) -> Fun(State); -invoke(Mod, Fun, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> +invoke(Mod, Fun, State = #state { backing_queue = BQ, + backing_queue_state = BQS, + seen_status = SS }) -> {MsgIds, BQS1} = BQ:invoke(Mod, Fun, BQS), - {MsgIds, State #state { backing_queue_state = BQS1 }}. + {MsgIds1, SS1} = + lists:foldl( + fun (MsgId, {MsgIdsN, SSN}) -> + case dict:find(MsgId, SSN) of + error -> + {[MsgId | MsgIdsN], SSN}; + {ok, published} -> + %% It was published when we were a slave, + %% and we were promoted before we saw the + %% publish from the channel. We still + %% haven't seen the channel publish, and + %% consequently we need to filter out the + %% confirm here. We will issue the confirm + %% when we see the publish from the + %% channel. + {MsgIdsN, dict:store(MsgId, confirmed, SSN)} + end + end, {[], SS}, MsgIds), + {MsgIds1, State #state { backing_queue_state = BQS1, + seen_status = SS1 }}. validate_message(Message = #basic_message { id = MsgId }, State = #state { seen_status = SS, @@ -265,20 +285,24 @@ validate_message(Message = #basic_message { id = MsgId }, %% only if we ourselves are not filtering out the msg. {Result, BQS1} = BQ:validate_message(Message, BQS), {Result, State #state { backing_queue_state = BQS1 }}; - {ok, {published, _ChPid}} -> + {ok, published} -> %% It already got published when we were a slave and no %% confirmation is waiting. 
amqqueue_process will have, in %% its msg_id_to_channel mapping, the entry for dealing - %% with the confirm when that comes back in. The msg is - %% invalid. We will not see this again, nor will we be + %% with the confirm when that comes back in (it's added + %% immediately prior to calling validate_message). The msg + %% is invalid. We will not see this again, nor will we be %% further involved in confirming this message, so erase. {invalid, State #state { seen_status = dict:erase(MsgId, SS) }}; - {ok, {confirmed, _ChPid}} -> - %% It got confirmed before we became master, but we've - %% only just received the publish from the channel, so - %% couldn't previously know what the msg_seq_no was. Thus - %% confirm now. As above, amqqueue_process will have the - %% entry for the msg_id_to_channel mapping. + {ok, confirmed} -> + %% It got published when we were a slave via gm, and + %% confirmed some time after that (maybe even after + %% promotion), but before we received the publish from the + %% channel, so couldn't previously know what the + %% msg_seq_no was (and thus confirm as a slave). So we + %% need to confirm now. As above, amqqueue_process will + %% have the entry for the msg_id_to_channel mapping added + %% immediately prior to calling validate_message/2. ok = rabbit_amqqueue:maybe_run_queue_via_backing_queue_async( self(), ?MODULE, fun (State1) -> {[MsgId], State1} end), {invalid, State #state { seen_status = dict:erase(MsgId, SS) }} diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 950df509..d7f86456 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -411,10 +411,9 @@ promote_me(From, #state { q = Q, %% this does not affect MS, nor which bits go through to SS in %% Master, or MTC in queue_process. 
- SS = dict:filter(fun (_MsgId, {published, _ChPid}) -> true; - (_MsgId, {published, _ChPid, _MsgSeqNo}) -> false; - (_MsgId, {confirmed, _ChPid}) -> true - end, MS), + SS = dict:from_list([{MsgId, Status} + || {MsgId, {Status, _ChPid}} <- dict:to_list(MS), + Status =:= published orelse Status =:= confirmed]), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( CPid, BQ, BQS, GM, SS), -- cgit v1.2.1 From cf5cdaebf4b5364ed2a1a04a26ff2968de42c00b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 10 Mar 2011 16:30:53 +0000 Subject: Remove erroneous loss of state. Whilst this now can cause a memory leak, it only affects the master, the dict is not particularly rich, and it'll only be left with entries for dead channels (most likely channels that were on the old master). Also, we might very well be able to address this through other means --- src/rabbit_mirror_queue_master.erl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 388f5ce3..54c718b1 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -141,8 +141,7 @@ dropwhile(Fun, State = #state { gm = GM, fetch(AckRequired, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = SetDelivered, - seen_status = SS }) -> + set_delivered = SetDelivered }) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), State1 = State #state { backing_queue_state = BQS1 }, case Result of @@ -153,13 +152,8 @@ fetch(AckRequired, State = #state { gm = GM, ok = gm:broadcast(GM, {fetch, AckRequired, MsgId, Remaining}), IsDelivered1 = IsDelivered orelse SetDelivered > 0, SetDelivered1 = lists:max([0, SetDelivered - 1]), - SS1 = case SetDelivered + SetDelivered1 of - 1 -> dict:new(); %% transition to empty - _ -> SS - end, {{Message, IsDelivered1, AckTag, Remaining}, - State1 #state { set_delivered = SetDelivered1, - seen_status = SS1 }} + State1 
#state { set_delivered = SetDelivered1 }} end. ack(AckTags, State = #state { gm = GM, -- cgit v1.2.1 From eb8c2af1781252b6a1eb8d5d65d40f04f589d169 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 11 Mar 2011 12:27:33 +0000 Subject: Add ability to dynamically add slaves --- src/rabbit_control.erl | 6 +++ src/rabbit_mirror_queue_coordinator.erl | 21 +-------- src/rabbit_mirror_queue_master.erl | 2 +- src/rabbit_mirror_queue_misc.erl | 32 ++++++++++++- src/rabbit_mirror_queue_slave.erl | 81 +++++++++++++++------------------ 5 files changed, 77 insertions(+), 65 deletions(-) diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index e2c050f5..604b1bfa 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -232,6 +232,12 @@ action(list_queues, Node, Args, Opts, Inform) -> [VHostArg, ArgAtoms]), ArgAtoms); +action(add_queue_mirror, Node, [Queue, MirrorNode], Opts, Inform) -> + Inform("Adding mirror of queue ~p on node ~p~n", [Queue, MirrorNode]), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + rpc_call(Node, rabbit_mirror_queue_misc, add_slave, + [VHostArg, list_to_binary(Queue), list_to_atom(MirrorNode)]); + action(list_exchanges, Node, Args, Opts, Inform) -> Inform("Listing exchanges", []), VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index bd77c976..5fd07e60 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -16,7 +16,7 @@ -module(rabbit_mirror_queue_coordinator). --export([start_link/2, add_slave/2, get_gm/1]). +-export([start_link/2, get_gm/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -38,9 +38,6 @@ start_link(Queue, GM) -> gen_server2:start_link(?MODULE, [Queue, GM], []). -add_slave(CPid, SlaveNode) -> - gen_server2:call(CPid, {add_slave, SlaveNode}, infinity). 
- get_gm(CPid) -> gen_server2:call(CPid, get_gm, infinity). @@ -67,21 +64,7 @@ init([#amqqueue { name = QueueName } = Q, GM]) -> {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call(get_gm, _From, State = #state { gm = GM }) -> - reply(GM, State); - -handle_call({add_slave, Node}, _From, State = #state { q = Q }) -> - Nodes = nodes(), - case lists:member(Node, Nodes) of - true -> - Result = rabbit_mirror_queue_slave_sup:start_child(Node, [Q]), - rabbit_log:info("Adding slave node for ~s: ~p~n", - [rabbit_misc:rs(Q #amqqueue.name), Result]); - false -> - rabbit_log:info( - "Ignoring request to add slave on node ~p for ~s~n", - [Node, rabbit_misc:rs(Q #amqqueue.name)]) - end, - reply(ok, State). + reply(GM, State). handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName } }) -> diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 54c718b1..c5a2e88a 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -62,7 +62,7 @@ init(#amqqueue { arguments = Args } = Q, Recover) -> _ -> [list_to_atom(binary_to_list(Node)) || {longstr, Node} <- Nodes] end, - [rabbit_mirror_queue_coordinator:add_slave(CPid, Node) || Node <- Nodes1], + [rabbit_mirror_queue_misc:add_slave(Q, Node) || Node <- Nodes1], {ok, BQ} = application:get_env(backing_queue_module), BQS = BQ:init(Q, Recover), #state { gm = GM, diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 090cb812..23d7c398 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -16,7 +16,7 @@ -module(rabbit_mirror_queue_misc). --export([remove_from_queue/2]). +-export([remove_from_queue/2, add_slave/2, add_slave/3]). -include("rabbit.hrl"). @@ -44,3 +44,33 @@ remove_from_queue(QueueName, DeadPids) -> end end end). + +add_slave(VHostPath, QueueName, MirrorNode) -> + add_slave(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). 
+ +add_slave(Queue, MirrorNode) -> + rabbit_amqqueue:with( + Queue, + fun (#amqqueue { arguments = Args, name = Name, + pid = QPid, mirror_pids = MPids } = Q) -> + case rabbit_misc:table_lookup(Args, <<"x-mirror">>) of + undefined -> + ok; + _ -> + case [MirrorNode || Pid <- [QPid | MPids], + node(Pid) =:= MirrorNode] of + [] -> + Result = + rabbit_mirror_queue_slave_sup:start_child( + MirrorNode, [Q]), + rabbit_log:info("Adding slave node for ~s: ~p~n", + [rabbit_misc:rs(Name), Result]), + case Result of + {ok, _Pid} -> ok; + _ -> Result + end; + [_] -> + {error, queue_already_mirrored_on_node} + end + end + end). diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index d7f86456..064dc329 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -76,46 +76,38 @@ init([#amqqueue { name = QueueName } = Q]) -> end, Self = self(), Node = node(), - case rabbit_misc:execute_mnesia_transaction( - fun () -> - [Q1 = #amqqueue { pid = QPid, mirror_pids = MPids }] = - mnesia:read({rabbit_queue, QueueName}), - case [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node] of - [] -> - MPids1 = MPids ++ [Self], - mnesia:write(rabbit_queue, - Q1 #amqqueue { mirror_pids = MPids1 }, - write), - {ok, QPid}; - _ -> - {error, node_already_present} - end - end) of - {ok, MPid} -> - ok = file_handle_cache:register_callback( - rabbit_amqqueue, set_maximum_since_use, [self()]), - ok = rabbit_memory_monitor:register( - self(), {rabbit_amqqueue, set_ram_duration_target, - [self()]}), - {ok, BQ} = application:get_env(backing_queue_module), - BQS = BQ:init(Q, false), - {ok, #state { q = Q, - gm = GM, - master_node = node(MPid), - backing_queue = BQ, - backing_queue_state = BQS, - rate_timer_ref = undefined, - sync_timer_ref = undefined, - - sender_queues = dict:new(), - msg_id_ack = dict:new(), - msg_id_status = dict:new() - }, hibernate, - {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, - ?DESIRED_HIBERNATE}}; - {error, Error} 
-> - {stop, Error} - end. + {ok, MPid} = + rabbit_misc:execute_mnesia_transaction( + fun () -> + [Q1 = #amqqueue { pid = QPid, mirror_pids = MPids }] = + mnesia:read({rabbit_queue, QueueName}), + %% ASSERTION + [] = [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node], + MPids1 = MPids ++ [Self], + mnesia:write(rabbit_queue, + Q1 #amqqueue { mirror_pids = MPids1 }, + write), + {ok, QPid} + end), + ok = file_handle_cache:register_callback( + rabbit_amqqueue, set_maximum_since_use, [self()]), + ok = rabbit_memory_monitor:register( + self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), + {ok, BQ} = application:get_env(backing_queue_module), + BQS = BQ:init(Q, false), + {ok, #state { q = Q, + gm = GM, + master_node = node(MPid), + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = undefined, + sync_timer_ref = undefined, + + sender_queues = dict:new(), + msg_id_ack = dict:new(), + msg_id_status = dict:new() + }, hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) -> %% Synchronous, "immediate" delivery mode @@ -578,7 +570,7 @@ process_instruction( State1 = State #state { sender_queues = SQ1, msg_id_status = MS2 }, - %% we probably want to work in BQ:validate_message here + {ok, case Deliver of false -> @@ -649,10 +641,11 @@ process_instruction({requeue, MsgPropsFun, MsgIds}, State #state { msg_id_ack = MA1, backing_queue_state = BQS1 }; false -> - %% the only thing we can safely do is nuke out our BQ - %% and MA + %% The only thing we can safely do is nuke out our BQ + %% and MA. The interaction between this and confirms + %% doesn't really bear thinking about... 
{_Count, BQS1} = BQ:purge(BQS), - {MsgIds, BQS2} = ack_all(BQ, MA, BQS1), + {_MsgIds, BQS2} = ack_all(BQ, MA, BQS1), State #state { msg_id_ack = dict:new(), backing_queue_state = BQS2 } end}; -- cgit v1.2.1 From 566ff20ae152accafd4992ccae0b007333f366fd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 11 Mar 2011 16:21:05 +0000 Subject: Make new mirrors magically occur when set to [] and a new node in the cluster appears. Fix various other bits and pieces --- src/rabbit_mirror_queue_master.erl | 36 ++++++++++++++++++----------------- src/rabbit_mirror_queue_misc.erl | 18 +++++++++++++++++- src/rabbit_mirror_queue_slave_sup.erl | 6 ++++++ 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index c5a2e88a..25a1e4b8 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -53,7 +53,7 @@ stop() -> %% Same as start/1. exit({not_valid_for_generic_backing_queue, ?MODULE}). -init(#amqqueue { arguments = Args } = Q, Recover) -> +init(#amqqueue { arguments = Args, name = QName } = Q, Recover) -> {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, undefined), GM = rabbit_mirror_queue_coordinator:get_gm(CPid), {_Type, Nodes} = rabbit_misc:table_lookup(Args, <<"x-mirror">>), @@ -62,7 +62,7 @@ init(#amqqueue { arguments = Args } = Q, Recover) -> _ -> [list_to_atom(binary_to_list(Node)) || {longstr, Node} <- Nodes] end, - [rabbit_mirror_queue_misc:add_slave(Q, Node) || Node <- Nodes1], + [rabbit_mirror_queue_misc:add_slave(QName, Node) || Node <- Nodes1], {ok, BQ} = application:get_env(backing_queue_module), BQS = BQ:init(Q, Recover), #state { gm = GM, @@ -120,11 +120,11 @@ publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, %% Must use confirmed_broadcast here in order to guarantee that %% all slaves are forced to interpret this publish_delivered at %% the same point, especially if we die and a slave is promoted. 
- BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), ok = gm:confirmed_broadcast( GM, {publish, {true, AckRequired}, ChPid, MsgProps, Msg}), - BQS1 = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), - State #state { backing_queue_state = BQS1 }. + {AckTag, BQS1} = + BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), + {AckTag, State #state { backing_queue_state = BQS1 }}. dropwhile(Fun, State = #state { gm = GM, backing_queue = BQ, @@ -247,20 +247,22 @@ invoke(Mod, Fun, State = #state { backing_queue = BQ, case dict:find(MsgId, SSN) of error -> {[MsgId | MsgIdsN], SSN}; - {ok, published} -> - %% It was published when we were a slave, - %% and we were promoted before we saw the - %% publish from the channel. We still - %% haven't seen the channel publish, and - %% consequently we need to filter out the - %% confirm here. We will issue the confirm - %% when we see the publish from the - %% channel. - {MsgIdsN, dict:store(MsgId, confirmed, SSN)} + {ok, published} -> + %% It was published when we were a slave, + %% and we were promoted before we saw the + %% publish from the channel. We still + %% haven't seen the channel publish, and + %% consequently we need to filter out the + %% confirm here. We will issue the confirm + %% when we see the publish from the channel. + {MsgIdsN, dict:store(MsgId, confirmed, SSN)}; + {ok, confirmed} -> + %% Well, confirms are racy by definition. + {[MsgId | MsgIdsN], SSN} end - end, {[], SS}, MsgIds), + end, {[], SS}, MsgIds), {MsgIds1, State #state { backing_queue_state = BQS1, - seen_status = SS1 }}. + seen_status = SS1 }}. validate_message(Message = #basic_message { id = MsgId }, State = #state { seen_status = SS, diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 23d7c398..51c2a28a 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -16,7 +16,7 @@ -module(rabbit_mirror_queue_misc). --export([remove_from_queue/2, add_slave/2, add_slave/3]). 
+-export([remove_from_queue/2, add_slave/2, add_slave/3, on_node_up/0]). -include("rabbit.hrl"). @@ -74,3 +74,19 @@ add_slave(Queue, MirrorNode) -> end end end). + +on_node_up() -> + Qs = + rabbit_misc:execute_mnesia_transaction( + fun () -> + mnesia:foldl( + fun (#amqqueue{ arguments = Args, name = QName }, QsN) -> + case rabbit_misc:table_lookup( + Args, <<"x-mirror">>) of + {_Type, []} -> [QName | QsN]; + _ -> QsN + end + end, [], rabbit_queue) + end), + [add_slave(Q, node()) || Q <- Qs], + ok. diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl index 80c0520c..2fb3be51 100644 --- a/src/rabbit_mirror_queue_slave_sup.erl +++ b/src/rabbit_mirror_queue_slave_sup.erl @@ -22,6 +22,12 @@ {requires, queue_sup_queue_recovery}, {enables, routing_ready}]}). +-rabbit_boot_step({mirrored_queues, + [{description, "adding mirrors to queues"}, + {mfa, {rabbit_mirror_queue_misc, on_node_up, []}}, + {requires, mirror_queue_slave_sup}, + {enables, routing_ready}]}). + -behaviour(supervisor2). -export([start/0, start_link/0, start_child/2]). 
-- cgit v1.2.1 From 279e858cac439e493fe3990a4ef0ef689a0ff29b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 11 Mar 2011 17:19:24 +0000 Subject: Make slaves rejoin on boot --- src/rabbit_mirror_queue_misc.erl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 51c2a28a..bf341c74 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -83,8 +83,17 @@ on_node_up() -> fun (#amqqueue{ arguments = Args, name = QName }, QsN) -> case rabbit_misc:table_lookup( Args, <<"x-mirror">>) of - {_Type, []} -> [QName | QsN]; - _ -> QsN + {_Type, []} -> + [QName | QsN]; + {_Type, Nodes} -> + Nodes1 = [list_to_atom(binary_to_list(Node)) + || {longstr, Node} <- Nodes], + case lists:member(node(), Nodes1) of + true -> [QName | QsN]; + false -> QsN + end; + _ -> + QsN end end, [], rabbit_queue) end), -- cgit v1.2.1 From 96ed87d84faf2fa623c0d34cdfa4ec4bb32d9ea4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 16 Mar 2011 12:44:13 +0000 Subject: Made gm do batching of messages. This has an astonishing performance impact: if every broadcast msg to the gm results in network activity then performance is low - presumably serialisation of, and network broadcast of small messages is very inefficient. By batching broadcasts and then sending many on a timer, performance is much much higher. --- src/gm.erl | 134 +++++++++++++++++++++++++++++++++++---------------- src/gm_soak_test.erl | 8 +-- 2 files changed, 96 insertions(+), 46 deletions(-) diff --git a/src/gm.erl b/src/gm.erl index 8cf22581..5b3623cf 100644 --- a/src/gm.erl +++ b/src/gm.erl @@ -376,15 +376,16 @@ confirmed_broadcast/2, group_members/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, prioritise_info/2]). + code_change/3, prioritise_cast/2, prioritise_info/2]). -export([behaviour_info/1]). --export([table_definitions/0]). 
+-export([table_definitions/0, flush/1]). -define(GROUP_TABLE, gm_group). -define(HIBERNATE_AFTER_MIN, 1000). -define(DESIRED_HIBERNATE, 10000). +-define(BROADCAST_TIMER, 25). -define(SETS, ordsets). -define(DICT, orddict). @@ -398,7 +399,9 @@ pub_count, members_state, callback_args, - confirms + confirms, + broadcast_buffer, + broadcast_timer }). -record(gm_group, { name, version, members }). @@ -508,21 +511,26 @@ confirmed_broadcast(Server, Msg) -> group_members(Server) -> gen_server2:call(Server, group_members, infinity). +flush(Server) -> + gen_server2:cast(Server, flush). + init([GroupName, Module, Args]) -> random:seed(now()), gen_server2:cast(self(), join), Self = self(), - {ok, #state { self = Self, - left = {Self, undefined}, - right = {Self, undefined}, - group_name = GroupName, - module = Module, - view = undefined, - pub_count = 0, - members_state = undefined, - callback_args = Args, - confirms = queue:new() }, hibernate, + {ok, #state { self = Self, + left = {Self, undefined}, + right = {Self, undefined}, + group_name = GroupName, + module = Module, + view = undefined, + pub_count = 0, + members_state = undefined, + callback_args = Args, + confirms = queue:new(), + broadcast_buffer = [], + broadcast_timer = undefined }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -620,7 +628,11 @@ handle_cast(join, State = #state { self = Self, {Module:joined(Args, all_known_members(View)), State1}); handle_cast(leave, State) -> - {stop, normal, State}. + {stop, normal, State}; + +handle_cast(flush, State) -> + noreply( + flush_broadcast_buffer(State #state { broadcast_timer = undefined })). handle_info({'DOWN', MRef, process, _Pid, _Reason}, @@ -662,14 +674,17 @@ handle_info({'DOWN', MRef, process, _Pid, _Reason}, end. 
-terminate(Reason, #state { module = Module, - callback_args = Args }) -> +terminate(Reason, State = #state { module = Module, + callback_args = Args }) -> + flush_broadcast_buffer(State), Module:terminate(Args, Reason). code_change(_OldVsn, State, _Extra) -> {ok, State}. +prioritise_cast(flush, _State) -> 1; +prioritise_cast(_ , _State) -> 0. prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _State) -> 1; prioritise_info(_ , _State) -> 0. @@ -782,33 +797,62 @@ handle_msg({activity, _NotLeft, _Activity}, State) -> noreply(State) -> - {noreply, State, hibernate}. + {noreply, ensure_broadcast_timer(State), hibernate}. reply(Reply, State) -> - {reply, Reply, State, hibernate}. - -internal_broadcast(Msg, From, State = #state { self = Self, - pub_count = PubCount, - members_state = MembersState, - module = Module, - confirms = Confirms, - callback_args = Args }) -> - PubMsg = {PubCount, Msg}, - Activity = activity_cons(Self, [PubMsg], [], activity_nil()), - ok = maybe_send_activity(activity_finalise(Activity), State), - MembersState1 = - with_member( - fun (Member = #member { pending_ack = PA }) -> - Member #member { pending_ack = queue:in(PubMsg, PA) } - end, Self, MembersState), + {reply, Reply, ensure_broadcast_timer(State), hibernate}. + +ensure_broadcast_timer(State = #state { broadcast_buffer = [], + broadcast_timer = undefined }) -> + State; +ensure_broadcast_timer(State = #state { broadcast_buffer = [], + broadcast_timer = TRef }) -> + timer:cancel(TRef), + State #state { broadcast_timer = undefined }; +ensure_broadcast_timer(State = #state { broadcast_timer = undefined }) -> + {ok, TRef} = timer:apply_after(?BROADCAST_TIMER, ?MODULE, flush, [self()]), + State #state { broadcast_timer = TRef }; +ensure_broadcast_timer(State) -> + State. 
+ +internal_broadcast(Msg, From, State = #state { self = Self, + pub_count = PubCount, + module = Module, + confirms = Confirms, + callback_args = Args, + broadcast_buffer = Buffer }) -> + Result = Module:handle_msg(Args, Self, Msg), + Buffer1 = [{PubCount, Msg} | Buffer], Confirms1 = case From of none -> Confirms; _ -> queue:in({PubCount, From}, Confirms) end, - handle_callback_result({Module:handle_msg(Args, Self, Msg), - State #state { pub_count = PubCount + 1, - members_state = MembersState1, - confirms = Confirms1 }}). + State1 = State #state { pub_count = PubCount + 1, + confirms = Confirms1, + broadcast_buffer = Buffer1 }, + case From =/= none of + true -> + handle_callback_result({Result, flush_broadcast_buffer(State1)}); + false -> + handle_callback_result( + {Result, State1 #state { broadcast_buffer = Buffer1 }}) + end. + +flush_broadcast_buffer(State = #state { broadcast_buffer = [] }) -> + State; +flush_broadcast_buffer(State = #state { self = Self, + members_state = MembersState, + broadcast_buffer = Buffer }) -> + Pubs = lists:reverse(Buffer), + Activity = activity_cons(Self, Pubs, [], activity_nil()), + ok = maybe_send_activity(activity_finalise(Activity), State), + MembersState1 = with_member( + fun (Member = #member { pending_ack = PA }) -> + PA1 = queue:join(PA, queue:from_list(Pubs)), + Member #member { pending_ack = PA1 } + end, Self, MembersState), + State #state { members_state = MembersState1, + broadcast_buffer = [] }. %% --------------------------------------------------------------------------- @@ -1093,16 +1137,22 @@ maybe_monitor(Self, Self) -> maybe_monitor(Other, _Self) -> erlang:monitor(process, Other). 
-check_neighbours(State = #state { self = Self, - left = Left, - right = Right, - view = View }) -> +check_neighbours(State = #state { self = Self, + left = Left, + right = Right, + view = View, + broadcast_buffer = Buffer }) -> #view_member { left = VLeft, right = VRight } = fetch_view_member(Self, View), Ver = view_version(View), Left1 = ensure_neighbour(Ver, Self, Left, VLeft), Right1 = ensure_neighbour(Ver, Self, Right, VRight), - State1 = State #state { left = Left1, right = Right1 }, + Buffer1 = case Right1 of + {Self, undefined} -> []; + _ -> Buffer + end, + State1 = State #state { left = Left1, right = Right1, + broadcast_buffer = Buffer1 }, ok = maybe_send_catchup(Right, State1), State1. diff --git a/src/gm_soak_test.erl b/src/gm_soak_test.erl index 1f8832a6..4e30e1d5 100644 --- a/src/gm_soak_test.erl +++ b/src/gm_soak_test.erl @@ -80,12 +80,12 @@ handle_msg([], From, {test_msg, Num}) -> {ok, Num} -> ok; {ok, Num1} when Num < Num1 -> exit({{from, From}, - {duplicate_delivery_of, Num1}, - {expecting, Num}}); + {duplicate_delivery_of, Num}, + {expecting, Num1}}); {ok, Num1} -> exit({{from, From}, - {missing_delivery_of, Num}, - {received_early, Num1}}); + {received_early, Num}, + {expecting, Num1}}); error -> exit({{from, From}, {received_premature_delivery, Num}}) -- cgit v1.2.1 From 306ae34a68b5a15c789f68d75965ca9e3e8943ad Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 16 Mar 2011 13:08:22 +0000 Subject: A different and largely opposite version of 'never'... --- src/rabbit_mirror_queue_slave.erl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index d20b00d4..fd501624 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -313,7 +313,6 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> {MS1, CMs} = lists:foldl( fun (MsgId, {MSN, CMsN} = Acc) -> - %% We will never see {confirmed, ChPid} here. 
                    case dict:find(MsgId, MSN) of
                        error ->
                            %% If it needed confirming, it'll have
@@ -327,7 +326,14 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) ->
                            %% Seen from both GM and Channel. Can now
                            %% confirm.
                            {dict:erase(MsgId, MSN),
-                             gb_trees_cons(ChPid, MsgSeqNo, CMsN)}
+                             gb_trees_cons(ChPid, MsgSeqNo, CMsN)};
+                        {ok, {confirmed, ChPid}} ->
+                            %% It's already been confirmed. This is
+                            %% probably because it's been both sync'd to disk
+                            %% and then delivered and ack'd before we've
+                            %% seen the publish from the
+                            %% channel. Nothing to do here.
+                            Acc
                    end
            end, {MS, gb_trees:empty()}, MsgIds),
    gb_trees:map(fun (ChPid, MsgSeqNos) ->
-- 
cgit v1.2.1


From 837e4a8e7328a586ad83707041d36652ac548417 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 16 Mar 2011 13:16:30 +0000
Subject: whoops

---
 src/rabbit_mirror_queue_slave.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl
index fd501624..4a9dc1fe 100644
--- a/src/rabbit_mirror_queue_slave.erl
+++ b/src/rabbit_mirror_queue_slave.erl
@@ -327,7 +327,7 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) ->
                             %% confirm.
                             {dict:erase(MsgId, MSN),
                              gb_trees_cons(ChPid, MsgSeqNo, CMsN)};
-                        {ok, {confirmed, ChPid}} ->
+                        {ok, {confirmed, _ChPid}} ->
                             %% It's already been confirmed. 
This is
                            %% probably because it's been both sync'd to disk
                            %% and then delivered and ack'd before we've
-- 
cgit v1.2.1


From 7de37afd01a5aa913a7bced14d1e0e00432793e3 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Thu, 7 Apr 2011 12:18:32 +0100
Subject: Validate_msg becomes is_duplicate, which makes much more sense

---
 include/rabbit_backing_queue_spec.hrl |  4 ++--
 src/rabbit_amqqueue_process.erl       | 21 +++++++++++++--------
 src/rabbit_backing_queue.erl          |  6 ++++--
 src/rabbit_mirror_queue_master.erl    | 24 ++++++++++++------------
 src/rabbit_variable_queue.erl         |  4 ++--
 5 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl
index b85e4ad6..f5e441dc 100644
--- a/include/rabbit_backing_queue_spec.hrl
+++ b/include/rabbit_backing_queue_spec.hrl
@@ -71,5 +71,5 @@
 -spec(handle_pre_hibernate/1 :: (state()) -> state()).
 -spec(status/1 :: (state()) -> [{atom(), any()}]).
 -spec(invoke/3 :: (atom(), fun ((atom(), A) -> A), state()) -> state()).
--spec(validate_message/2 :: (rabbit_types:basic_message(), state()) ->
-                                 {'invalid' | 'valid', state()}).
+-spec(is_duplicate/2 :: (rabbit_types:basic_message(), state()) ->
+                             {boolean(), state()}).
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index 9e54312f..575d69f4 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -523,12 +523,17 @@ attempt_delivery(Delivery = #delivery{txn = none,
         immediately -> rabbit_channel:confirm(ChPid, [MsgSeqNo]);
         _           -> ok
     end,
-    case BQ:validate_message(Message, BQS) of
-        {invalid, BQS1} ->
-            %% if the message is invalid, we pretend it was delivered
-            %% fine
+    case BQ:is_duplicate(Message, BQS) of
+        {true, BQS1} ->
+            %% if the message has previously been seen by the BQ then
+            %% it must have been seen under the same circumstances as
+            %% now: i.e. if it is now a deliver_immediately then it
+            %% must have been before. 
Consequently, if the BQ has seen + %% it before then it's safe to assume it's been delivered + %% (i.e. the only thing that cares about that is + %% deliver_immediately). {true, Confirm, State#q{backing_queue_state = BQS1}}; - {valid, BQS1} -> + {false, BQS1} -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = fun (AckRequired, false, @@ -555,10 +560,10 @@ attempt_delivery(Delivery = #delivery{txn = Txn, message = Message}, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> Confirm = should_confirm_message(Delivery, State), - case BQ:validate_message(Message, BQS) of - {invalid, BQS1} -> + case BQ:is_duplicate(Message, BQS) of + {true, BQS1} -> {true, Confirm, State#q{backing_queue_state = BQS1}}; - {valid, BQS1} -> + {false, BQS1} -> store_ch_record((ch_record(ChPid))#cr{txn = Txn}), BQS2 = BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, ChPid, BQS1), diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 7087be91..dfa5500e 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -171,8 +171,10 @@ behaviour_info(callbacks) -> %% components need to pass functions into the backing queue. {invoke, 3}, - %% TODO: document me - {validate_message, 2} + %% Called prior to a publish or publish_delivered call. Allows + %% the BQ to signal that it's already seen this message and thus + %% the message should be dropped. + {is_duplicate, 2} ]; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 0ca73f03..42af4e51 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -22,7 +22,7 @@ requeue/3, len/1, is_empty/1, drain_confirmed/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, validate_message/2]). + status/1, invoke/3, is_duplicate/2]). -export([start/1, stop/0]). 
@@ -274,11 +274,11 @@ invoke(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. -validate_message(Message = #basic_message { id = MsgId }, - State = #state { seen_status = SS, - backing_queue = BQ, - backing_queue_state = BQS, - confirmed = Confirmed }) -> +is_duplicate(Message = #basic_message { id = MsgId }, + State = #state { seen_status = SS, + backing_queue = BQ, + backing_queue_state = BQS, + confirmed = Confirmed }) -> %% Here, we need to deal with the possibility that we're about to %% receive a message that we've already seen when we were a slave %% (we received it via gm). Thus if we do receive such message now @@ -297,10 +297,10 @@ validate_message(Message = #basic_message { id = MsgId }, %% confirmation is waiting. amqqueue_process will have, in %% its msg_id_to_channel mapping, the entry for dealing %% with the confirm when that comes back in (it's added - %% immediately prior to calling validate_message). The msg - %% is invalid. We will not see this again, nor will we be + %% immediately after calling is_duplicate). The msg is + %% invalid. We will not see this again, nor will we be %% further involved in confirming this message, so erase. - {invalid, State #state { seen_status = dict:erase(MsgId, SS) }}; + {true, State #state { seen_status = dict:erase(MsgId, SS) }}; {ok, confirmed} -> %% It got published when we were a slave via gm, and %% confirmed some time after that (maybe even after @@ -309,7 +309,7 @@ validate_message(Message = #basic_message { id = MsgId }, %% msg_seq_no was (and thus confirm as a slave). So we %% need to confirm now. As above, amqqueue_process will %% have the entry for the msg_id_to_channel mapping added - %% immediately prior to calling validate_message/2. - {invalid, State #state { seen_status = dict:erase(MsgId, SS), - confirmed = [MsgId | Confirmed] }} + %% immediately after calling is_duplicate/2. 
+ {true, State #state { seen_status = dict:erase(MsgId, SS), + confirmed = [MsgId | Confirmed] }} end. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 486d30fd..a8f9974a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -22,7 +22,7 @@ requeue/3, len/1, is_empty/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, validate_message/2, multiple_routing_keys/0]). + status/1, invoke/3, is_duplicate/2, multiple_routing_keys/0]). -export([start/1, stop/0]). @@ -886,7 +886,7 @@ status(#vqstate { invoke(?MODULE, Fun, State) -> Fun(?MODULE, State). -validate_message(_Msg, State) -> {valid, State}. +is_duplicate(_Msg, State) -> {false, State}. %%---------------------------------------------------------------------------- %% Minor helpers -- cgit v1.2.1 From 176769e7112ab3b311ab0cec60d944ca267709f3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 7 Apr 2011 13:29:28 +0100 Subject: Add BQ:discard, correct BQ:is_duplicate, finally fix the last bits of immediate delivery, though hopefully in a way which has not leaked through to the lower layers... --- include/rabbit_backing_queue_spec.hrl | 3 +- src/rabbit_amqqueue_process.erl | 39 ++++++++++++-------- src/rabbit_backing_queue.erl | 13 +++++-- src/rabbit_mirror_queue_master.erl | 18 +++++++--- src/rabbit_mirror_queue_slave.erl | 68 +++++++++++++++++++++++++++++------ src/rabbit_variable_queue.erl | 5 ++- 6 files changed, 112 insertions(+), 34 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index f5e441dc..b0c5f13b 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -72,4 +72,5 @@ -spec(status/1 :: (state()) -> [{atom(), any()}]). -spec(invoke/3 :: (atom(), fun ((atom(), A) -> A), state()) -> state()). 
-spec(is_duplicate/2 :: (rabbit_types:basic_message(), state()) -> - {boolean(), state()}). + {'false'|'published'|'discarded', state()}). +-spec(discard/3 :: (rabbit_types:basic_message(), pid(), state()) -> state()). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 575d69f4..79f6472d 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -524,15 +524,6 @@ attempt_delivery(Delivery = #delivery{txn = none, _ -> ok end, case BQ:is_duplicate(Message, BQS) of - {true, BQS1} -> - %% if the message has previously been seen by the BQ then - %% it must have been seen under the same circumstances as - %% now: i.e. if it is now a deliver_immediately then it - %% must have been before. Consequently, if the BQ has seen - %% it before then it's safe to assume it's been delivered - %% (i.e. the only thing that cares about that is - %% deliver_immediately). - {true, Confirm, State#q{backing_queue_state = BQS1}}; {false, BQS1} -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = @@ -553,7 +544,17 @@ attempt_delivery(Delivery = #delivery{txn = none, {Delivered, State2} = deliver_msgs_to_consumers({ PredFun, DeliverFun }, false, State#q{backing_queue_state = BQS1}), - {Delivered, Confirm, State2} + {Delivered, Confirm, State2}; + {Duplicate, BQS1} -> + %% if the message has previously been seen by the BQ then + %% it must have been seen under the same circumstances as + %% now: i.e. if it is now a deliver_immediately then it + %% must have been before. 
+ Delivered = case Duplicate of + published -> true; + discarded -> false + end, + {Delivered, Confirm, State#q{backing_queue_state = BQS1}} end; attempt_delivery(Delivery = #delivery{txn = Txn, sender = ChPid, @@ -561,13 +562,17 @@ attempt_delivery(Delivery = #delivery{txn = Txn, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> Confirm = should_confirm_message(Delivery, State), case BQ:is_duplicate(Message, BQS) of - {true, BQS1} -> - {true, Confirm, State#q{backing_queue_state = BQS1}}; {false, BQS1} -> store_ch_record((ch_record(ChPid))#cr{txn = Txn}), BQS2 = BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, ChPid, BQS1), - {true, Confirm, State#q{backing_queue_state = BQS2}} + {true, Confirm, State#q{backing_queue_state = BQS2}}; + {Duplicate, BQS1} -> + Delivered = case Duplicate of + published -> true; + discarded -> false + end, + {Delivered, Confirm, State#q{backing_queue_state = BQS1}} end. deliver_or_enqueue(Delivery = #delivery{message = Message}, State) -> @@ -721,6 +726,12 @@ rollback_transaction(Txn, C, State = #q{backing_queue = BQ, subtract_acks(A, B) when is_list(B) -> lists:foldl(fun sets:del_element/2, A, B). +discard_delivery(#delivery{sender = ChPid, + message = Message}, + State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + State#q{backing_queue_state = BQ:discard(Message, ChPid, BQS)}. 
+ reset_msg_expiry_fun(TTL) -> fun(MsgProps) -> MsgProps#message_properties{expiry = calculate_msg_expiry(TTL)} @@ -910,7 +921,7 @@ handle_call({deliver_immediately, Delivery}, _From, State) -> {Delivered, Confirm, State1} = attempt_delivery(Delivery, State), reply(Delivered, case Delivered of true -> maybe_record_confirm_message(Confirm, State1); - false -> State1 + false -> discard_delivery(Delivery, State1) end); handle_call({deliver, Delivery}, From, State) -> diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index dfa5500e..0bbbd559 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -172,9 +172,16 @@ behaviour_info(callbacks) -> {invoke, 3}, %% Called prior to a publish or publish_delivered call. Allows - %% the BQ to signal that it's already seen this message and thus - %% the message should be dropped. - {is_duplicate, 2} + %% the BQ to signal that it's already seen this message (and in + %% what capacity - i.e. was it published previously or discarded + %% previously) and thus the message should be dropped. + {is_duplicate, 2}, + + %% Called to inform the BQ about messages which have reached the + %% queue, but are not going to be further passed to BQ for some + %% reason. Note that this is not invoked for messages for which + %% BQ:is_duplicate/2 has already returned {true, BQS}. + {discard, 3} ]; behaviour_info(_Other) -> undefined. diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 42af4e51..b0a22edd 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -22,7 +22,7 @@ requeue/3, len/1, is_empty/1, drain_confirmed/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, is_duplicate/2]). + status/1, invoke/3, is_duplicate/2, discard/3]). -export([start/1, stop/0]). 
@@ -150,6 +150,7 @@ drain_confirmed(State = #state { backing_queue = BQ, {MsgIds1, SS1} = lists:foldl( fun (MsgId, {MsgIdsN, SSN}) -> + %% We will never see 'discarded' here case dict:find(MsgId, SSN) of error -> {[MsgId | MsgIdsN], SSN}; @@ -300,7 +301,7 @@ is_duplicate(Message = #basic_message { id = MsgId }, %% immediately after calling is_duplicate). The msg is %% invalid. We will not see this again, nor will we be %% further involved in confirming this message, so erase. - {true, State #state { seen_status = dict:erase(MsgId, SS) }}; + {published, State #state { seen_status = dict:erase(MsgId, SS) }}; {ok, confirmed} -> %% It got published when we were a slave via gm, and %% confirmed some time after that (maybe even after @@ -310,6 +311,15 @@ is_duplicate(Message = #basic_message { id = MsgId }, %% need to confirm now. As above, amqqueue_process will %% have the entry for the msg_id_to_channel mapping added %% immediately after calling is_duplicate/2. - {true, State #state { seen_status = dict:erase(MsgId, SS), - confirmed = [MsgId | Confirmed] }} + {published, State #state { seen_status = dict:erase(MsgId, SS), + confirmed = [MsgId | Confirmed] }}; + {ok, discarded} -> + {discarded, State #state { seen_status = dict:erase(MsgId, SS) }} end. + +discard(Msg = #basic_message {}, ChPid, + State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {discard, ChPid, Msg}), + State#state{backing_queue_state = BQ:discard(Msg, ChPid, BQS)}. 
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 4a9dc1fe..628135b1 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -313,6 +313,7 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> {MS1, CMs} = lists:foldl( fun (MsgId, {MSN, CMsN} = Acc) -> + %% We will never see 'discarded' here case dict:find(MsgId, MSN) of error -> %% If it needed confirming, it'll have @@ -395,21 +396,25 @@ promote_me(From, #state { q = Q, %% %% MS contains the following three entry types: %% - %% {published, ChPid}: + %% a) {published, ChPid}: %% published via gm only; pending arrival of publication from %% channel, maybe pending confirm. %% - %% {published, ChPid, MsgSeqNo}: + %% b) {published, ChPid, MsgSeqNo}: %% published via gm and channel; pending confirm. %% - %% {confirmed, ChPid}: + %% c) {confirmed, ChPid}: %% published via gm only, and confirmed; pending publication %% from channel. %% - %% The two outer forms only, need to go to the master state + %% d) discarded + %% seen via gm only as discarded. Pending publication from + %% channel + %% + %% The forms a, c and d only, need to go to the master state %% seen_status (SS). %% - %% The middle form only, needs to go through to the queue_process + %% The form b only, needs to go through to the queue_process %% state to form the msg_id_to_channel mapping (MTC). %% %% No messages that are enqueued from SQ at this point will have @@ -420,9 +425,12 @@ promote_me(From, #state { q = Q, %% this does not affect MS, nor which bits go through to SS in %% Master, or MTC in queue_process. 
- SS = dict:from_list([{MsgId, Status} - || {MsgId, {Status, _ChPid}} <- dict:to_list(MS), - Status =:= published orelse Status =:= confirmed]), + MSList = dict:to_list(MS), + SS = dict:from_list( + [E || E = {_MsgId, discarded} <- MSList] ++ + [{MsgId, Status} + || {MsgId, {Status, _ChPid}} <- MSList, + Status =:= published orelse Status =:= confirmed]), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( CPid, BQ, BQS, GM, SS), @@ -528,7 +536,11 @@ maybe_enqueue_message( immediately -> ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), State #state { msg_id_status = dict:erase(MsgId, MS) } - end + end; + {ok, discarded} -> + %% We've already heard from GM that the msg is to be + %% discarded. We won't see this again. + State #state { msg_id_status = dict:erase(MsgId, MS) } end. process_instruction( @@ -559,8 +571,7 @@ process_instruction( {{value, {Delivery = #delivery { msg_seq_no = MsgSeqNo, message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, - MQ1} -> + _EnqueueOnPromotion}}, MQ1} -> %% We received the msg from the channel %% first. Thus we need to deal with confirms %% here. @@ -604,6 +615,41 @@ process_instruction( State1 #state { backing_queue_state = BQS1, msg_id_ack = MA1 } end}; +process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, + State = #state { sender_queues = SQ, + backing_queue = BQ, + backing_queue_state = BQS, + msg_id_status = MS }) -> + %% Many of the comments around the publish head above apply here + %% too. 
+ MS1 = dict:store(MsgId, discarded, MS), + {SQ1, MS2} = + case dict:find(ChPid, SQ) of + error -> + {SQ, MS1}; + {ok, MQ} -> + case queue:out(MQ) of + {empty, _MQ} -> + {SQ, MS1}; + {{value, {#delivery { + message = #basic_message { id = MsgId } }, + _EnqueueOnPromotion}}, MQ1} -> + %% We've already seen it from the channel, + %% we're not going to see this again, so don't + %% add it to MS + {dict:store(ChPid, MQ1, SQ), MS}; + {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ1} -> + %% The instruction was sent to us before we + %% were within the mirror_pids within the + %% #amqqueue{} record. We'll never receive the + %% message directly from the channel. + {SQ, MS} + end + end, + BQS1 = BQ:discard(Msg, ChPid, BQS), + {ok, State #state { sender_queues = SQ1, + msg_id_status = MS2, + backing_queue_state = BQS1 }}; process_instruction({set_length, Length}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a8f9974a..84987c88 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -22,7 +22,8 @@ requeue/3, len/1, is_empty/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, is_duplicate/2, multiple_routing_keys/0]). + status/1, invoke/3, is_duplicate/2, discard/3, + multiple_routing_keys/0]). -export([start/1, stop/0]). @@ -888,6 +889,8 @@ invoke(?MODULE, Fun, State) -> is_duplicate(_Msg, State) -> {false, State}. +discard(_Msg, _ChPid, State) -> State. + %%---------------------------------------------------------------------------- %% Minor helpers %%---------------------------------------------------------------------------- -- cgit v1.2.1 From 9c42e0eece5965fb7d4375842bda712015d01f40 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 7 Apr 2011 15:55:19 +0100 Subject: Grrr. 
non-maskable-interrupt half way through implementing txns --- src/rabbit_mirror_queue_master.erl | 105 ++++++++++++++++++++++--------------- src/rabbit_mirror_queue_slave.erl | 20 +++++-- 2 files changed, 79 insertions(+), 46 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index b0a22edd..a61c32e0 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -26,7 +26,7 @@ -export([start/1, stop/0]). --export([promote_backing_queue_state/5]). +-export([promote_backing_queue_state/6]). -behaviour(rabbit_backing_queue). @@ -38,7 +38,9 @@ backing_queue_state, set_delivered, seen_status, - confirmed + confirmed, + ack_msg_id, + abandoned_txns }). %% --------------------------------------------------------------------------- @@ -73,16 +75,19 @@ init(#amqqueue { arguments = Args, name = QName } = Q, Recover, backing_queue_state = BQS, set_delivered = 0, seen_status = dict:new(), - confirmed = [] }. + confirmed = [], + ack_msg_id = dict:new() }. -promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus) -> +promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, AbandonedTxns) -> #state { gm = GM, coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, set_delivered = BQ:len(BQS), seen_status = SeenStatus, - confirmed = [] }. + confirmed = [], + ack_msg_id = dict:new(), + abandoned_txns = AbandonedTxns }. terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. 
The queue is going down but @@ -119,7 +124,8 @@ publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, ChPid, State = #state { gm = GM, seen_status = SS, backing_queue = BQ, - backing_queue_state = BQS }) -> + backing_queue_state = BQS, + ack_msg_id = AM }) -> false = dict:is_key(MsgId, SS), %% ASSERTION %% Must use confirmed_broadcast here in order to guarantee that %% all slaves are forced to interpret this publish_delivered at @@ -128,7 +134,9 @@ publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, GM, {publish, {true, AckRequired}, ChPid, MsgProps, Msg}), {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), - {AckTag, State #state { backing_queue_state = BQS1 }}. + AM1 = maybe_store_acktag(AckTag, MsgId, AM), + {AckTag, State #state { backing_queue_state = BQS1, + ack_msg_id = AM1 }}. dropwhile(Fun, State = #state { gm = GM, backing_queue = BQ, @@ -175,7 +183,8 @@ drain_confirmed(State = #state { backing_queue = BQ, fetch(AckRequired, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = SetDelivered }) -> + set_delivered = SetDelivered, + ack_msg_id = AM }) -> {Result, BQS1} = BQ:fetch(AckRequired, BQS), State1 = State #state { backing_queue_state = BQS1 }, case Result of @@ -186,53 +195,60 @@ fetch(AckRequired, State = #state { gm = GM, ok = gm:broadcast(GM, {fetch, AckRequired, MsgId, Remaining}), IsDelivered1 = IsDelivered orelse SetDelivered > 0, SetDelivered1 = lists:max([0, SetDelivered - 1]), + AM1 = maybe_store_acktag(AckTag, MsgId, AM), {{Message, IsDelivered1, AckTag, Remaining}, - State1 #state { set_delivered = SetDelivered1 }} + State1 #state { set_delivered = SetDelivered1, + ack_msg_id = AM1 }} end. 
ack(AckTags, State = #state { gm = GM, backing_queue = BQ, - backing_queue_state = BQS }) -> + backing_queue_state = BQS, + ack_msg_id = AM }) -> {MsgIds, BQS1} = BQ:ack(AckTags, BQS), + AM1 = lists:foldl(fun dict:erase/2, AM, AckTags), case MsgIds of [] -> ok; _ -> ok = gm:broadcast(GM, {ack, MsgIds}) end, - {MsgIds, State #state { backing_queue_state = BQS1 }}. - -tx_publish(Txn, Msg, MsgProps, ChPid, #state {} = State) -> - %% gm:broadcast(GM, {tx_publish, Txn, MsgId, MsgProps, ChPid}) - State. + {MsgIds, State #state { backing_queue_state = BQS1, + ack_msg_id = AM1 }}. + +tx_publish(Txn, Msg, MsgProps, ChPid, + State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {tx_publish, Txn, ChPid, MsgProps, Msg}), + BQS1 = BQ:tx_publish(Txn, Msg, MsgProps, ChPid, State), + State #state { backing_queue_state = BQS1 }. -tx_ack(Txn, AckTags, #state {} = State) -> - %% gm:broadcast(GM, {tx_ack, Txn, MsgIds}) +tx_ack(Txn, AckTags, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + ack_msg_id = AM }) -> + MsgIds = lists:foldl( + fun (AckTag, Acc) -> [dict:fetch(AckTag, AM) | Acc] end, + [], AckTags), + ok = gm:broadcast(GM, {tx_ack, Txn, MsgIds}) State. -tx_rollback(Txn, #state {} = State) -> - %% gm:broadcast(GM, {tx_rollback, Txn}) - {[], State}. - -tx_commit(Txn, PostCommitFun, MsgPropsFun, #state {} = State) -> - %% Maybe don't want to transmit the MsgPropsFun but what choice do - %% we have? OTOH, on the slaves, things won't be expiring on their - %% own (props are interpreted by amqqueue, not vq), so if the msg - %% props aren't quite the same, that doesn't matter. - %% - %% The PostCommitFun is actually worse - we need to prevent that - %% from being invoked until we have confirmation from all the - %% slaves that they've done everything up to there. 
- %% - %% In fact, transactions are going to need work seeing as it's at - %% this point that VQ mentions amqqueue, which will thus not work - %% on the slaves - we need to make sure that all the slaves do the - %% tx_commit_post_msg_store at the same point, and then when they - %% all confirm that (scatter/gather), we can finally invoke the - %% PostCommitFun. - %% - %% Another idea is that the slaves are actually driven with - %% pubacks and thus only the master needs to support txns - %% directly. - {[], State}. +tx_rollback(Txn, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), + {AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), + {AckTags, State #state { backing_queue_state = BQS1 }}. + +tx_commit(Txn, PostCommitFun, MsgPropsFun, + State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS, + ack_msg_id = AM }) -> + ok = gm:confirmed_broadcast(GM, {tx_commit, Txn, MsgPropsFun}), + {AckTags, BQS1} = BQ:tx_commit(Txn, PostCommitFun, MsgPropsFun, BQS), + AM1 = lists:foldl(fun dict:erase/2, AM, AckTags), + {AckTags, State #state { backing_queue_state = BQS, + ack_msg_id = AM }}. requeue(AckTags, MsgPropsFun, State = #state { gm = GM, backing_queue = BQ, @@ -323,3 +339,8 @@ discard(Msg = #basic_message {}, ChPid, backing_queue_state = BQS }) -> ok = gm:broadcast(GM, {discard, ChPid, Msg}), State#state{backing_queue_state = BQ:discard(Msg, ChPid, BQS)}. + +maybe_store_acktag(undefined, _MsgId, AM) -> + AM; +maybe_store_acktag(AckTag, MsgId, AM) -> + dict:store(AckTag, MsgId, AM). diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 628135b1..21a33341 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -55,7 +55,8 @@ sender_queues, %% :: Pid -> MsgQ msg_id_ack, %% :: MsgId -> AckTag - msg_id_status + msg_id_status, + open_transactions }). -define(SYNC_INTERVAL, 25). 
%% milliseconds @@ -105,7 +106,8 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), msg_id_ack = dict:new(), - msg_id_status = dict:new() + msg_id_status = dict:new(), + open_transactions = sets:new() }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -358,7 +360,8 @@ promote_me(From, #state { q = Q, rate_timer_ref = RateTRef, sender_queues = SQ, msg_id_ack = MA, - msg_id_status = MS }) -> + msg_id_status = MS, + open_transactions = OT }) -> rabbit_log:info("Promoting slave ~p for ~s~n", [self(), rabbit_misc:rs(Q #amqqueue.name)]), {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), @@ -366,6 +369,11 @@ promote_me(From, #state { q = Q, gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), + %% Start by rolling back all open transactions + + [ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}) + || Txn <- sets:to_list(OT)], + %% We find all the messages that we've received from channels but %% not from gm, and if they're due to be enqueued on promotion %% then we pass them to the @@ -380,7 +388,7 @@ promote_me(From, #state { q = Q, %% affect confirmations: if the message was previously pending a %% confirmation then it still will be, under the same msg_id. So %% as a master, we need to be prepared to filter out the - %% publication of said messages from the channel (validate_message + %% publication of said messages from the channel (is_duplicate %% (thus such requeued messages must remain in the msg_id_status %% (MS) which becomes seen_status (SS) in the master)). %% @@ -424,6 +432,10 @@ promote_me(From, #state { q = Q, %% those messages are then requeued. However, as discussed above, %% this does not affect MS, nor which bits go through to SS in %% Master, or MTC in queue_process. + %% + %% Everything that's in MA gets requeued. 
Consequently the new + %% master should start with a fresh AM as there are no messages + %% pending acks (txns will have been rolled back). MSList = dict:to_list(MS), SS = dict:from_list( -- cgit v1.2.1 From a80715423df8ae8904d1de86864e88c4a8e75c3d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 7 Apr 2011 18:19:16 +0100 Subject: Well txns are still only half in. But I want to go home. --- include/rabbit_backing_queue_spec.hrl | 3 +- src/rabbit_amqqueue_process.erl | 4 +- src/rabbit_backing_queue.erl | 7 +-- src/rabbit_mirror_queue_master.erl | 92 +++++++++++++++++++++++++---------- src/rabbit_mirror_queue_slave.erl | 69 ++++++++++++++++++++++---- src/rabbit_variable_queue.erl | 4 +- 6 files changed, 135 insertions(+), 44 deletions(-) diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index b0c5f13b..d9296bf6 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -71,6 +71,7 @@ -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). -spec(invoke/3 :: (atom(), fun ((atom(), A) -> A), state()) -> state()). --spec(is_duplicate/2 :: (rabbit_types:basic_message(), state()) -> +-spec(is_duplicate/3 :: + (rabbit_types:txn(), rabbit_types:basic_message(), state()) -> {'false'|'published'|'discarded', state()}). -spec(discard/3 :: (rabbit_types:basic_message(), pid(), state()) -> state()). 
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 79f6472d..d9be4909 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -523,7 +523,7 @@ attempt_delivery(Delivery = #delivery{txn = none, immediately -> rabbit_channel:confirm(ChPid, [MsgSeqNo]); _ -> ok end, - case BQ:is_duplicate(Message, BQS) of + case BQ:is_duplicate(none, Message, BQS) of {false, BQS1} -> PredFun = fun (IsEmpty, _State) -> not IsEmpty end, DeliverFun = @@ -561,7 +561,7 @@ attempt_delivery(Delivery = #delivery{txn = Txn, message = Message}, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> Confirm = should_confirm_message(Delivery, State), - case BQ:is_duplicate(Message, BQS) of + case BQ:is_duplicate(Txn, Message, BQS) of {false, BQS1} -> store_ch_record((ch_record(ChPid))#cr{txn = Txn}), BQS2 = BQ:tx_publish(Txn, Message, ?BASE_MESSAGE_PROPERTIES, ChPid, diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 0bbbd559..0955a080 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -175,12 +175,13 @@ behaviour_info(callbacks) -> %% the BQ to signal that it's already seen this message (and in %% what capacity - i.e. was it published previously or discarded %% previously) and thus the message should be dropped. - {is_duplicate, 2}, + {is_duplicate, 3}, %% Called to inform the BQ about messages which have reached the %% queue, but are not going to be further passed to BQ for some - %% reason. Note that this is not invoked for messages for which - %% BQ:is_duplicate/2 has already returned {true, BQS}. + %% reason. Note that this is may be invoked for messages for + %% which BQ:is_duplicate/2 has already returned {'published' | + %% 'discarded', BQS}. 
{discard, 3} ]; behaviour_info(_Other) -> diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index a61c32e0..8714c44d 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -22,7 +22,7 @@ requeue/3, len/1, is_empty/1, drain_confirmed/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, is_duplicate/2, discard/3]). + status/1, invoke/3, is_duplicate/3, discard/3]). -export([start/1, stop/0]). @@ -217,38 +217,59 @@ ack(AckTags, State = #state { gm = GM, tx_publish(Txn, Msg, MsgProps, ChPid, State = #state { gm = GM, backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {tx_publish, Txn, ChPid, MsgProps, Msg}), - BQS1 = BQ:tx_publish(Txn, Msg, MsgProps, ChPid, State), - State #state { backing_queue_state = BQS1 }. + backing_queue_state = BQS, + abandoned_txns = AbandonedTxns }) -> + case sets:is_element(Txn, AbandonedTxns) of + true -> State; + false -> ok = gm:broadcast(GM, {tx_publish, Txn, ChPid, MsgProps, Msg}), + BQS1 = BQ:tx_publish(Txn, Msg, MsgProps, ChPid, State), + State #state { backing_queue_state = BQS1 } + end. tx_ack(Txn, AckTags, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS, - ack_msg_id = AM }) -> - MsgIds = lists:foldl( - fun (AckTag, Acc) -> [dict:fetch(AckTag, AM) | Acc] end, - [], AckTags), - ok = gm:broadcast(GM, {tx_ack, Txn, MsgIds}) - State. + ack_msg_id = AM, + abandoned_txns = AbandonedTxns }) -> + case sets:is_element(Txn, AbandonedTxns) of + true -> + State; + false -> + MsgIds = lists:foldl( + fun (AckTag, Acc) -> [dict:fetch(AckTag, AM) | Acc] end, + [], AckTags), + ok = gm:broadcast(GM, {tx_ack, Txn, MsgIds}), + State + end. 
tx_rollback(Txn, State = #state { gm = GM, backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), - {AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), - {AckTags, State #state { backing_queue_state = BQS1 }}. + backing_queue_state = BQS, + abandoned_txns = AbandonedTxns }) -> + case sets:is_element(Txn, AbandonedTxns) of + true -> {[], State}; + false -> ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), + {AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), + {AckTags, State #state { backing_queue_state = BQS1 }} + end. tx_commit(Txn, PostCommitFun, MsgPropsFun, State = #state { gm = GM, backing_queue = BQ, backing_queue_state = BQS, ack_msg_id = AM }) -> - ok = gm:confirmed_broadcast(GM, {tx_commit, Txn, MsgPropsFun}), - {AckTags, BQS1} = BQ:tx_commit(Txn, PostCommitFun, MsgPropsFun, BQS), - AM1 = lists:foldl(fun dict:erase/2, AM, AckTags), - {AckTags, State #state { backing_queue_state = BQS, - ack_msg_id = AM }}. + case sets:is_element(Txn, AbandonedTxns) of + true -> + %% Don't worry - the channel will explode as it'll still + %% try to commit on the old master. + {[], State}; + false -> + ok = gm:confirmed_broadcast(GM, {tx_commit, Txn, MsgPropsFun}), + {AckTags, BQS1} = BQ:tx_commit(Txn, PostCommitFun, MsgPropsFun, BQS), + AM1 = lists:foldl(fun dict:erase/2, AM, AckTags), + {AckTags, State #state { backing_queue_state = BQS, + ack_msg_id = AM }} + end. requeue(AckTags, MsgPropsFun, State = #state { gm = GM, backing_queue = BQ, @@ -291,7 +312,7 @@ invoke(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. 
-is_duplicate(Message = #basic_message { id = MsgId }, +is_duplicate(none, Message = #basic_message { id = MsgId }, State = #state { seen_status = SS, backing_queue = BQ, backing_queue_state = BQS, @@ -330,15 +351,34 @@ is_duplicate(Message = #basic_message { id = MsgId }, {published, State #state { seen_status = dict:erase(MsgId, SS), confirmed = [MsgId | Confirmed] }}; {ok, discarded} -> - {discarded, State #state { seen_status = dict:erase(MsgId, SS) }} + %% Don't erase from SS here because discard/2 is about to + %% be called and we need to be able to detect this case + {discarded, State} + end; +is_duplicate(Txn, _Msg, State = #state { abandoned_txns = AbandonedTxns }) -> + %% There will be nothing in seen_status for any transactions that + %% are still in flight. + case sets:is_element(Txn, AbandonedTxns) of + true -> {published, State}; + false -> {false, State} end. -discard(Msg = #basic_message {}, ChPid, +discard(Msg = #basic_message { id = MsgId }, ChPid, State = #state { gm = GM, backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {discard, ChPid, Msg}), - State#state{backing_queue_state = BQ:discard(Msg, ChPid, BQS)}. + backing_queue_state = BQS, + seen_status = SS }) -> + %% It's a massive error if we get told to discard something that's + %% already been published or published-and-confirmed. To do that + %% would require non FIFO access... + case dict:find(MsgId, SS) of + error -> + ok = gm:broadcast(GM, {discard, ChPid, Msg}), + State #state { backing_queue_state = BQ:discard(Msg, ChPid, BQS), + seen_status = dict:erase(MsgId, SS) }; + discarded -> + State + end. 
maybe_store_acktag(undefined, _MsgId, AM) -> AM; diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 21a33341..34ec5109 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -107,7 +107,7 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), msg_id_ack = dict:new(), msg_id_status = dict:new(), - open_transactions = sets:new() + open_transactions = dict:new() }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -157,8 +157,32 @@ handle_call({gm_deaths, Deaths}, From, end; handle_call({run_backing_queue, Mod, Fun}, _From, State) -> - reply(ok, run_backing_queue(Mod, Fun, State)). + reply(ok, run_backing_queue(Mod, Fun, State)); +handle_call({commit, Txn, ChPid}, From, + State = #state { open_transactions = OT }) -> + case dict:find(Txn, OT) of + error -> + %% curious. We've not received _anything_ about this txn + %% so far via gm! + OT1 = dict:store(Txn, {undefined, {committed, From}}, OT), + noreply(State #state { open_transactions = OT1 }); + {ok, {committed, undefined}} -> + %% We've already finished via GM (our BQ has actually + %% replied back to us in the case of commit), so just + %% reply and tidy up. Note that because no one can every + %% consume from a slave, there are never going to be any + %% acks to return. + reply(ok, State #state { open_transactions = dict:erase(Txn, OT) }); + {ok, {open, undefined}} -> + %% Save who we're from, but we're still waiting for the + %% commit to arrive via GM + OT1 = dict:store(Txn, {open, {committed, From}}, OT), + noreply(State #state { open_transactions = OT1 }); + {ok, {abandoned, undefined}} -> + %% GM must have told us to roll back. + reply(ok, State #state { open_transactions = dict:erase(Txn, OT) }) + end. 
handle_cast({run_backing_queue, Mod, Fun}, State) -> noreply(run_backing_queue(Mod, Fun, State)); @@ -192,7 +216,25 @@ handle_cast(update_ram_duration, handle_cast(sync_timeout, State) -> noreply(backing_queue_idle_timeout( - State #state { sync_timer_ref = undefined })). + State #state { sync_timer_ref = undefined })); + +handle_cast({rollback, Txn, ChPid}, + State #state { open_transactions = OT }) -> + %% Will never see {'committed', _} or {_, 'abandoned'} or + %% {_, {'committed', From}} here + case dict:find(Txn, OT) of + error -> + %% odd. We've not received anything from GM about this. + OT1 = dict:store(Txn, {undefined, abandoned}, OT), + noreply(State #state { open_transactions = OT1 }); + {ok, {open, undefined}} -> + %% The rollback is yet to arrive via GM. + OT1 = dict:store(Txn, {open, abandoned}, OT), + noreply(State #state { open_transactions = OT1 }); + {ok, {abandoned, undefined}} -> + %% GM has already rolled back. Tidy up. + noreply(State #state { open_transactions = dict:erase(Txn, OT) }) + end. 
handle_info(timeout, State) -> noreply(backing_queue_idle_timeout(State)); @@ -370,9 +412,12 @@ promote_me(From, #state { q = Q, ok = gm:confirmed_broadcast(GM, heartbeat), %% Start by rolling back all open transactions - - [ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}) - || Txn <- sets:to_list(OT)], + BQS1 = lists:foldl( + fun (Txn, BQSN) -> + ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), + {_AckTags, BQSN1} = BQ:tx_rollback(Txn, BQSN), + BQSN1 + end, BQS, dict:fetch_keys(OT)), %% We find all the messages that we've received from channels but %% not from gm, and if they're due to be enqueued on promotion @@ -445,8 +490,7 @@ promote_me(From, #state { q = Q, Status =:= published orelse Status =:= confirmed]), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, SS), - + CPid, BQ, BQS1, GM, SS, OT), MTC = dict:from_list( [{MsgId, {ChPid, MsgSeqNo}} || @@ -516,7 +560,8 @@ stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> maybe_enqueue_message( Delivery = #delivery { message = #basic_message { id = MsgId }, msg_seq_no = MsgSeqNo, - sender = ChPid }, + sender = ChPid, + txn = none }, EnqueueOnPromotion, State = #state { sender_queues = SQ, msg_id_status = MS }) -> @@ -553,7 +598,11 @@ maybe_enqueue_message( %% We've already heard from GM that the msg is to be %% discarded. We won't see this again. State #state { msg_id_status = dict:erase(MsgId, MS) } - end. + end; +maybe_enqueue_message(_Delivery, State) -> + %% In a txn. Txns are completely driven by gm for simplicity, so + %% we're not going to do anything here. + State. 
process_instruction( {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { id = MsgId }}, diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 84987c88..7a3c17a2 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -22,7 +22,7 @@ requeue/3, len/1, is_empty/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, - status/1, invoke/3, is_duplicate/2, discard/3, + status/1, invoke/3, is_duplicate/3, discard/3, multiple_routing_keys/0]). -export([start/1, stop/0]). @@ -887,7 +887,7 @@ status(#vqstate { invoke(?MODULE, Fun, State) -> Fun(?MODULE, State). -is_duplicate(_Msg, State) -> {false, State}. +is_duplicate(_Txn, _Msg, State) -> {false, State}. discard(_Msg, _ChPid, State) -> State. -- cgit v1.2.1 From 6c81a81a454eb092f285bf824eaf5a655bc86c12 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Apr 2011 00:11:23 +0100 Subject: Well, getting closer. But it's not done yet, and I may have discovered a rather fatal problem with the whole idea of supporting txns in mirrors anyway in that because of the coalescing going on, there is absolutely no indication of when the BQ finally completes adding the msgs to the queue. Thus the only solution here might be to ban coalescing in this case --- src/rabbit_mirror_queue_master.erl | 16 +++-- src/rabbit_mirror_queue_slave.erl | 133 ++++++++++++++++++++++++++++++++----- 2 files changed, 126 insertions(+), 23 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 8714c44d..a59d64d4 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -239,7 +239,8 @@ tx_ack(Txn, AckTags, State = #state { gm = GM, fun (AckTag, Acc) -> [dict:fetch(AckTag, AM) | Acc] end, [], AckTags), ok = gm:broadcast(GM, {tx_ack, Txn, MsgIds}), - State + BQS1 = BQ:tx_ack(Txn, AckTags, BQS), + State #state { backing_queue_state = BQS1 } end. 
tx_rollback(Txn, State = #state { gm = GM, @@ -248,8 +249,8 @@ tx_rollback(Txn, State = #state { gm = GM, abandoned_txns = AbandonedTxns }) -> case sets:is_element(Txn, AbandonedTxns) of true -> {[], State}; - false -> ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), - {AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), + false -> {AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), + ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), {AckTags, State #state { backing_queue_state = BQS1 }} end. @@ -264,9 +265,14 @@ tx_commit(Txn, PostCommitFun, MsgPropsFun, %% try to commit on the old master. {[], State}; false -> - ok = gm:confirmed_broadcast(GM, {tx_commit, Txn, MsgPropsFun}), {AckTags, BQS1} = BQ:tx_commit(Txn, PostCommitFun, MsgPropsFun, BQS), - AM1 = lists:foldl(fun dict:erase/2, AM, AckTags), + {MsgIds, AM1} = lists:foldl( + fun (AckTag, {MsgIdsN, AMN}) -> + MsgId = dict:fetch(AckTag, AMN), + {[MsgId|MsgIdsN], dict:erase(AckTag, AMN)} + end, {[], AM}, AckTags), + ok = gm:confirmed_broadcast( + GM, {tx_commit, Txn, MsgPropsFun, MsgIds}), {AckTags, State #state { backing_queue_state = BQS, ack_msg_id = AM }} end. diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 34ec5109..a61abbd7 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -163,22 +163,20 @@ handle_call({commit, Txn, ChPid}, From, State = #state { open_transactions = OT }) -> case dict:find(Txn, OT) of error -> - %% curious. We've not received _anything_ about this txn - %% so far via gm! + %% We've not received anything about this txn so far via + %% gm! OT1 = dict:store(Txn, {undefined, {committed, From}}, OT), noreply(State #state { open_transactions = OT1 }); - {ok, {committed, undefined}} -> - %% We've already finished via GM (our BQ has actually - %% replied back to us in the case of commit), so just - %% reply and tidy up. 
Note that because no one can every - %% consume from a slave, there are never going to be any - %% acks to return. - reply(ok, State #state { open_transactions = dict:erase(Txn, OT) }); {ok, {open, undefined}} -> %% Save who we're from, but we're still waiting for the %% commit to arrive via GM OT1 = dict:store(Txn, {open, {committed, From}}, OT), noreply(State #state { open_transactions = OT1 }); + {ok, {committed, undefined}} -> + %% We've already finished via GM (our BQ has actually + %% replied back to us in the case of commit), so just + %% reply and tidy up. + reply(ok, State #state { open_transactions = dict:erase(Txn, OT) }); {ok, {abandoned, undefined}} -> %% GM must have told us to roll back. reply(ok, State #state { open_transactions = dict:erase(Txn, OT) }) @@ -224,7 +222,7 @@ handle_cast({rollback, Txn, ChPid}, %% {_, {'committed', From}} here case dict:find(Txn, OT) of error -> - %% odd. We've not received anything from GM about this. + %% We've not received anything from GM about this. OT1 = dict:store(Txn, {undefined, abandoned}, OT), noreply(State #state { open_transactions = OT1 }); {ok, {open, undefined}} -> @@ -292,6 +290,7 @@ prioritise_cast(Msg, _State) -> {run_backing_queue, _Mod, _Fun} -> 6; sync_timeout -> 6; {gm, _Msg} -> 5; + {post_commit, _Txn, _AckTags} -> 4; _ -> 0 end. @@ -340,6 +339,10 @@ bq_init(BQ, Q, Recover) -> end) end). +run_backing_queue(rabbit_mirror_queue_master, Fun, State) -> + %% Yes, this might look a little crazy, but see comments around + %% process_instruction({tx_commit,...}, State). + Fun(rabbit_mirror_queue_master, State); run_backing_queue(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. 
@@ -412,12 +415,14 @@ promote_me(From, #state { q = Q, ok = gm:confirmed_broadcast(GM, heartbeat), %% Start by rolling back all open transactions + AbandonedTxns = [Txn || {Txn, {open, _TxnStatusByChannel}} + <- dict:to_list(OT)], BQS1 = lists:foldl( fun (Txn, BQSN) -> ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), {_AckTags, BQSN1} = BQ:tx_rollback(Txn, BQSN), BQSN1 - end, BQS, dict:fetch_keys(OT)), + end, BQS, AbandonedTxns), %% We find all the messages that we've received from channels but %% not from gm, and if they're due to be enqueued on promotion @@ -490,7 +495,7 @@ promote_me(From, #state { q = Q, Status =:= published orelse Status =:= confirmed]), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS1, GM, SS, OT), + CPid, BQ, BQS1, GM, SS, sets:from_list(AbandonedTxns)), MTC = dict:from_list( [{MsgId, {ChPid, MsgSeqNo}} || @@ -750,7 +755,7 @@ process_instruction({ack, MsgIds}, State = #state { backing_queue = BQ, backing_queue_state = BQS, msg_id_ack = MA }) -> - {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), + {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA, remove), {MsgIds1, BQS1} = BQ:ack(AckTags, BQS), [] = MsgIds1 -- MsgIds, %% ASSERTION {ok, State #state { msg_id_ack = MA1, @@ -759,7 +764,7 @@ process_instruction({requeue, MsgPropsFun, MsgIds}, State = #state { backing_queue = BQ, backing_queue_state = BQS, msg_id_ack = MA }) -> - {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), + {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA, remove), {ok, case length(AckTags) =:= length(MsgIds) of true -> {MsgIds, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS), @@ -774,19 +779,111 @@ process_instruction({requeue, MsgPropsFun, MsgIds}, State #state { msg_id_ack = dict:new(), backing_queue_state = BQS2 } end}; +process_instruction({tx_publish, Txn, ChPid, MsgProps, Msg}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS, + open_transactions = OT }) -> + %% Will never see abandoned or committed in the LHS + 
OT1 = case dict:find(Txn, OT) of + error -> + dict:store(Txn, {open, undefined}, OT); + {ok, {open, _TxnStatusByChannel}} -> + OT + end, + BQS1 = BQ:tx_publish(Txn, Msg, MsgProps, ChPid, BQS), + {ok, State #state { backing_queue_state = BQS1, + open_transactions = OT1 }}; +process_instruction({tx_ack, Txn, MsgIds}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS, + open_transactions = OT, + msg_id_ack = MA }) -> + %% Will never see abandoned or committed in the LHS + OT1 = case dict:find(Txn, OT) of + error -> + dict:store(Txn, {open, undefined}, OT); + {ok, {open, _TxnStatusByChannel}} -> + OT + end, + %% Remember, rollback of a txn with acks simply undoes the ack - + %% the msg itself is not requeued or anything. Thus we make sure + %% msg_ids_to_acktags does not remove the entry from MQ, and we + %% will do the remove when we commit. + {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA, keep), + BQS1 = BQ:tx_ack(Txn, AckTags, BQS), + {ok, State #store { backing_queue_state = BQS1, + open_transactions = OT1, + msg_id_ack = MA1 }}; +process_instruction({tx_commit, Txn, MsgPropsFun, MsgIds}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS, + open_transactions = OT, + msg_id_ack = MA }) -> + %% We must remove the ack tags from MQ at this point + {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA, remove), + %% We won't adjust open_transactions until we get the post_commit + %% callback, unless we've already seen the commit from the channel + case dict:find(Txn, OT) of + {open, {committed, From}} -> + {AckTags1, BQS1} = + BQ:tx_commit(Txn, fun () -> gen_server2:reply(From, ok) end, + MsgPropsFun, BQS), + OT1 = dict:erase(Txn, OT), + true = lists:usort(AckTags) =:= lists:usort(AckTags1), %% ASSERTION + {ok, State #state { backing_queue_state = BQS, + open_transactions = OT1, + msg_id_ack = MA1 }}; + Status -> + %% We have to cope with the possibility that we'll get + %% promoted before the txn finishes, and rely on slight + %% magic if we 
do complete here. + Me = self(), + F = fun () -> rabbit_amqqueue:run_backing_queue_async( + Me, rabbit_mirror_queue_master, + fun (rabbit_mirror_queue_master, + State1 = #state { open_transactions = OT2 }) -> + OT3 = case dict:find(Txn, OT2) of + {committing, undefined} -> + dict:store( + Txn, {committed, undefined}, + OT2); + {committing, {committed, From}} -> + gen_server2:reply(From, ok), + dict:erase(Txn, OT2) + end, + State1 #state { open_transactions = OT3 } + end) + end, + {AckTags1, BQS1} = BQ:tx_commit(Txn, F, MsgPropsFun, BQS), + true = lists:usort(AckTags) =:= lists:usort(AckTags1), %% ASSERTION + OT1 = case Status of + error -> + dict:store(Txn, {committing, undefined}, OT); + {open, TxnStatusByChannel} -> + dict:store(Txn, {committing, TxnStatusByChannel}, OT) + end, + {ok, State #state { backing_queue_state = BQS, + open_transactions = OT1, + msg_id_ack = MA1 }}} + end; + process_instruction(delete_and_terminate, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:delete_and_terminate(BQS), {stop, State #state { backing_queue_state = undefined }}. -msg_ids_to_acktags(MsgIds, MA) -> +msg_ids_to_acktags(MsgIds, MA, RemoveOrKeep) -> {AckTags, MA1} = lists:foldl(fun (MsgId, {AckTagsN, MAN}) -> case dict:find(MsgId, MA) of - error -> {AckTagsN, MAN}; - {ok, AckTag} -> {[AckTag | AckTagsN], - dict:erase(MsgId, MAN)} + error -> + {AckTagsN, MAN}; + {ok, AckTag} when RemoveOrKeep =:= remove -> + {[AckTag | AckTagsN], + dict:erase(MsgId, MAN)}; + {ok, AckTag} when RemoveOrKeep =:= keep -> + {[AckTag | AckTagsN], MAN} end end, {[], MA}, MsgIds), {lists:reverse(AckTags), MA1}. -- cgit v1.2.1 From b95d56981fceafdb17edad3be1d7bb70e0fb2268 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Apr 2011 10:27:19 +0100 Subject: Sod it - transactions are too hard to do in mirror queues so don't bother. 
I know how to do it, but it's horrifically messy, and the margin is too small --- src/rabbit_mirror_queue_master.erl | 96 +++++-------------- src/rabbit_mirror_queue_slave.erl | 183 +++++-------------------------------- 2 files changed, 44 insertions(+), 235 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index a59d64d4..387dfbc4 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -26,7 +26,7 @@ -export([start/1, stop/0]). --export([promote_backing_queue_state/6]). +-export([promote_backing_queue_state/5]). -behaviour(rabbit_backing_queue). @@ -39,8 +39,7 @@ set_delivered, seen_status, confirmed, - ack_msg_id, - abandoned_txns + ack_msg_id }). %% --------------------------------------------------------------------------- @@ -78,7 +77,7 @@ init(#amqqueue { arguments = Args, name = QName } = Q, Recover, confirmed = [], ack_msg_id = dict:new() }. -promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, AbandonedTxns) -> +promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus) -> #state { gm = GM, coordinator = CPid, backing_queue = BQ, @@ -86,8 +85,7 @@ promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, AbandonedTxns) -> set_delivered = BQ:len(BQS), seen_status = SeenStatus, confirmed = [], - ack_msg_id = dict:new(), - abandoned_txns = AbandonedTxns }. + ack_msg_id = dict:new() }. terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. The queue is going down but @@ -214,68 +212,20 @@ ack(AckTags, State = #state { gm = GM, {MsgIds, State #state { backing_queue_state = BQS1, ack_msg_id = AM1 }}. 
-tx_publish(Txn, Msg, MsgProps, ChPid, - State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - abandoned_txns = AbandonedTxns }) -> - case sets:is_element(Txn, AbandonedTxns) of - true -> State; - false -> ok = gm:broadcast(GM, {tx_publish, Txn, ChPid, MsgProps, Msg}), - BQS1 = BQ:tx_publish(Txn, Msg, MsgProps, ChPid, State), - State #state { backing_queue_state = BQS1 } - end. +tx_publish(_Txn, _Msg, _MsgProps, _ChPid, State) -> + %% We don't support txns in mirror queues + State. -tx_ack(Txn, AckTags, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - ack_msg_id = AM, - abandoned_txns = AbandonedTxns }) -> - case sets:is_element(Txn, AbandonedTxns) of - true -> - State; - false -> - MsgIds = lists:foldl( - fun (AckTag, Acc) -> [dict:fetch(AckTag, AM) | Acc] end, - [], AckTags), - ok = gm:broadcast(GM, {tx_ack, Txn, MsgIds}), - BQS1 = BQ:tx_ack(Txn, AckTags, BQS), - State #state { backing_queue_state = BQS1 } - end. +tx_ack(_Txn, _AckTags, State) -> + %% We don't support txns in mirror queues + State. -tx_rollback(Txn, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - abandoned_txns = AbandonedTxns }) -> - case sets:is_element(Txn, AbandonedTxns) of - true -> {[], State}; - false -> {AckTags, BQS1} = BQ:tx_rollback(Txn, BQS), - ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), - {AckTags, State #state { backing_queue_state = BQS1 }} - end. +tx_rollback(_Txn, State) -> + {[], State}. -tx_commit(Txn, PostCommitFun, MsgPropsFun, - State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - ack_msg_id = AM }) -> - case sets:is_element(Txn, AbandonedTxns) of - true -> - %% Don't worry - the channel will explode as it'll still - %% try to commit on the old master. 
- {[], State}; - false -> - {AckTags, BQS1} = BQ:tx_commit(Txn, PostCommitFun, MsgPropsFun, BQS), - {MsgIds, AM1} = lists:foldl( - fun (AckTag, {MsgIdsN, AMN}) -> - MsgId = dict:fetch(AckTag, AMN), - {[MsgId|MsgIdsN], dict:erase(AckTag, AMN)} - end, {[], AM}, AckTags), - ok = gm:confirmed_broadcast( - GM, {tx_commit, Txn, MsgPropsFun, MsgIds}), - {AckTags, State #state { backing_queue_state = BQS, - ack_msg_id = AM }} - end. +tx_commit(_Txn, PostCommitFun, _MsgPropsFun, State) -> + PostCommitFun(), %% Probably must run it to avoid deadlocks + {[], State}. requeue(AckTags, MsgPropsFun, State = #state { gm = GM, backing_queue = BQ, @@ -361,13 +311,10 @@ is_duplicate(none, Message = #basic_message { id = MsgId }, %% be called and we need to be able to detect this case {discarded, State} end; -is_duplicate(Txn, _Msg, State = #state { abandoned_txns = AbandonedTxns }) -> - %% There will be nothing in seen_status for any transactions that - %% are still in flight. - case sets:is_element(Txn, AbandonedTxns) of - true -> {published, State}; - false -> {false, State} - end. +is_duplicate(_Txn, _Msg, State) -> + %% In a transaction. We don't support txns in mirror queues. But + %% it's probably not a duplicate... + {false, State}. discard(Msg = #basic_message { id = MsgId }, ChPid, State = #state { gm = GM, @@ -376,13 +323,14 @@ discard(Msg = #basic_message { id = MsgId }, ChPid, seen_status = SS }) -> %% It's a massive error if we get told to discard something that's %% already been published or published-and-confirmed. To do that - %% would require non FIFO access... + %% would require non FIFO access. Hence we should not find + %% 'published' or 'confirmed' in this dict:find. case dict:find(MsgId, SS) of error -> ok = gm:broadcast(GM, {discard, ChPid, Msg}), State #state { backing_queue_state = BQ:discard(Msg, ChPid, BQS), seen_status = dict:erase(MsgId, SS) }; - discarded -> + {ok, discarded} -> State end. 
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index a61abbd7..8ca82fa1 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -55,8 +55,7 @@ sender_queues, %% :: Pid -> MsgQ msg_id_ack, %% :: MsgId -> AckTag - msg_id_status, - open_transactions + msg_id_status }). -define(SYNC_INTERVAL, 25). %% milliseconds @@ -106,8 +105,7 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), msg_id_ack = dict:new(), - msg_id_status = dict:new(), - open_transactions = dict:new() + msg_id_status = dict:new() }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -159,28 +157,9 @@ handle_call({gm_deaths, Deaths}, From, handle_call({run_backing_queue, Mod, Fun}, _From, State) -> reply(ok, run_backing_queue(Mod, Fun, State)); -handle_call({commit, Txn, ChPid}, From, - State = #state { open_transactions = OT }) -> - case dict:find(Txn, OT) of - error -> - %% We've not received anything about this txn so far via - %% gm! - OT1 = dict:store(Txn, {undefined, {committed, From}}, OT), - noreply(State #state { open_transactions = OT1 }); - {ok, {open, undefined}} -> - %% Save who we're from, but we're still waiting for the - %% commit to arrive via GM - OT1 = dict:store(Txn, {open, {committed, From}}, OT), - noreply(State #state { open_transactions = OT1 }); - {ok, {committed, undefined}} -> - %% We've already finished via GM (our BQ has actually - %% replied back to us in the case of commit), so just - %% reply and tidy up. - reply(ok, State #state { open_transactions = dict:erase(Txn, OT) }); - {ok, {abandoned, undefined}} -> - %% GM must have told us to roll back. - reply(ok, State #state { open_transactions = dict:erase(Txn, OT) }) - end. +handle_call({commit, _Txn, _ChPid}, _From, State) -> + %% We don't support transactions in mirror queues + reply(ok, State). 
handle_cast({run_backing_queue, Mod, Fun}, State) -> noreply(run_backing_queue(Mod, Fun, State)); @@ -216,23 +195,9 @@ handle_cast(sync_timeout, State) -> noreply(backing_queue_idle_timeout( State #state { sync_timer_ref = undefined })); -handle_cast({rollback, Txn, ChPid}, - State #state { open_transactions = OT }) -> - %% Will never see {'committed', _} or {_, 'abandoned'} or - %% {_, {'committed', From}} here - case dict:find(Txn, OT) of - error -> - %% We've not received anything from GM about this. - OT1 = dict:store(Txn, {undefined, abandoned}, OT), - noreply(State #state { open_transactions = OT1 }); - {ok, {open, undefined}} -> - %% The rollback is yet to arrive via GM. - OT1 = dict:store(Txn, {open, abandoned}, OT), - noreply(State #state { open_transactions = OT1 }); - {ok, {abandoned, undefined}} -> - %% GM has already rolled back. Tidy up. - noreply(State #state { open_transactions = dict:erase(Txn, OT) }) - end. +handle_cast({rollback, _Txn, _ChPid}, State) -> + %% We don't support transactions in mirror queues + noreply(State). 
handle_info(timeout, State) -> noreply(backing_queue_idle_timeout(State)); @@ -405,8 +370,7 @@ promote_me(From, #state { q = Q, rate_timer_ref = RateTRef, sender_queues = SQ, msg_id_ack = MA, - msg_id_status = MS, - open_transactions = OT }) -> + msg_id_status = MS }) -> rabbit_log:info("Promoting slave ~p for ~s~n", [self(), rabbit_misc:rs(Q #amqqueue.name)]), {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), @@ -414,16 +378,6 @@ promote_me(From, #state { q = Q, gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), - %% Start by rolling back all open transactions - AbandonedTxns = [Txn || {Txn, {open, _TxnStatusByChannel}} - <- dict:to_list(OT)], - BQS1 = lists:foldl( - fun (Txn, BQSN) -> - ok = gm:confirmed_broadcast(GM, {tx_rollback, Txn}), - {_AckTags, BQSN1} = BQ:tx_rollback(Txn, BQSN), - BQSN1 - end, BQS, AbandonedTxns), - %% We find all the messages that we've received from channels but %% not from gm, and if they're due to be enqueued on promotion %% then we pass them to the @@ -495,7 +449,7 @@ promote_me(From, #state { q = Q, Status =:= published orelse Status =:= confirmed]), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS1, GM, SS, sets:from_list(AbandonedTxns)), + CPid, BQ, BQS, GM, SS), MTC = dict:from_list( [{MsgId, {ChPid, MsgSeqNo}} || @@ -604,9 +558,8 @@ maybe_enqueue_message( %% discarded. We won't see this again. State #state { msg_id_status = dict:erase(MsgId, MS) } end; -maybe_enqueue_message(_Delivery, State) -> - %% In a txn. Txns are completely driven by gm for simplicity, so - %% we're not going to do anything here. +maybe_enqueue_message(_Delivery, _EnqueueOnPromotion, State) -> + %% We don't support txns in mirror queues. State. 
process_instruction( @@ -755,7 +708,7 @@ process_instruction({ack, MsgIds}, State = #state { backing_queue = BQ, backing_queue_state = BQS, msg_id_ack = MA }) -> - {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA, remove), + {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), {MsgIds1, BQS1} = BQ:ack(AckTags, BQS), [] = MsgIds1 -- MsgIds, %% ASSERTION {ok, State #state { msg_id_ack = MA1, @@ -764,7 +717,7 @@ process_instruction({requeue, MsgPropsFun, MsgIds}, State = #state { backing_queue = BQ, backing_queue_state = BQS, msg_id_ack = MA }) -> - {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA, remove), + {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), {ok, case length(AckTags) =:= length(MsgIds) of true -> {MsgIds, BQS1} = BQ:requeue(AckTags, MsgPropsFun, BQS), @@ -779,113 +732,21 @@ process_instruction({requeue, MsgPropsFun, MsgIds}, State #state { msg_id_ack = dict:new(), backing_queue_state = BQS2 } end}; -process_instruction({tx_publish, Txn, ChPid, MsgProps, Msg}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS, - open_transactions = OT }) -> - %% Will never see abandoned or committed in the LHS - OT1 = case dict:find(Txn, OT) of - error -> - dict:store(Txn, {open, undefined}, OT); - {ok, {open, _TxnStatusByChannel}} -> - OT - end, - BQS1 = BQ:tx_publish(Txn, Msg, MsgProps, ChPid, BQS), - {ok, State #state { backing_queue_state = BQS1, - open_transactions = OT1 }}; -process_instruction({tx_ack, Txn, MsgIds}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS, - open_transactions = OT, - msg_id_ack = MA }) -> - %% Will never see abandoned or committed in the LHS - OT1 = case dict:find(Txn, OT) of - error -> - dict:store(Txn, {open, undefined}, OT); - {ok, {open, _TxnStatusByChannel}} -> - OT - end, - %% Remember, rollback of a txn with acks simply undoes the ack - - %% the msg itself is not requeued or anything. 
Thus we make sure - %% msg_ids_to_acktags does not remove the entry from MQ, and we - %% will do the remove when we commit. - {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA, keep), - BQS1 = BQ:tx_ack(Txn, AckTags, BQS), - {ok, State #store { backing_queue_state = BQS1, - open_transactions = OT1, - msg_id_ack = MA1 }}; -process_instruction({tx_commit, Txn, MsgPropsFun, MsgIds}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS, - open_transactions = OT, - msg_id_ack = MA }) -> - %% We must remove the ack tags from MQ at this point - {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA, remove), - %% We won't adjust open_transactions until we get the post_commit - %% callback, unless we've already seen the commit from the channel - case dict:find(Txn, OT) of - {open, {committed, From}} -> - {AckTags1, BQS1} = - BQ:tx_commit(Txn, fun () -> gen_server2:reply(From, ok) end, - MsgPropsFun, BQS), - OT1 = dict:erase(Txn, OT), - true = lists:usort(AckTags) =:= lists:usort(AckTags1), %% ASSERTION - {ok, State #state { backing_queue_state = BQS, - open_transactions = OT1, - msg_id_ack = MA1 }}; - Status -> - %% We have to cope with the possibility that we'll get - %% promoted before the txn finishes, and rely on slight - %% magic if we do complete here. 
- Me = self(), - F = fun () -> rabbit_amqqueue:run_backing_queue_async( - Me, rabbit_mirror_queue_master, - fun (rabbit_mirror_queue_master, - State1 = #state { open_transactions = OT2 }) -> - OT3 = case dict:find(Txn, OT2) of - {committing, undefined} -> - dict:store( - Txn, {committed, undefined}, - OT2); - {committing, {committed, From}} -> - gen_server2:reply(From, ok), - dict:erase(Txn, OT2) - end, - State1 #state { open_transactions = OT3 } - end) - end, - {AckTags1, BQS1} = BQ:tx_commit(Txn, F, MsgPropsFun, BQS), - true = lists:usort(AckTags) =:= lists:usort(AckTags1), %% ASSERTION - OT1 = case Status of - error -> - dict:store(Txn, {committing, undefined}, OT); - {open, TxnStatusByChannel} -> - dict:store(Txn, {committing, TxnStatusByChannel}, OT) - end, - {ok, State #state { backing_queue_state = BQS, - open_transactions = OT1, - msg_id_ack = MA1 }}} - end; - process_instruction(delete_and_terminate, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:delete_and_terminate(BQS), {stop, State #state { backing_queue_state = undefined }}. -msg_ids_to_acktags(MsgIds, MA, RemoveOrKeep) -> +msg_ids_to_acktags(MsgIds, MA) -> {AckTags, MA1} = - lists:foldl(fun (MsgId, {AckTagsN, MAN}) -> - case dict:find(MsgId, MA) of - error -> - {AckTagsN, MAN}; - {ok, AckTag} when RemoveOrKeep =:= remove -> - {[AckTag | AckTagsN], - dict:erase(MsgId, MAN)}; - {ok, AckTag} when RemoveOrKeep =:= keep -> - {[AckTag | AckTagsN], MAN} - end - end, {[], MA}, MsgIds), + lists:foldl( + fun (MsgId, {Acc, MAN}) -> + case dict:find(MsgId, MA) of + error -> {Acc, MAN}; + {ok, AckTag} -> {[AckTag | Acc], dict:erase(MsgId, MAN)} + end + end, {[], MA}, MsgIds), {lists:reverse(AckTags), MA1}. 
ack_all(BQ, MA, BQS) -> -- cgit v1.2.1 From bb80be93ece35590dd9ee1295a866b1d88583ade Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Apr 2011 11:15:57 +0100 Subject: some notes --- src/rabbit_mirror_queue_master.erl | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 387dfbc4..664c706d 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -42,6 +42,46 @@ ack_msg_id }). +%% Some notes on transactions +%% +%% We don't support transactions on mirror queues. To do so is +%% challenging. The underlying bq is free to add the contents of the +%% txn to the queue proper at any point after the tx.commit comes in +%% but before the tx.commit-ok goes out. This means that it is not +%% safe for all mirrors to simply issue the BQ:tx_commit at the same +%% time, as the addition of the txn's contents to the queue may +%% subsequently be inconsistently interwoven with other actions on the +%% BQ. The solution to this is, in the master, wrap the PostCommitFun +%% and do the gm:broadcast in there: at that point, you're in the BQ +%% (well, there's actually nothing to stop that function being invoked +%% by some other process, but let's pretend for now: you could always +%% use run_backing_queue_async to ensure you really are in the queue +%% process), the gm:broadcast is safe because you don't have to worry +%% about races with other gm:broadcast calls (same process). Thus this +%% signal would indicate sufficiently to all the slaves that they must +%% insert the complete contents of the txn at precisely this point in +%% the stream of events. 
+%% +%% However, it's quite difficult for the slaves to make that happen: +%% they would be forced to issue the tx_commit at that point, but then +%% stall processing any further instructions from gm until they +%% receive the notification from their bq that the tx_commit has fully +%% completed (i.e. they need to treat what is an async system as being +%% fully synchronous). This is not too bad (apart from the +%% vomit-inducing notion of it all): just need a queue of instructions +%% from the GM; but then it gets rather worse when you consider what +%% needs to happen if the master dies at this point and the slave in +%% the middle of this tx_commit needs to be promoted. +%% +%% Finally, we can't possibly hope to make transactions atomic across +%% mirror queues, and it's not even clear that that's desirable: if a +%% slave fails whilst there's an open transaction in progress then +%% when the channel comes to commit the txn, it will detect the +%% failure and destroy the channel. However, the txn will have +%% actually committed successfully in all the other mirrors (including +%% master). To do this bit properly would require 2PC and all the +%% baggage that goes with that. 
+ %% --------------------------------------------------------------------------- %% Backing queue %% --------------------------------------------------------------------------- -- cgit v1.2.1 From ce51765ac7299ea27796d57c3903a15e4f4120ca Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Apr 2011 14:12:26 +0100 Subject: Abstract out mainly timer maintenance functions --- src/rabbit_amqqueue_process.erl | 54 +++++++------------ src/rabbit_amqqueue_process_utils.erl | 99 +++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 35 deletions(-) create mode 100644 src/rabbit_amqqueue_process_utils.erl diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 2b0fe17e..435edc07 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -21,8 +21,6 @@ -behaviour(gen_server2). -define(UNSENT_MESSAGE_LIMIT, 100). --define(SYNC_INTERVAL, 25). %% milliseconds --define(RAM_DURATION_UPDATE_INTERVAL, 5000). -define(BASE_MESSAGE_PROPERTIES, #message_properties{expiry = undefined, needs_confirming = false}). @@ -226,37 +224,27 @@ next_state(State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> false -> {stop_sync_timer(State1), hibernate} end. -ensure_sync_timer(State = #q{sync_timer_ref = undefined}) -> - {ok, TRef} = timer:apply_after( - ?SYNC_INTERVAL, rabbit_amqqueue, sync_timeout, [self()]), - State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> - State. + rabbit_amqqueue_process_utils:ensure_sync_timer( + fun sync_timer_getter/1, fun sync_timer_setter/2, State). + +stop_sync_timer(State) -> + rabbit_amqqueue_process_utils:stop_sync_timer( + fun sync_timer_getter/1, fun sync_timer_setter/2, State). + +sync_timer_getter(State) -> State#q.sync_timer_ref. +sync_timer_setter(Timer, State) -> State#q{sync_timer_ref = Timer}. 
-stop_sync_timer(State = #q{sync_timer_ref = undefined}) -> - State; -stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> - {ok, cancel} = timer:cancel(TRef), - State#q{sync_timer_ref = undefined}. - -ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> - {ok, TRef} = timer:apply_after( - ?RAM_DURATION_UPDATE_INTERVAL, - rabbit_amqqueue, update_ram_duration, - [self()]), - State#q{rate_timer_ref = TRef}; -ensure_rate_timer(State = #q{rate_timer_ref = just_measured}) -> - State#q{rate_timer_ref = undefined}; ensure_rate_timer(State) -> - State. + rabbit_amqqueue_process_utils:ensure_rate_timer( + fun rate_timer_getter/1, fun rate_timer_setter/2, State). -stop_rate_timer(State = #q{rate_timer_ref = undefined}) -> - State; -stop_rate_timer(State = #q{rate_timer_ref = just_measured}) -> - State#q{rate_timer_ref = undefined}; -stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> - {ok, cancel} = timer:cancel(TRef), - State#q{rate_timer_ref = undefined}. +stop_rate_timer(State) -> + rabbit_amqqueue_process_utils:stop_rate_timer( + fun rate_timer_getter/1, fun rate_timer_setter/2, State). + +rate_timer_getter(State) -> State#q.rate_timer_ref. +rate_timer_setter(Timer, State) -> State#q{rate_timer_ref = Timer}. 
stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) -> State; @@ -1160,15 +1148,11 @@ handle_pre_hibernate(State = #q{backing_queue_state = undefined}) -> handle_pre_hibernate(State = #q{backing_queue = BQ, backing_queue_state = BQS, stats_timer = StatsTimer}) -> - {RamDuration, BQS1} = BQ:ram_duration(BQS), - DesiredDuration = - rabbit_memory_monitor:report_ram_duration(self(), RamDuration), - BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - BQS3 = BQ:handle_pre_hibernate(BQS2), + BQS1 = rabbit_amqqueue_process_utils:backing_queue_pre_hibernate(BQ, BQS), rabbit_event:if_enabled(StatsTimer, fun () -> emit_stats(State, [{idle_since, now()}]) end), State1 = State#q{stats_timer = rabbit_event:stop_stats_timer(StatsTimer), - backing_queue_state = BQS3}, + backing_queue_state = BQS1}, {hibernate, stop_rate_timer(State1)}. diff --git a/src/rabbit_amqqueue_process_utils.erl b/src/rabbit_amqqueue_process_utils.erl new file mode 100644 index 00000000..feb2a79c --- /dev/null +++ b/src/rabbit_amqqueue_process_utils.erl @@ -0,0 +1,99 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. +%% +
+-module(rabbit_amqqueue_process_utils).
+
+-define(SYNC_INTERVAL, 25). %% milliseconds
+-define(RAM_DURATION_UPDATE_INTERVAL, 5000).
+
+-export([backing_queue_pre_hibernate/2,
+         ensure_sync_timer/3, stop_sync_timer/3,
+         ensure_rate_timer/3, stop_rate_timer/3]). 
+ +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(bq_mod() :: atom()). +-type(bq_state() :: any()). %% A good example of dialyzer's shortcomings + +-type(queue_state() :: any()). %% Another such example. +-type(getter(A) :: fun ((queue_state()) -> A)). +-type(setter(A) :: fun ((A, queue_state()) -> queue_state())). + +-type(tref() :: term()). %% Sigh. According to timer docs. + +-spec(backing_queue_pre_hibernate/2 :: (bq_mod(), bq_state()) -> bq_state()). + +-spec(ensure_sync_timer/3 :: (getter('undefined'|tref()), + setter('undefined'|tref()), + queue_state()) -> queue_state()). +-spec(stop_sync_timer/3 :: (getter('undefined'|tref()), + setter('undefined'|tref()), + queue_state()) -> queue_state()). + +-spec(ensure_rate_timer/3 :: (getter('undefined'|'just_measured'|tref()), + setter('undefined'|'just_measured'|tref()), + queue_state()) -> queue_state()). +-spec(stop_rate_timer/3 :: (getter('undefined'|'just_measured'|tref()), + setter('undefined'|'just_measured'|tref()), + queue_state()) -> queue_state()). + +-endif. + +%%---------------------------------------------------------------------------- + +backing_queue_pre_hibernate(BQ, BQS) -> + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + BQ:handle_pre_hibernate(BQS2). + +ensure_sync_timer(Getter, Setter, State) -> + case Getter(State) of + undefined -> {ok, TRef} = timer:apply_after( + ?SYNC_INTERVAL, rabbit_amqqueue, + sync_timeout, [self()]), + Setter(TRef, State); + _TRef -> State + end. + +stop_sync_timer(Getter, Setter, State) -> + case Getter(State) of + undefined -> State; + TRef -> {ok, cancel} = timer:cancel(TRef), + Setter(undefined, State) + end. 
+ +ensure_rate_timer(Getter, Setter, State) -> + case Getter(State) of + undefined -> {ok, TRef} = + timer:apply_after( + ?RAM_DURATION_UPDATE_INTERVAL, rabbit_amqqueue, + update_ram_duration, [self()]), + Setter(TRef, State); + just_measured -> Setter(undefined, State); + _TRef -> State + end. + +stop_rate_timer(Getter, Setter, State) -> + case Getter(State) of + undefined -> State; + just_measured -> Setter(undefined, State); + TRef -> {ok, cancel} = timer:cancel(TRef), + Setter(undefined, State) + end. -- cgit v1.2.1 From c3decea2e53d15aec7db221110236ecacc8cd867 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Apr 2011 18:13:35 +0100 Subject: Some initial HA design documentation --- src/rabbit_mirror_queue_coordinator.erl | 75 +++++++++++++++++++++++++++++++++ src/rabbit_mirror_queue_master.erl | 3 ++ src/rabbit_mirror_queue_slave.erl | 3 ++ 3 files changed, 81 insertions(+) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 5fd07e60..f780f6b5 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -35,6 +35,81 @@ -define(ONE_SECOND, 1000). +%%---------------------------------------------------------------------------- +%% +%% Mirror Queues +%% +%% A queue with mirrors consists of the following: +%% +%% #amqqueue{ pid, mirror_pids } +%% | | +%% +----------+ +-------+--------------+-----------...etc... +%% | | | +%% V V V +%% amqqueue_process---+ slave-----+ slave-----+ ...etc... +%% | BQ = master----+ | | BQ = vq | | BQ = vq | +%% | | BQ = vq | | +-+-------+ +-+-------+ +%% | +-+-------+ | | | +%% +-++-----|---------+ | | +%% || | | | +%% || coordinator-+ | | +%% || +-+---------+ | | +%% || | | | +%% || gm-+ -- -- -- -- gm-+- -- -- -- gm-+- -- --...etc... +%% || +--+ +--+ +--+ +%% || +%% consumers +%% +%% The master is merely an implementation of BQ, and thus is invoked +%% through the normal BQ interface by the amqqueue_process. 
The slaves +%% meanwhile are processes in their own right (as is the +%% coordinator). The coordinator and all slaves belong to the same gm +%% group. Every member of a gm group receives messages sent to the gm +%% group. Because the master is the BQ of amqqueue_process, it doesn't +%% have sole control over its mailbox, and as a result, the master +%% itself cannot be passed messages directly, yet it needs to react to +%% gm events, such as the death of slaves. Thus the master creates the +%% coordinator, and it is the coordinator that is the gm callback +%% module and event handler for the master. +%% +%% Consumers are only attached to the master. Thus the master is +%% responsible for informing all slaves when messages are fetched from +%% the BQ, when they're acked, and when they're requeued. +%% +%% The basic goal is to ensure that all slaves performs actions on +%% their BQ in the same order as the master. Thus the master +%% intercepts all events going to its BQ, and suitably broadcasts +%% these events on the gm. The slaves thus receive two streams of +%% events: one stream is via the gm, and one stream is from channels +%% directly. Note that whilst the stream via gm is guaranteed to be +%% consistently seen by all slaves, the same is not true of the stream +%% via channels. For example, in the event of an unexpected death of a +%% channel during a publish, only some of the mirrors may receive that +%% publish. As a result of this problem, the messages broadcast over +%% the gm contain published content, and thus slaves can operate +%% successfully on messages that they only receive via the gm. The key +%% purpose of also sending messages directly from the channels to the +%% slaves is that without this, in the event of the death of the +%% master, messages can be lost until a suitable slave is promoted. +%% +%% However, there are other reasons as well. 
For example, if confirms +%% are in use, then there is no guarantee that every slave will see +%% the delivery with the same msg_seq_no. As a result, the slaves have +%% to wait until they've seen both the publish via gm, and the publish +%% via the channel before they have enough information to be able to +%% issue the confirm, if necessary. Either form of publish can arrive +%% first, and a slave can be upgraded to the master at any point +%% during this process. Confirms continue to be issued correctly, +%% however. +%% +%% Because the slave is a full process, it impersonates parts of the +%% amqqueue API. However, it does not need to implement all parts: for +%% example, no ack or consumer-related message can arrive directly at +%% a slave from a channel: it is only publishes that pass both +%% directly to the slaves and go via gm. +%% +%%---------------------------------------------------------------------------- + start_link(Queue, GM) -> gen_server2:start_link(?MODULE, [Queue, GM], []). diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 664c706d..e6a71370 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -42,6 +42,9 @@ ack_msg_id }). +%% For general documentation of HA design, see +%% rabbit_mirror_queue_coordinator +%% %% Some notes on transactions %% %% We don't support transactions on mirror queues. To do so is diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 70b5c43d..89b8971c 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -16,6 +16,9 @@ -module(rabbit_mirror_queue_slave). +%% For general documentation of HA design, see +%% rabbit_mirror_queue_coordinator +%% %% We join the GM group before we add ourselves to the amqqueue %% record. As a result: %% 1. 
We can receive msgs from GM that correspond to messages we will -- cgit v1.2.1 From 15ea60267f1132150ebf89f9d2299e8d2323f688 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 8 Apr 2011 18:15:38 +0100 Subject: Additional hilarious witticism --- src/rabbit_mirror_queue_coordinator.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index f780f6b5..84220a5b 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -50,7 +50,7 @@ %% | BQ = master----+ | | BQ = vq | | BQ = vq | %% | | BQ = vq | | +-+-------+ +-+-------+ %% | +-+-------+ | | | -%% +-++-----|---------+ | | +%% +-++-----|---------+ | | (some details elided) %% || | | | %% || coordinator-+ | | %% || +-+---------+ | | -- cgit v1.2.1 From a49f0d0b733cdc2bc72716cb6c41083ce866aeb0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 10 Apr 2011 12:53:52 +0100 Subject: Work on documentation of ha --- src/rabbit_mirror_queue_coordinator.erl | 146 +++++++++++++++++++++++++++----- src/rabbit_mirror_queue_master.erl | 40 --------- 2 files changed, 124 insertions(+), 62 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 84220a5b..7e521e49 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -60,47 +60,49 @@ %% || %% consumers %% -%% The master is merely an implementation of BQ, and thus is invoked -%% through the normal BQ interface by the amqqueue_process. The slaves +%% The master is merely an implementation of bq, and thus is invoked +%% through the normal bq interface by the amqqueue_process. The slaves %% meanwhile are processes in their own right (as is the %% coordinator). The coordinator and all slaves belong to the same gm %% group. Every member of a gm group receives messages sent to the gm -%% group. 
Because the master is the BQ of amqqueue_process, it doesn't +%% group. Because the master is the bq of amqqueue_process, it doesn't %% have sole control over its mailbox, and as a result, the master -%% itself cannot be passed messages directly, yet it needs to react to -%% gm events, such as the death of slaves. Thus the master creates the -%% coordinator, and it is the coordinator that is the gm callback -%% module and event handler for the master. +%% itself cannot be passed messages directly (well, it could by via +%% the amqqueue:run_backing_queue_async callback but that would induce +%% additional unnecessary loading on the master queue process), yet it +%% needs to react to gm events, such as the death of slaves. Thus the +%% master creates the coordinator, and it is the coordinator that is +%% the gm callback module and event handler for the master. %% %% Consumers are only attached to the master. Thus the master is %% responsible for informing all slaves when messages are fetched from -%% the BQ, when they're acked, and when they're requeued. +%% the bq, when they're acked, and when they're requeued. %% %% The basic goal is to ensure that all slaves performs actions on -%% their BQ in the same order as the master. Thus the master -%% intercepts all events going to its BQ, and suitably broadcasts +%% their bqs in the same order as the master. Thus the master +%% intercepts all events going to its bq, and suitably broadcasts %% these events on the gm. The slaves thus receive two streams of %% events: one stream is via the gm, and one stream is from channels -%% directly. Note that whilst the stream via gm is guaranteed to be -%% consistently seen by all slaves, the same is not true of the stream -%% via channels. For example, in the event of an unexpected death of a +%% directly. Whilst the stream via gm is guaranteed to be consistently +%% seen by all slaves, the same is not true of the stream via +%% channels. 
For example, in the event of an unexpected death of a %% channel during a publish, only some of the mirrors may receive that %% publish. As a result of this problem, the messages broadcast over %% the gm contain published content, and thus slaves can operate %% successfully on messages that they only receive via the gm. The key %% purpose of also sending messages directly from the channels to the %% slaves is that without this, in the event of the death of the -%% master, messages can be lost until a suitable slave is promoted. +%% master, messages could be lost until a suitable slave is promoted. %% -%% However, there are other reasons as well. For example, if confirms -%% are in use, then there is no guarantee that every slave will see -%% the delivery with the same msg_seq_no. As a result, the slaves have -%% to wait until they've seen both the publish via gm, and the publish +%% However, that is not the only reason. For example, if confirms are +%% in use, then there is no guarantee that every slave will see the +%% delivery with the same msg_seq_no. As a result, the slaves have to +%% wait until they've seen both the publish via gm, and the publish %% via the channel before they have enough information to be able to -%% issue the confirm, if necessary. Either form of publish can arrive -%% first, and a slave can be upgraded to the master at any point -%% during this process. Confirms continue to be issued correctly, -%% however. +%% perform the publish to their own bq, and subsequently issue the +%% confirm, if necessary. Either form of publish can arrive first, and +%% a slave can be upgraded to the master at any point during this +%% process. Confirms continue to be issued correctly, however. %% %% Because the slave is a full process, it impersonates parts of the %% amqqueue API. However, it does not need to implement all parts: for @@ -108,6 +110,106 @@ %% a slave from a channel: it is only publishes that pass both %% directly to the slaves and go via gm. 
%% +%% Slaves can be added dynamically. When this occurs, there is no +%% attempt made to sync the current contents of the master with the +%% new slave, thus the slave will start empty, regardless of the state +%% of the master. Thus the slave needs to be able to detect and ignore +%% operations which are for messages it has not received: because of +%% the strict FIFO nature of queues in general, this is +%% straightforward - all new publishes that the new slave receives via +%% gm should be processed as normal, but fetches which are for +%% messages the slave has never seen should be ignored. Similarly, +%% acks for messages the slave never fetched should be +%% ignored. Eventually, as the master is consumed from, the messages +%% at the head of the queue which were there before the slave joined +%% will disappear, and the slave will become fully synced with the +%% state of the master. The detection of the sync-status of a slave is +%% done entirely based on length: if the slave and the master both +%% agree on the length of the queue after the fetch of the head of the +%% queue, then the queues must be in sync. The only other possibility +%% is that the slave's queue is shorter, and thus the fetch should be +%% ignored. +%% +%% Because acktags are issued by the bq independently, and because +%% there is no requirement for the master and all slaves to use the +%% same bq, all references to msgs going over gm is by msg_id. Thus +%% upon acking, the master must convert the acktags back to msg_ids +%% (which happens to be what bq:ack returns), then sends the msg_ids +%% over gm, the slaves must convert the msg_ids to acktags (a mapping +%% the slaves themselves must maintain). +%% +%% When the master dies, a slave gets promoted. This will be the +%% eldest slave, and thus the hope is that that slave is most likely +%% to be sync'd with the master. 
The design of gm is that the +%% notification of the death of the master will only appear once all +%% messages in-flight from the master have been fully delivered to all +%% members of the gm group. Thus at this point, the slave that gets +%% promoted cannot broadcast different events in a different order +%% than the master for the same msgs: there is no possibility for the +%% same msg to be processed by the old master and the new master - if +%% it was processed by the old master then it will have been processed +%% by the slave before the slave was promoted, and vice versa. +%% +%% Upon promotion, all msgs pending acks are requeued as normal, the +%% slave constructs state suitable for use in the master module, and +%% then dynamically changes into an amqqueue_process with the master +%% as the bq, and the slave's bq as the master's bq. Thus the very +%% same process that was the slave is now a full amqqueue_process. +%% +%% In the event of channel failure, there is the possibility that a +%% msg that was being published only makes it to some of the +%% mirrors. If it makes it to the master, then the master will push +%% the entire message onto gm, and all the slaves will publish it to +%% their bq, even though they may not receive it directly from the +%% channel. This currently will create a small memory leak in the +%% slave's msg_id_status mapping as the slaves will expect that +%% eventually they'll receive the msg from the channel. If the message +%% does not make it to the master then the slaves that receive it will +%% hold onto the message, assuming it'll eventually appear via +%% gm. Again, this will currently result in a memory leak, though this +%% time, it's the entire message rather than tracking the status of +%% the message, which is potentially much worse. This may eventually +%% be solved by monitoring publishing channels in some way. +%% +%% We don't support transactions on mirror queues. To do so is +%% challenging. 
The underlying bq is free to add the contents of the +%% txn to the queue proper at any point after the tx.commit comes in +%% but before the tx.commit-ok goes out. This means that it is not +%% safe for all mirrors to simply issue the bq:tx_commit at the same +%% time, as the addition of the txn's contents to the queue may +%% subsequently be inconsistently interwoven with other actions on the +%% bq. The solution to this is, in the master, wrap the PostCommitFun +%% and do the gm:broadcast in there: at that point, you're in the bq +%% (well, there's actually nothing to stop that function being invoked +%% by some other process, but let's pretend for now: you could always +%% use run_backing_queue to ensure you really are in the queue process +%% (the _async variant would be unsafe from an ordering pov)), the +%% gm:broadcast is safe because you don't have to worry about races +%% with other gm:broadcast calls (same process). Thus this signal +%% would indicate sufficiently to all the slaves that they must insert +%% the complete contents of the txn at precisely this point in the +%% stream of events. +%% +%% However, it's quite difficult for the slaves to make that happen: +%% they would be forced to issue the bq:tx_commit at that point, but +%% then stall processing any further instructions from gm until they +%% receive the notification from their bq that the tx_commit has fully +%% completed (i.e. they need to treat what is an async system as being +%% fully synchronous). This is not too bad (apart from the +%% vomit-inducing notion of it all): just need a queue of instructions +%% from the GM; but then it gets rather worse when you consider what +%% needs to happen if the master dies at this point and the slave in +%% the middle of this tx_commit needs to be promoted. 
+%% +%% Finally, we can't possibly hope to make transactions atomic across +%% mirror queues, and it's not even clear that that's desirable: if a +%% slave fails whilst there's an open transaction in progress then +%% when the channel comes to commit the txn, it will detect the +%% failure and destroy the channel. However, the txn will have +%% actually committed successfully in all the other mirrors (including +%% master). To do this bit properly would require 2PC and all the +%% baggage that goes with that. +%% %%---------------------------------------------------------------------------- start_link(Queue, GM) -> diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index e6a71370..481ee7c4 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -44,46 +44,6 @@ %% For general documentation of HA design, see %% rabbit_mirror_queue_coordinator -%% -%% Some notes on transactions -%% -%% We don't support transactions on mirror queues. To do so is -%% challenging. The underlying bq is free to add the contents of the -%% txn to the queue proper at any point after the tx.commit comes in -%% but before the tx.commit-ok goes out. This means that it is not -%% safe for all mirrors to simply issue the BQ:tx_commit at the same -%% time, as the addition of the txn's contents to the queue may -%% subsequently be inconsistently interwoven with other actions on the -%% BQ. The solution to this is, in the master, wrap the PostCommitFun -%% and do the gm:broadcast in there: at that point, you're in the BQ -%% (well, there's actually nothing to stop that function being invoked -%% by some other process, but let's pretend for now: you could always -%% use run_backing_queue_async to ensure you really are in the queue -%% process), the gm:broadcast is safe because you don't have to worry -%% about races with other gm:broadcast calls (same process). 
Thus this -%% signal would indicate sufficiently to all the slaves that they must -%% insert the complete contents of the txn at precisely this point in -%% the stream of events. -%% -%% However, it's quite difficult for the slaves to make that happen: -%% they would be forced to issue the tx_commit at that point, but then -%% stall processing any further instructions from gm until they -%% receive the notification from their bq that the tx_commit has fully -%% completed (i.e. they need to treat what is an async system as being -%% fully synchronous). This is not too bad (apart from the -%% vomit-inducing notion of it all): just need a queue of instructions -%% from the GM; but then it gets rather worse when you consider what -%% needs to happen if the master dies at this point and the slave in -%% the middle of this tx_commit needs to be promoted. -%% -%% Finally, we can't possibly hope to make transactions atomic across -%% mirror queues, and it's not even clear that that's desirable: if a -%% slave fails whilst there's an open transaction in progress then -%% when the channel comes to commit the txn, it will detect the -%% failure and destroy the channel. However, the txn will have -%% actually committed successfully in all the other mirrors (including -%% master). To do this bit properly would require 2PC and all the -%% baggage that goes with that. 
%% --------------------------------------------------------------------------- %% Backing queue -- cgit v1.2.1 From 2fcc6f2cd5fc580dad0bd6419e311ad957bb29b7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 10 Apr 2011 13:05:36 +0100 Subject: Work on documentation of ha --- src/rabbit_mirror_queue_coordinator.erl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 7e521e49..05e4a808 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -210,6 +210,17 @@ %% master). To do this bit properly would require 2PC and all the %% baggage that goes with that. %% +%% Recovery of mirrored queues is straightforward: as nodes die, the +%% remaining nodes record this, and eventually a situation is reached +%% in which only one node is alive, which is the master. This is the +%% only node which, upon recovery, will resurrect a mirrored queue: +%% nodes which die and then rejoin as a slave will start off empty as +%% if they have no mirrored content at all. This is not surprising: to +%% achieve anything more sophisticated would require the master and +%% recovering slave to be able to check to see whether they agree on +%% the last seen state of the queue: checking length alone is not +%% sufficient in this case. +%% %%---------------------------------------------------------------------------- start_link(Queue, GM) -> -- cgit v1.2.1 From e326e7b7b90f232ef0c9d0e5875cf793d2175a9b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 13 Apr 2011 12:09:54 +0100 Subject: Undo merge from bug24038 which has now become INVALID. 
--- src/rabbit_amqqueue_process.erl | 46 ++++++++++------ src/rabbit_amqqueue_process_utils.erl | 99 ----------------------------------- src/rabbit_mirror_queue_slave.erl | 47 +++++++++++------ 3 files changed, 59 insertions(+), 133 deletions(-) delete mode 100644 src/rabbit_amqqueue_process_utils.erl diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 3bcdf706..53bdd3b2 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -21,6 +21,8 @@ -behaviour(gen_server2). -define(UNSENT_MESSAGE_LIMIT, 100). +-define(SYNC_INTERVAL, 25). %% milliseconds +-define(RAM_DURATION_UPDATE_INTERVAL, 5000). -define(BASE_MESSAGE_PROPERTIES, #message_properties{expiry = undefined, needs_confirming = false}). @@ -260,27 +262,37 @@ backing_queue_module(#amqqueue{arguments = Args}) -> _Nodes -> rabbit_mirror_queue_master end. +ensure_sync_timer(State = #q{sync_timer_ref = undefined}) -> + {ok, TRef} = timer:apply_after( + ?SYNC_INTERVAL, rabbit_amqqueue, sync_timeout, [self()]), + State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> - rabbit_amqqueue_process_utils:ensure_sync_timer( - fun sync_timer_getter/1, fun sync_timer_setter/2, State). - -stop_sync_timer(State) -> - rabbit_amqqueue_process_utils:stop_sync_timer( - fun sync_timer_getter/1, fun sync_timer_setter/2, State). - -sync_timer_getter(State) -> State#q.sync_timer_ref. -sync_timer_setter(Timer, State) -> State#q{sync_timer_ref = Timer}. + State. +stop_sync_timer(State = #q{sync_timer_ref = undefined}) -> + State; +stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> + {ok, cancel} = timer:cancel(TRef), + State#q{sync_timer_ref = undefined}. 
+ +ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> + {ok, TRef} = timer:apply_after( + ?RAM_DURATION_UPDATE_INTERVAL, + rabbit_amqqueue, update_ram_duration, + [self()]), + State#q{rate_timer_ref = TRef}; +ensure_rate_timer(State = #q{rate_timer_ref = just_measured}) -> + State#q{rate_timer_ref = undefined}; ensure_rate_timer(State) -> - rabbit_amqqueue_process_utils:ensure_rate_timer( - fun rate_timer_getter/1, fun rate_timer_setter/2, State). - -stop_rate_timer(State) -> - rabbit_amqqueue_process_utils:stop_rate_timer( - fun rate_timer_getter/1, fun rate_timer_setter/2, State). + State. -rate_timer_getter(State) -> State#q.rate_timer_ref. -rate_timer_setter(Timer, State) -> State#q{rate_timer_ref = Timer}. +stop_rate_timer(State = #q{rate_timer_ref = undefined}) -> + State; +stop_rate_timer(State = #q{rate_timer_ref = just_measured}) -> + State#q{rate_timer_ref = undefined}; +stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> + {ok, cancel} = timer:cancel(TRef), + State#q{rate_timer_ref = undefined}. stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) -> State; diff --git a/src/rabbit_amqqueue_process_utils.erl b/src/rabbit_amqqueue_process_utils.erl deleted file mode 100644 index feb2a79c..00000000 --- a/src/rabbit_amqqueue_process_utils.erl +++ /dev/null @@ -1,99 +0,0 @@ -%% The contents of this file are subject to the Mozilla Public License -%% Version 1.1 (the "License"); you may not use this file except in -%% compliance with the License. You may obtain a copy of the License at -%% http://www.mozilla.org/MPL/ -%% -%% Software distributed under the License is distributed on an "AS IS" -%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -%% License for the specific language governing rights and limitations -%% under the License. -%% -%% The Original Code is RabbitMQ. -%% -%% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 201-2011 VMware, Inc. All rights reserved. 
-%% - --module(rabbit_amqqueue_process_utils). - --define(SYNC_INTERVAL, 25). %% milliseconds --define(RAM_DURATION_UPDATE_INTERVAL, 5000). - --export([backing_queue_pre_hibernate/2, - ensure_sync_timer/3, stop_sync_timer/3, - ensure_rate_timer/3, stop_rate_timer/3]). - -%%---------------------------------------------------------------------------- - --ifdef(use_specs). - --type(bq_mod() :: atom()). --type(bq_state() :: any()). %% A good example of dialyzer's shortcomings - --type(queue_state() :: any()). %% Another such example. --type(getter(A) :: fun ((queue_state()) -> A)). --type(setter(A) :: fun ((A, queue_state()) -> queue_state())). - --type(tref() :: term()). %% Sigh. According to timer docs. - --spec(backing_queue_pre_hibernate/2 :: (bq_mod(), bq_state()) -> bq_state()). - --spec(ensure_sync_timer/3 :: (getter('undefined'|tref()), - setter('undefined'|tref()), - queue_state()) -> queue_state()). --spec(stop_sync_timer/3 :: (getter('undefined'|tref()), - setter('undefined'|tref()), - queue_state()) -> queue_state()). - --spec(ensure_rate_timer/3 :: (getter('undefined'|'just_measured'|tref()), - setter('undefined'|'just_measured'|tref()), - queue_state()) -> queue_state()). --spec(stop_rate_timer/3 :: (getter('undefined'|'just_measured'|tref()), - setter('undefined'|'just_measured'|tref()), - queue_state()) -> queue_state()). - --endif. - -%%---------------------------------------------------------------------------- - -backing_queue_pre_hibernate(BQ, BQS) -> - {RamDuration, BQS1} = BQ:ram_duration(BQS), - DesiredDuration = - rabbit_memory_monitor:report_ram_duration(self(), RamDuration), - BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - BQ:handle_pre_hibernate(BQS2). - -ensure_sync_timer(Getter, Setter, State) -> - case Getter(State) of - undefined -> {ok, TRef} = timer:apply_after( - ?SYNC_INTERVAL, rabbit_amqqueue, - sync_timeout, [self()]), - Setter(TRef, State); - _TRef -> State - end. 
- -stop_sync_timer(Getter, Setter, State) -> - case Getter(State) of - undefined -> State; - TRef -> {ok, cancel} = timer:cancel(TRef), - Setter(undefined, State) - end. - -ensure_rate_timer(Getter, Setter, State) -> - case Getter(State) of - undefined -> {ok, TRef} = - timer:apply_after( - ?RAM_DURATION_UPDATE_INTERVAL, rabbit_amqqueue, - update_ram_duration, [self()]), - Setter(TRef, State); - just_measured -> Setter(undefined, State); - _TRef -> State - end. - -stop_rate_timer(Getter, Setter, State) -> - case Getter(State) of - undefined -> State; - just_measured -> Setter(undefined, State); - TRef -> {ok, cancel} = timer:cancel(TRef), - Setter(undefined, State) - end. diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 89b8971c..e3cfe54d 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -47,6 +47,9 @@ -include("rabbit.hrl"). -include("gm_specs.hrl"). +-define(SYNC_INTERVAL, 25). %% milliseconds +-define(RAM_DURATION_UPDATE_INTERVAL, 5000). + -record(state, { q, gm, master_node, @@ -478,27 +481,37 @@ next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) -> backing_queue_idle_timeout(State = #state { backing_queue = BQ }) -> run_backing_queue(BQ, fun (M, BQS) -> M:idle_timeout(BQS) end, State). +ensure_sync_timer(State = #state { sync_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after( + ?SYNC_INTERVAL, rabbit_amqqueue, sync_timeout, [self()]), + State #state { sync_timer_ref = TRef }; ensure_sync_timer(State) -> - rabbit_amqqueue_process_utils:ensure_sync_timer( - fun sync_timer_getter/1, fun sync_timer_setter/2, State). - -stop_sync_timer(State) -> - rabbit_amqqueue_process_utils:stop_sync_timer( - fun sync_timer_getter/1, fun sync_timer_setter/2, State). - -sync_timer_getter(State) -> State#state.sync_timer_ref. -sync_timer_setter(Timer, State) -> State#state{sync_timer_ref = Timer}. + State. 
+stop_sync_timer(State = #state { sync_timer_ref = undefined }) -> + State; +stop_sync_timer(State = #state { sync_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #state { sync_timer_ref = undefined }. + +ensure_rate_timer(State = #state { rate_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after( + ?RAM_DURATION_UPDATE_INTERVAL, + rabbit_amqqueue, update_ram_duration, + [self()]), + State #state { rate_timer_ref = TRef }; +ensure_rate_timer(State = #state { rate_timer_ref = just_measured }) -> + State #state { rate_timer_ref = undefined }; ensure_rate_timer(State) -> - rabbit_amqqueue_process_utils:ensure_rate_timer( - fun rate_timer_getter/1, fun rate_timer_setter/2, State). - -stop_rate_timer(State) -> - rabbit_amqqueue_process_utils:stop_rate_timer( - fun rate_timer_getter/1, fun rate_timer_setter/2, State). + State. -rate_timer_getter(State) -> State#state.rate_timer_ref. -rate_timer_setter(Timer, State) -> State#state{rate_timer_ref = Timer}. +stop_rate_timer(State = #state { rate_timer_ref = undefined }) -> + State; +stop_rate_timer(State = #state { rate_timer_ref = just_measured }) -> + State #state { rate_timer_ref = undefined }; +stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #state { rate_timer_ref = undefined }. 
maybe_enqueue_message( Delivery = #delivery { message = #basic_message { id = MsgId }, -- cgit v1.2.1 From 65dab4241fb3edf8b1fd8da8fe9d5536623e6184 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 13 Apr 2011 15:27:29 +0100 Subject: Forgot to undo these bits, and the boot sequence has been changed, so debitrot --- src/rabbit_amqqueue_process.erl | 8 ++++++-- src/rabbit_mirror_queue_slave.erl | 8 ++++++-- src/rabbit_mirror_queue_slave_sup.erl | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 53bdd3b2..a8b19b72 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -1234,11 +1234,15 @@ handle_pre_hibernate(State = #q{backing_queue_state = undefined}) -> handle_pre_hibernate(State = #q{backing_queue = BQ, backing_queue_state = BQS, stats_timer = StatsTimer}) -> - BQS1 = rabbit_amqqueue_process_utils:backing_queue_pre_hibernate(BQ, BQS), + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + BQS3 = BQ:handle_pre_hibernate(BQS2), rabbit_event:if_enabled(StatsTimer, fun () -> emit_stats(State, [{idle_since, now()}]) end), State1 = State#q{stats_timer = rabbit_event:stop_stats_timer(StatsTimer), - backing_queue_state = BQS1}, + backing_queue_state = BQS3}, {hibernate, stop_rate_timer(State1)}. diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index e3cfe54d..cceb67e2 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -235,8 +235,12 @@ code_change(_OldVsn, State, _Extra) -> handle_pre_hibernate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQS1 = rabbit_amqqueue_process_utils:backing_queue_pre_hibernate(BQ, BQS), - {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS1 })}. 
+ {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + BQS3 = BQ:handle_pre_hibernate(BQS2), + {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS3 })}. prioritise_call(Msg, _From, _State) -> case Msg of diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl index 2fb3be51..25ee1fd0 100644 --- a/src/rabbit_mirror_queue_slave_sup.erl +++ b/src/rabbit_mirror_queue_slave_sup.erl @@ -19,7 +19,7 @@ -rabbit_boot_step({mirror_queue_slave_sup, [{description, "mirror queue slave sup"}, {mfa, {rabbit_mirror_queue_slave_sup, start, []}}, - {requires, queue_sup_queue_recovery}, + {requires, recovery}, {enables, routing_ready}]}). -rabbit_boot_step({mirrored_queues, -- cgit v1.2.1 From 9080f592a2d9413138ee46a079a6ac761459a75c Mon Sep 17 00:00:00 2001 From: Rob Harrop Date: Mon, 16 May 2011 14:58:55 +0100 Subject: Fixed call to validate_message --- src/rabbit_mirror_queue_master.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 481ee7c4..f54c8c37 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -287,7 +287,7 @@ is_duplicate(none, Message = #basic_message { id = MsgId }, error -> %% We permit the underlying BQ to have a peek at it, but %% only if we ourselves are not filtering out the msg. - {Result, BQS1} = BQ:validate_message(Message, BQS), + {Result, BQS1} = BQ:is_duplicate(none, Message, BQS), {Result, State #state { backing_queue_state = BQS1 }}; {ok, published} -> %% It already got published when we were a slave and no -- cgit v1.2.1 From 04c6113a9f40b08868b4c32cd1467002d849722c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 May 2011 11:14:36 +0100 Subject: Not especially happy with this but can't think of how else to solve this. 
If the queue has mirrors then you may well find that during death you try to contact a dead queue process, because the mnesia table is yet to be updated. In such cases, loop. --- src/rabbit_amqqueue.erl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index d79fe9df..534d1002 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -256,8 +256,13 @@ lookup(Name) -> with(Name, F, E) -> case lookup(Name) of - {ok, Q} -> rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); - {error, not_found} -> E() + {ok, Q = #amqqueue{mirror_pids = []}} -> + rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); + {ok, Q} -> + E1 = fun () -> with(Name, F, E) end, + rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end); + {error, not_found} -> + E() end. with(Name, F) -> -- cgit v1.2.1 From 990c53d772565fc6967b1cad17587bcc1e82b153 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 May 2011 11:25:58 +0100 Subject: Add a sleep, to avoid tight spinning --- src/rabbit_amqqueue.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 534d1002..8c374ef3 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -259,6 +259,7 @@ with(Name, F, E) -> {ok, Q = #amqqueue{mirror_pids = []}} -> rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); {ok, Q} -> + timer:sleep(25), E1 = fun () -> with(Name, F, E) end, rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end); {error, not_found} -> -- cgit v1.2.1 From 9e744ff212999ee6e4244504ffd4878334c7846a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 May 2011 12:12:48 +0100 Subject: Ensure that when a slave gets promoted, it requeues msgs in the same order which they were fetched --- src/rabbit_mirror_queue_slave.erl | 40 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl 
b/src/rabbit_mirror_queue_slave.erl index cceb67e2..052078bd 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -60,6 +60,7 @@ sender_queues, %% :: Pid -> MsgQ msg_id_ack, %% :: MsgId -> AckTag + ack_num, msg_id_status }). @@ -108,6 +109,8 @@ init([#amqqueue { name = QueueName } = Q]) -> sender_queues = dict:new(), msg_id_ack = dict:new(), + ack_num = 0, + msg_id_status = dict:new() }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -456,7 +459,8 @@ promote_me(From, #state { q = Q, MTC = dict:from_list( [{MsgId, {ChPid, MsgSeqNo}} || {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]), - AckTags = [AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], + NumAckTags = [NumAckTag || {_MsgId, NumAckTag} <- dict:to_list(MA)], + AckTags = [AckTag || {_Num, AckTag} <- lists:sort(NumAckTags)], Deliveries = [Delivery || {_ChPid, PubQ} <- dict:to_list(SQ), {Delivery, true} <- queue:to_list(PubQ)], QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( @@ -568,7 +572,6 @@ process_instruction( State = #state { sender_queues = SQ, backing_queue = BQ, backing_queue_state = BQS, - msg_id_ack = MA, msg_id_status = MS }) -> %% We really are going to do the publish right now, even though we @@ -628,12 +631,8 @@ process_instruction( {true, AckRequired} -> {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), - MA1 = case AckRequired of - true -> dict:store(MsgId, AckTag, MA); - false -> MA - end, - State1 #state { backing_queue_state = BQS1, - msg_id_ack = MA1 } + maybe_store_ack(AckRequired, MsgId, AckTag, + State1 #state { backing_queue_state = BQS1 }) end}; process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, State = #state { sender_queues = SQ, @@ -688,19 +687,14 @@ process_instruction({set_length, Length}, end}; process_instruction({fetch, AckRequired, MsgId, Remaining}, State = #state { backing_queue = BQ, - backing_queue_state = BQS, - 
msg_id_ack = MA }) -> + backing_queue_state = BQS }) -> QLen = BQ:len(BQS), {ok, case QLen - 1 of Remaining -> {{_Msg, _IsDelivered, AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS), - MA1 = case AckRequired of - true -> dict:store(MsgId, AckTag, MA); - false -> MA - end, - State #state { backing_queue_state = BQS1, - msg_id_ack = MA1 }; + maybe_store_ack(AckRequired, MsgId, AckTag, + State #state { backing_queue_state = BQS1 }); Other when Other < Remaining -> %% we must be shorter than the master State @@ -744,11 +738,19 @@ msg_ids_to_acktags(MsgIds, MA) -> lists:foldl( fun (MsgId, {Acc, MAN}) -> case dict:find(MsgId, MA) of - error -> {Acc, MAN}; - {ok, AckTag} -> {[AckTag | Acc], dict:erase(MsgId, MAN)} + error -> {Acc, MAN}; + {ok, {_Num, AckTag}} -> {[AckTag | Acc], + dict:erase(MsgId, MAN)} end end, {[], MA}, MsgIds), {lists:reverse(AckTags), MA1}. ack_all(BQ, MA, BQS) -> - BQ:ack([AckTag || {_MsgId, AckTag} <- dict:to_list(MA)], BQS). + BQ:ack([AckTag || {_MsgId, {_Num, AckTag}} <- dict:to_list(MA)], BQS). + +maybe_store_ack(false, _MsgId, _AckTag, State) -> + State; +maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA, + ack_num = Num }) -> + State #state { msg_id_ack = dict:store(MsgId, {Num, AckTag}, MA), + ack_num = Num + 1 }. -- cgit v1.2.1 From 26b59ab8d2e38fab17d45470d17091ced7afca2a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 May 2011 12:28:39 +0100 Subject: Add upgrade step to add the mirror pids to the queue --- src/rabbit_upgrade_functions.erl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/rabbit_upgrade_functions.erl b/src/rabbit_upgrade_functions.erl index 31bbb929..325156b1 100644 --- a/src/rabbit_upgrade_functions.erl +++ b/src/rabbit_upgrade_functions.erl @@ -28,6 +28,7 @@ -rabbit_upgrade({topic_trie, mnesia, []}). -rabbit_upgrade({semi_durable_route, mnesia, []}). -rabbit_upgrade({exchange_event_serial, mnesia, []}). +-rabbit_upgrade({mirror_pids, mnesia, []}). 
%% ------------------------------------------------------------------- @@ -41,6 +42,7 @@ -spec(topic_trie/0 :: () -> 'ok'). -spec(exchange_event_serial/0 :: () -> 'ok'). -spec(semi_durable_route/0 :: () -> 'ok'). +-spec(mirror_pids/0 :: () -> 'ok'). -endif. @@ -113,6 +115,19 @@ exchange_event_serial() -> create(rabbit_exchange_serial, [{record_name, exchange_serial}, {attributes, [name, next]}]). +mirror_pids() -> + Tables = [rabbit_queue, rabbit_durable_queue], + AddMirrorPidsFun = + fun ({amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid}) -> + {amqqueue, Name, Durable, AutoDelete, Owner, Arguments, Pid, []} + end, + [ ok = transform(T, + AddMirrorPidsFun, + [name, durable, auto_delete, exclusive_owner, arguments, + pid, mirror_pids]) + || T <- Tables ], + ok. + %%-------------------------------------------------------------------- transform(TableName, Fun, FieldList) -> -- cgit v1.2.1 From 86da4d3fdb023363d52eec0d95fd823d707ce29c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 May 2011 16:40:01 +0100 Subject: Eliminate a race which was found to allow promotion to be non-atomic --- src/rabbit_mirror_queue_misc.erl | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index bf341c74..5f180c5e 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -20,6 +20,11 @@ -include("rabbit.hrl"). +%% If the dead pids include the queue pid (i.e. the master has died) +%% then only remove that if we are about to be promoted. Otherwise we +%% can have the situation where a slave updates the mnesia record for +%% a queue, promoting another slave before that slave realises it has +%% become the new master. 
remove_from_queue(QueueName, DeadPids) -> DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], rabbit_misc:execute_mnesia_transaction( @@ -35,13 +40,22 @@ remove_from_queue(QueueName, DeadPids) -> not lists:member(node(Pid), DeadNodes)], case {{QPid, MPids}, {QPid1, MPids1}} of {Same, Same} -> - {ok, QPid}; - _ -> + ok; + _ when QPid =:= QPid1 orelse node(QPid1) =:= node() -> + %% Either master hasn't changed, so + %% we're ok to update mnesia; or master + %% has changed to become us! Q1 = Q #amqqueue { pid = QPid1, mirror_pids = MPids1 }, - ok = rabbit_amqqueue:store_queue(Q1), - {ok, QPid1} - end + ok = rabbit_amqqueue:store_queue(Q1); + _ -> + %% Master has changed, and we're not it, + %% so leave alone to allow the promoted + %% slave to find it and make its + %% promotion atomic. + ok + end, + {ok, QPid1} end end). -- cgit v1.2.1 From 76bf983a059a1f431be1452896baed32b8eef4bf Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 17 May 2011 17:29:38 +0100 Subject: Improve logging information --- src/rabbit_mirror_queue_coordinator.erl | 3 ++- src/rabbit_mirror_queue_slave.erl | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 05e4a808..729749dc 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -257,7 +257,8 @@ handle_call(get_gm, _From, State = #state { gm = GM }) -> handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName } }) -> rabbit_log:info("Master ~p saw deaths ~p for ~s~n", - [self(), Deaths, rabbit_misc:rs(QueueName)]), + [self(), [{Pid, node(Pid)} || Pid <- Deaths], + rabbit_misc:rs(QueueName)]), case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of {ok, Pid} when node(Pid) =:= node() -> noreply(State); diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 052078bd..fdf9d9bc 100644 --- 
a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -143,7 +143,8 @@ handle_call({gm_deaths, Deaths}, From, gm = GM, master_node = MNode }) -> rabbit_log:info("Slave ~p saw deaths ~p for ~s~n", - [self(), Deaths, rabbit_misc:rs(QueueName)]), + [self(), [{Pid, node(Pid)} || Pid <- Deaths], + rabbit_misc:rs(QueueName)]), %% The GM has told us about deaths, which means we're not going to %% receive any more messages from GM case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of -- cgit v1.2.1 From 3ecee483b94dd4c09875979d11cca3c58bcd89db Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 19 May 2011 15:10:11 +0100 Subject: Make slaves explicitly monitor the master (see comments to come in bug) --- src/rabbit_mirror_queue_slave.erl | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index fdf9d9bc..422b0d59 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -93,6 +93,7 @@ init([#amqqueue { name = QueueName } = Q]) -> write), {ok, QPid} end), + erlang:monitor(process, MPid), ok = file_handle_cache:register_callback( rabbit_amqqueue, set_maximum_since_use, [self()]), ok = rabbit_memory_monitor:register( @@ -149,11 +150,15 @@ handle_call({gm_deaths, Deaths}, From, %% receive any more messages from GM case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of {ok, Pid} when node(Pid) =:= MNode -> + %% master hasn't changed reply(ok, State); {ok, Pid} when node(Pid) =:= node() -> + %% we've become master promote_me(From, State); {ok, Pid} -> + %% master has changed to not us. 
gen_server2:reply(From, ok), + erlang:monitor(process, Pid), ok = gm:broadcast(GM, heartbeat), noreply(State #state { master_node = node(Pid) }); {error, not_found} -> @@ -209,6 +214,11 @@ handle_cast({rollback, _Txn, _ChPid}, State) -> handle_info(timeout, State) -> noreply(backing_queue_idle_timeout(State)); +handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, + State = #state { gm = GM }) -> + ok = gm:broadcast(GM, {process_death, Pid}), + noreply(State); + handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. @@ -276,6 +286,16 @@ joined([SPid], _Members) -> members_changed([_SPid], _Births, []) -> ok; members_changed([SPid], _Births, Deaths) -> + inform_deaths(SPid, Deaths). + +handle_msg([_SPid], _From, heartbeat) -> + ok; +handle_msg([SPid], _From, {process_death, Pid}) -> + inform_deaths(SPid, [Pid]); +handle_msg([SPid], _From, Msg) -> + ok = gen_server2:cast(SPid, {gm, Msg}). + +inform_deaths(SPid, Deaths) -> rabbit_misc:with_exit_handler( fun () -> {stop, normal} end, fun () -> @@ -287,11 +307,6 @@ members_changed([SPid], _Births, Deaths) -> end end). -handle_msg([_SPid], _From, heartbeat) -> - ok; -handle_msg([SPid], _From, Msg) -> - ok = gen_server2:cast(SPid, {gm, Msg}). - %% --------------------------------------------------------------------------- %% Others %% --------------------------------------------------------------------------- -- cgit v1.2.1 From 862080c059402681e29560a34b264ab5ea8769c3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 19 May 2011 15:57:43 +0100 Subject: Be more assertive. 
Slave is the equiv of amqqueue_process, so it's ok here to know the message is actually a #basic_message and thus contains an id which can be matched against --- src/rabbit_mirror_queue_slave.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 422b0d59..da01e076 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -707,8 +707,8 @@ process_instruction({fetch, AckRequired, MsgId, Remaining}, QLen = BQ:len(BQS), {ok, case QLen - 1 of Remaining -> - {{_Msg, _IsDelivered, AckTag, Remaining}, BQS1} = - BQ:fetch(AckRequired, BQS), + {{#basic_message{id = MsgId}, _IsDelivered, + AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS), maybe_store_ack(AckRequired, MsgId, AckTag, State #state { backing_queue_state = BQS1 }); Other when Other < Remaining -> -- cgit v1.2.1 From a588eacca187df9f0e97aff6cce91423f9bc8539 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 20 May 2011 12:01:08 +0100 Subject: Make log messages prettier --- src/rabbit_mirror_queue_coordinator.erl | 7 ++++--- src/rabbit_mirror_queue_slave.erl | 12 +++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 729749dc..8ddda1cd 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -256,9 +256,10 @@ handle_call(get_gm, _From, State = #state { gm = GM }) -> handle_cast({gm_deaths, Deaths}, State = #state { q = #amqqueue { name = QueueName } }) -> - rabbit_log:info("Master ~p saw deaths ~p for ~s~n", - [self(), [{Pid, node(Pid)} || Pid <- Deaths], - rabbit_misc:rs(QueueName)]), + rabbit_log:info("Mirrored-queue (~s): Master ~s saw deaths of mirrors ~s~n", + [rabbit_misc:rs(QueueName), + rabbit_misc:pid_to_string(self()), + [[rabbit_misc:pid_to_string(Pid), $ ] || Pid <- Deaths]]), case 
rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of {ok, Pid} when node(Pid) =:= node() -> noreply(State); diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index da01e076..fc50c932 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -143,9 +143,10 @@ handle_call({gm_deaths, Deaths}, From, State = #state { q = #amqqueue { name = QueueName }, gm = GM, master_node = MNode }) -> - rabbit_log:info("Slave ~p saw deaths ~p for ~s~n", - [self(), [{Pid, node(Pid)} || Pid <- Deaths], - rabbit_misc:rs(QueueName)]), + rabbit_log:info("Mirrored-queue (~s): Slave ~s saw deaths of mirrors ~s~n", + [rabbit_misc:rs(QueueName), + rabbit_misc:pid_to_string(self()), + [[rabbit_misc:pid_to_string(Pid), $ ] || Pid <- Deaths]]), %% The GM has told us about deaths, which means we're not going to %% receive any more messages from GM case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of @@ -392,8 +393,9 @@ promote_me(From, #state { q = Q, sender_queues = SQ, msg_id_ack = MA, msg_id_status = MS }) -> - rabbit_log:info("Promoting slave ~p for ~s~n", - [self(), rabbit_misc:rs(Q #amqqueue.name)]), + rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n", + [rabbit_misc:rs(Q #amqqueue.name), + rabbit_misc:pid_to_string(self())]), {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), -- cgit v1.2.1 From a749a8f513b881ff35f1de0e225b58aa6575460f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 20 May 2011 14:32:16 +0100 Subject: BQ:idle_timeout => BQ:timeout --- src/rabbit_mirror_queue_master.erl | 10 +++++----- src/rabbit_mirror_queue_slave.erl | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index f54c8c37..e973ea78 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl 
@@ -21,7 +21,7 @@ tx_publish/5, tx_ack/3, tx_rollback/2, tx_commit/4, requeue/3, len/1, is_empty/1, drain_confirmed/1, dropwhile/2, set_ram_duration_target/2, ram_duration/1, - needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, + needs_timeout/1, timeout/1, handle_pre_hibernate/1, status/1, invoke/3, is_duplicate/3, discard/3]). -export([start/1, stop/0]). @@ -252,11 +252,11 @@ ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> {Result, BQS1} = BQ:ram_duration(BQS), {Result, State #state { backing_queue_state = BQS1 }}. -needs_idle_timeout(#state { backing_queue = BQ, backing_queue_state = BQS}) -> - BQ:needs_idle_timeout(BQS). +needs_timeout(#state { backing_queue = BQ, backing_queue_state = BQS}) -> + BQ:needs_timeout(BQS). -idle_timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> - State #state { backing_queue_state = BQ:idle_timeout(BQS) }. +timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> + State #state { backing_queue_state = BQ:timeout(BQS) }. handle_pre_hibernate(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index fc50c932..46020271 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -205,7 +205,7 @@ handle_cast(update_ram_duration, backing_queue_state = BQS2 }); handle_cast(sync_timeout, State) -> - noreply(backing_queue_idle_timeout( + noreply(backing_queue_timeout( State #state { sync_timer_ref = undefined })); handle_cast({rollback, _Txn, _ChPid}, State) -> @@ -213,7 +213,7 @@ handle_cast({rollback, _Txn, _ChPid}, State) -> noreply(State). 
handle_info(timeout, State) -> - noreply(backing_queue_idle_timeout(State)); + noreply(backing_queue_timeout(State)); handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, State = #state { gm = GM }) -> @@ -499,13 +499,13 @@ next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) -> State1 = ensure_rate_timer( confirm_messages(MsgIds, State #state { backing_queue_state = BQS1 })), - case BQ:needs_idle_timeout(BQS1) of + case BQ:needs_timeout(BQS1) of true -> {ensure_sync_timer(State1), 0}; false -> {stop_sync_timer(State1), hibernate} end. -backing_queue_idle_timeout(State = #state { backing_queue = BQ }) -> - run_backing_queue(BQ, fun (M, BQS) -> M:idle_timeout(BQS) end, State). +backing_queue_timeout(State = #state { backing_queue = BQ }) -> + run_backing_queue(BQ, fun (M, BQS) -> M:timeout(BQS) end, State). ensure_sync_timer(State = #state { sync_timer_ref = undefined }) -> {ok, TRef} = timer:apply_after( -- cgit v1.2.1 From 790dbcf244cd457692f084f99edbed64fa2cde15 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 20 May 2011 14:34:55 +0100 Subject: ...and the other bits I forgot. --- src/rabbit_mirror_queue_slave.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 46020271..63a43197 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -500,8 +500,9 @@ next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) -> confirm_messages(MsgIds, State #state { backing_queue_state = BQS1 })), case BQ:needs_timeout(BQS1) of - true -> {ensure_sync_timer(State1), 0}; - false -> {stop_sync_timer(State1), hibernate} + false -> {stop_sync_timer(State1), hibernate}; + idle -> {stop_sync_timer(State1), 0 }; + timed -> {ensure_sync_timer(State1), 0 } end. 
backing_queue_timeout(State = #state { backing_queue = BQ }) -> -- cgit v1.2.1 From 77b47e488406bb4c65f5c4b0773b209268038824 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 20 May 2011 18:27:35 +0100 Subject: That's an awful lot of work to solve a potential memory leak... --- src/rabbit_amqqueue.erl | 19 +++- src/rabbit_amqqueue_process.erl | 46 ++++---- src/rabbit_mirror_queue_coordinator.erl | 47 +++++++-- src/rabbit_mirror_queue_master.erl | 53 ++++++++-- src/rabbit_mirror_queue_slave.erl | 179 +++++++++++++++++++++++--------- 5 files changed, 257 insertions(+), 87 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 8c374ef3..0550f13b 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -33,6 +33,7 @@ %% internal -export([internal_declare/2, internal_delete/1, run_backing_queue/3, run_backing_queue_async/3, + run_backing_queue/4, run_backing_queue_async/4, sync_timeout/1, update_ram_duration/1, set_ram_duration_target/2, set_maximum_since_use/2, maybe_expire/1, drop_expired/1, emit_stats/1]). @@ -149,6 +150,14 @@ -spec(run_backing_queue_async/3 :: (pid(), atom(), (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) -> 'ok'). +-spec(run_backing_queue/4 :: + (pid(), atom(), + (fun ((atom(), A) -> {[rabbit_types:msg_id()], A})), + integer() | 'default') -> 'ok'). +-spec(run_backing_queue_async/4 :: + (pid(), atom(), + (fun ((atom(), A) -> {[rabbit_types:msg_id()], A})), + integer() | 'default') -> 'ok'). -spec(sync_timeout/1 :: (pid()) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). -spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). @@ -448,10 +457,16 @@ internal_delete(QueueName) -> end). run_backing_queue(QPid, Mod, Fun) -> - gen_server2:call(QPid, {run_backing_queue, Mod, Fun}, infinity). + run_backing_queue(QPid, Mod, Fun, default). run_backing_queue_async(QPid, Mod, Fun) -> - gen_server2:cast(QPid, {run_backing_queue, Mod, Fun}). 
+ run_backing_queue_async(QPid, Mod, Fun, default). + +run_backing_queue(QPid, Mod, Fun, Priority) -> + gen_server2:call(QPid, {run_backing_queue, Mod, Fun, Priority}, infinity). + +run_backing_queue_async(QPid, Mod, Fun, Priority) -> + gen_server2:cast(QPid, {run_backing_queue, Mod, Fun, Priority}). sync_timeout(QPid) -> gen_server2:cast(QPid, sync_timeout). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index d654f372..7daf869b 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -127,7 +127,7 @@ init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, State = requeue_and_run( AckTags, process_args( - #q{q = Q#amqqueue{pid = self()}, + #q{q = Q, exclusive_consumer = none, has_had_consumers = false, backing_queue = BQ, @@ -843,29 +843,31 @@ emit_consumer_deleted(ChPid, ConsumerTag) -> prioritise_call(Msg, _From, _State) -> case Msg of - info -> 9; - {info, _Items} -> 9; - consumers -> 9; - {run_backing_queue, _Mod, _Fun} -> 6; - _ -> 0 + info -> 9; + {info, _Items} -> 9; + consumers -> 9; + {run_backing_queue, _Mod, _Fun, default} -> 6; + {run_backing_queue, _Mod, _Fun, Priority} -> Priority; + _ -> 0 end. 
prioritise_cast(Msg, _State) -> case Msg of - update_ram_duration -> 8; - delete_immediately -> 8; - {set_ram_duration_target, _Duration} -> 8; - {set_maximum_since_use, _Age} -> 8; - maybe_expire -> 8; - drop_expired -> 8; - emit_stats -> 7; - {ack, _Txn, _AckTags, _ChPid} -> 7; - {reject, _AckTags, _Requeue, _ChPid} -> 7; - {notify_sent, _ChPid} -> 7; - {unblock, _ChPid} -> 7; - {run_backing_queue, _Mod, _Fun} -> 6; - sync_timeout -> 6; - _ -> 0 + update_ram_duration -> 8; + delete_immediately -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + maybe_expire -> 8; + drop_expired -> 8; + emit_stats -> 7; + {ack, _Txn, _AckTags, _ChPid} -> 7; + {reject, _AckTags, _Requeue, _ChPid} -> 7; + {notify_sent, _ChPid} -> 7; + {unblock, _ChPid} -> 7; + {run_backing_queue, _Mod, _Fun, default} -> 6; + {run_backing_queue, _Mod, _Fun, Priority} -> Priority; + sync_timeout -> 6; + _ -> 0 end. prioritise_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, @@ -1079,11 +1081,11 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> noreply(requeue_and_run(AckTags, State)) end; -handle_call({run_backing_queue, Mod, Fun}, _From, State) -> +handle_call({run_backing_queue, Mod, Fun, _Priority}, _From, State) -> reply(ok, run_backing_queue(Mod, Fun, State)). -handle_cast({run_backing_queue, Mod, Fun}, State) -> +handle_cast({run_backing_queue, Mod, Fun, _Priority}, State) -> noreply(run_backing_queue(Mod, Fun, State)); handle_cast(sync_timeout, State) -> diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 8ddda1cd..5660112a 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -16,7 +16,7 @@ -module(rabbit_mirror_queue_coordinator). --export([start_link/2, get_gm/1]). +-export([start_link/3, get_gm/1, ensure_monitoring/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). 
@@ -30,7 +30,9 @@ -include("gm_specs.hrl"). -record(state, { q, - gm + gm, + monitors, + death_fun }). -define(ONE_SECOND, 1000). @@ -223,17 +225,20 @@ %% %%---------------------------------------------------------------------------- -start_link(Queue, GM) -> - gen_server2:start_link(?MODULE, [Queue, GM], []). +start_link(Queue, GM, DeathFun) -> + gen_server2:start_link(?MODULE, [Queue, GM, DeathFun], []). get_gm(CPid) -> gen_server2:call(CPid, get_gm, infinity). +ensure_monitoring(CPid, Pids) -> + gen_server2:cast(CPid, {ensure_monitoring, Pids}). + %% --------------------------------------------------------------------------- %% gen_server %% --------------------------------------------------------------------------- -init([#amqqueue { name = QueueName } = Q, GM]) -> +init([#amqqueue { name = QueueName } = Q, GM, DeathFun]) -> GM1 = case GM of undefined -> ok = gm:create_tables(), @@ -248,7 +253,11 @@ init([#amqqueue { name = QueueName } = Q, GM]) -> end, {ok, _TRef} = timer:apply_interval(?ONE_SECOND, gm, broadcast, [GM1, heartbeat]), - {ok, #state { q = Q, gm = GM1 }, hibernate, + {ok, #state { q = Q, + gm = GM1, + monitors = dict:new(), + death_fun = DeathFun }, + hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. handle_call(get_gm, _From, State = #state { gm = GM }) -> @@ -265,7 +274,29 @@ handle_cast({gm_deaths, Deaths}, noreply(State); {error, not_found} -> {stop, normal, State} - end. + end; + +handle_cast({ensure_monitoring, Pids}, + State = #state { monitors = Monitors }) -> + Monitors1 = + lists:foldl(fun (Pid, MonitorsN) -> + case dict:is_key(Pid, MonitorsN) of + true -> MonitorsN; + false -> MRef = erlang:monitor(process, Pid), + dict:store(Pid, MRef, MonitorsN) + end + end, Monitors, Pids), + noreply(State #state { monitors = Monitors1 }). 
+ +handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, + State = #state { monitors = Monitors, + death_fun = Fun }) -> + noreply( + case dict:is_key(Pid, Monitors) of + false -> State; + true -> ok = Fun(Pid), + State #state { monitors = dict:erase(Pid, Monitors) } + end); handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. @@ -295,6 +326,8 @@ members_changed([CPid], _Births, Deaths) -> handle_msg([_CPid], _From, heartbeat) -> ok; +handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) -> + ok = gen_server2:cast(CPid, Msg); handle_msg([_CPid], _From, _Msg) -> ok. diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index e973ea78..0e7f32f0 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -26,7 +26,7 @@ -export([start/1, stop/0]). --export([promote_backing_queue_state/5]). +-export([promote_backing_queue_state/6, sender_death_fun/0]). -behaviour(rabbit_backing_queue). @@ -39,7 +39,8 @@ set_delivered, seen_status, confirmed, - ack_msg_id + ack_msg_id, + known_senders }). %% For general documentation of HA design, see @@ -58,9 +59,31 @@ stop() -> %% Same as start/1. exit({not_valid_for_generic_backing_queue, ?MODULE}). +sender_death_fun() -> + Self = self(), + fun (DeadPid) -> + %% Purposefully set the priority to 0 here so that we + %% don't overtake any messages from DeadPid that are + %% already in the queue. + rabbit_amqqueue:run_backing_queue_async( + Self, ?MODULE, + fun (?MODULE, State = #state { gm = GM, known_senders = KS }) -> + rabbit_log:info("Master saw death of sender ~p~n", [DeadPid]), + case sets:is_element(DeadPid, KS) of + false -> + State; + true -> + ok = gm:broadcast(GM, {sender_death, DeadPid}), + KS1 = sets:del_element(DeadPid, KS), + State #state { known_senders = KS1 } + end + end, 0) + end. 
+ init(#amqqueue { arguments = Args, name = QName } = Q, Recover, AsyncCallback, SyncCallback) -> - {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, undefined), + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( + Q, undefined, sender_death_fun()), GM = rabbit_mirror_queue_coordinator:get_gm(CPid), {_Type, Nodes} = rabbit_misc:table_lookup(Args, <<"x-mirror">>), Nodes1 = case Nodes of @@ -78,9 +101,10 @@ init(#amqqueue { arguments = Args, name = QName } = Q, Recover, set_delivered = 0, seen_status = dict:new(), confirmed = [], - ack_msg_id = dict:new() }. + ack_msg_id = dict:new(), + known_senders = sets:new() }. -promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus) -> +promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) -> #state { gm = GM, coordinator = CPid, backing_queue = BQ, @@ -88,7 +112,8 @@ promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus) -> set_delivered = BQ:len(BQS), seen_status = SeenStatus, confirmed = [], - ack_msg_id = dict:new() }. + ack_msg_id = dict:new(), + known_senders = sets:from_list(KS) }. terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. The queue is going down but @@ -119,7 +144,7 @@ publish(Msg = #basic_message { id = MsgId }, MsgProps, ChPid, false = dict:is_key(MsgId, SS), %% ASSERTION ok = gm:broadcast(GM, {publish, false, ChPid, MsgProps, Msg}), BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State #state { backing_queue_state = BQS1 }. + ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, ChPid, State = #state { gm = GM, @@ -136,8 +161,9 @@ publish_delivered(AckRequired, Msg = #basic_message { id = MsgId }, MsgProps, {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), AM1 = maybe_store_acktag(AckTag, MsgId, AM), - {AckTag, State #state { backing_queue_state = BQS1, - ack_msg_id = AM1 }}. 
+ {AckTag, + ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1, + ack_msg_id = AM1 })}. dropwhile(Fun, State = #state { gm = GM, backing_queue = BQ, @@ -341,3 +367,12 @@ maybe_store_acktag(undefined, _MsgId, AM) -> AM; maybe_store_acktag(AckTag, MsgId, AM) -> dict:store(AckTag, MsgId, AM). + +ensure_monitoring(ChPid, State = #state { coordinator = CPid, + known_senders = KS }) -> + case sets:is_element(ChPid, KS) of + true -> State; + false -> ok = rabbit_mirror_queue_coordinator:ensure_monitoring( + CPid, [ChPid]), + State #state { known_senders = sets:add_element(ChPid, KS) } + end. diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 63a43197..7fc2c8cb 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -49,10 +49,11 @@ -define(SYNC_INTERVAL, 25). %% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). +-define(DEATH_TIMEOUT, 20000). %% 20 seconds -record(state, { q, gm, - master_node, + master_pid, backing_queue, backing_queue_state, sync_timer_ref, @@ -62,7 +63,8 @@ msg_id_ack, %% :: MsgId -> AckTag ack_num, - msg_id_status + msg_id_status, + known_senders }). start_link(Q) -> @@ -102,7 +104,7 @@ init([#amqqueue { name = QueueName } = Q]) -> BQS = bq_init(BQ, Q, false), {ok, #state { q = Q, gm = GM, - master_node = node(MPid), + master_pid = MPid, backing_queue = BQ, backing_queue_state = BQS, rate_timer_ref = undefined, @@ -112,7 +114,8 @@ init([#amqqueue { name = QueueName } = Q]) -> msg_id_ack = dict:new(), ack_num = 0, - msg_id_status = dict:new() + msg_id_status = dict:new(), + known_senders = dict:new() }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
@@ -140,9 +143,9 @@ handle_call({deliver, Delivery = #delivery {}}, From, State) -> noreply(maybe_enqueue_message(Delivery, true, State)); handle_call({gm_deaths, Deaths}, From, - State = #state { q = #amqqueue { name = QueueName }, - gm = GM, - master_node = MNode }) -> + State = #state { q = #amqqueue { name = QueueName }, + gm = GM, + master_pid = MPid }) -> rabbit_log:info("Mirrored-queue (~s): Slave ~s saw deaths of mirrors ~s~n", [rabbit_misc:rs(QueueName), rabbit_misc:pid_to_string(self()), @@ -150,7 +153,7 @@ handle_call({gm_deaths, Deaths}, From, %% The GM has told us about deaths, which means we're not going to %% receive any more messages from GM case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of - {ok, Pid} when node(Pid) =:= MNode -> + {ok, Pid} when node(Pid) =:= node(MPid) -> %% master hasn't changed reply(ok, State); {ok, Pid} when node(Pid) =:= node() -> @@ -161,20 +164,20 @@ handle_call({gm_deaths, Deaths}, From, gen_server2:reply(From, ok), erlang:monitor(process, Pid), ok = gm:broadcast(GM, heartbeat), - noreply(State #state { master_node = node(Pid) }); + noreply(State #state { master_pid = Pid }); {error, not_found} -> gen_server2:reply(From, ok), {stop, normal, State} end; -handle_call({run_backing_queue, Mod, Fun}, _From, State) -> +handle_call({run_backing_queue, Mod, Fun, _Priority}, _From, State) -> reply(ok, run_backing_queue(Mod, Fun, State)); handle_call({commit, _Txn, _ChPid}, _From, State) -> %% We don't support transactions in mirror queues reply(ok, State). 
-handle_cast({run_backing_queue, Mod, Fun}, State) -> +handle_cast({run_backing_queue, Mod, Fun, _Priority}, State) -> noreply(run_backing_queue(Mod, Fun, State)); handle_cast({gm, Instruction}, State) -> @@ -215,11 +218,14 @@ handle_cast({rollback, _Txn, _ChPid}, State) -> handle_info(timeout, State) -> noreply(backing_queue_timeout(State)); -handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, - State = #state { gm = GM }) -> - ok = gm:broadcast(GM, {process_death, Pid}), +handle_info({'DOWN', _MonitorRef, process, MPid, _Reason}, + State = #state { gm = GM, master_pid = MPid }) -> + ok = gm:broadcast(GM, {process_death, MPid}), noreply(State); +handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) -> + noreply(local_sender_death(ChPid, State)); + handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. @@ -259,21 +265,23 @@ handle_pre_hibernate(State = #state { backing_queue = BQ, prioritise_call(Msg, _From, _State) -> case Msg of - {run_backing_queue, _Mod, _Fun} -> 6; - {gm_deaths, _Deaths} -> 5; - _ -> 0 + {run_backing_queue, _Mod, _Fun, default} -> 6; + {run_backing_queue, _Mod, _Fun, Priority} -> Priority; + {gm_deaths, _Deaths} -> 5; + _ -> 0 end. prioritise_cast(Msg, _State) -> case Msg of - update_ram_duration -> 8; - {set_ram_duration_target, _Duration} -> 8; - {set_maximum_since_use, _Age} -> 8; - {run_backing_queue, _Mod, _Fun} -> 6; - sync_timeout -> 6; - {gm, _Msg} -> 5; - {post_commit, _Txn, _AckTags} -> 4; - _ -> 0 + update_ram_duration -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + {run_backing_queue, _Mod, _Fun, default} -> 6; + {run_backing_queue, _Mod, _Fun, Priority} -> Priority; + sync_timeout -> 6; + {gm, _Msg} -> 5; + {post_commit, _Txn, _AckTags} -> 4; + _ -> 0 end. 
%% --------------------------------------------------------------------------- @@ -291,6 +299,9 @@ members_changed([SPid], _Births, Deaths) -> handle_msg([_SPid], _From, heartbeat) -> ok; +handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) -> + %% This is only of value to the master + ok; handle_msg([SPid], _From, {process_death, Pid}) -> inform_deaths(SPid, [Pid]); handle_msg([SPid], _From, Msg) -> @@ -327,9 +338,9 @@ bq_init(BQ, Q, Recover) -> end). run_backing_queue(rabbit_mirror_queue_master, Fun, State) -> - %% Yes, this might look a little crazy, but see comments around - %% process_instruction({tx_commit,...}, State). - Fun(rabbit_mirror_queue_master, State); + %% Yes, this might look a little crazy, but see comments in + %% local_sender_death/2 + Fun(?MODULE, State); run_backing_queue(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. @@ -392,15 +403,27 @@ promote_me(From, #state { q = Q, rate_timer_ref = RateTRef, sender_queues = SQ, msg_id_ack = MA, - msg_id_status = MS }) -> + msg_id_status = MS, + known_senders = KS }) -> rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n", [rabbit_misc:rs(Q #amqqueue.name), rabbit_misc:pid_to_string(self())]), - {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q, GM), + Q1 = Q #amqqueue { pid = self() }, + {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( + Q1, GM, rabbit_mirror_queue_master:sender_death_fun()), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), + %% Everything that we're monitoring, we need to ensure our new + %% coordinator is monitoring. 
+ + MonitoringPids = [begin true = erlang:demonitor(MRef), + Pid + end || {Pid, MRef} <- dict:to_list(KS)], + ok = rabbit_mirror_queue_coordinator:ensure_monitoring( + CPid, MonitoringPids), + %% We find all the messages that we've received from channels but %% not from gm, and if they're due to be enqueued on promotion %% then we pass them to the @@ -472,7 +495,7 @@ promote_me(From, #state { q = Q, Status =:= published orelse Status =:= confirmed]), MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - CPid, BQ, BQS, GM, SS), + CPid, BQ, BQS, GM, SS, MonitoringPids), MTC = dict:from_list( [{MsgId, {ChPid, MsgSeqNo}} || @@ -482,7 +505,7 @@ promote_me(From, #state { q = Q, Deliveries = [Delivery || {_ChPid, PubQ} <- dict:to_list(SQ), {Delivery, true} <- queue:to_list(PubQ)], QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( - Q, rabbit_mirror_queue_master, MasterState, RateTRef, + Q1, rabbit_mirror_queue_master, MasterState, RateTRef, AckTags, Deliveries, MTC), {become, rabbit_amqqueue_process, QueueState, hibernate}. @@ -540,6 +563,52 @@ stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> {ok, cancel} = timer:cancel(TRef), State #state { rate_timer_ref = undefined }. +ensure_monitoring(ChPid, State = #state { known_senders = KS }) -> + case dict:is_key(ChPid, KS) of + true -> State; + false -> MRef = erlang:monitor(process, ChPid), + State #state { known_senders = dict:store(ChPid, MRef, KS) } + end. + +local_sender_death(ChPid, State = #state { known_senders = KS }) -> + case dict:is_key(ChPid, KS) of + false -> + ok; + true -> + %% We have to deal with the possibility that we'll be + %% promoted to master before this thing gets + %% run. Consequently we set the module to + %% rabbit_mirror_queue_master so that if we do become a + %% rabbit_amqqueue_process before then, sane things will + %% happen. 
+ Fun = + fun (?MODULE, State1 = #state { known_senders = KS1, + gm = GM }) -> + %% We're running still as a slave + ok = case dict:is_key(ChPid, KS1) of + false -> + ok; + true -> + gm:broadcast( + GM, {ensure_monitoring, [ChPid]}) + end, + State1; + (rabbit_mirror_queue_master, State1) -> + %% We've become a master. State1 is now opaque + %% to us. When we became master, if ChPid was + %% still known to us then we'd have set up + %% monitoring of it then, so this is now a + %% noop. + State1 + end, + %% Note that we do not remove our knowledge of this ChPid + %% until we get the sender_death from GM. + timer:apply_after( + ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue_async, + [self(), rabbit_mirror_queue_master, Fun]) + end, + State. + maybe_enqueue_message( Delivery = #delivery { message = #basic_message { id = MsgId }, msg_seq_no = MsgSeqNo, @@ -548,6 +617,7 @@ maybe_enqueue_message( EnqueueOnPromotion, State = #state { sender_queues = SQ, msg_id_status = MS }) -> + State1 = ensure_monitoring(ChPid, State), %% We will never see {published, ChPid, MsgSeqNo} here. case dict:find(MsgId, MS) of error -> @@ -557,30 +627,30 @@ maybe_enqueue_message( end, SQ1 = dict:store(ChPid, queue:in({Delivery, EnqueueOnPromotion}, MQ), SQ), - State #state { sender_queues = SQ1 }; + State1 #state { sender_queues = SQ1 }; {ok, {confirmed, ChPid}} -> %% BQ has confirmed it but we didn't know what the %% msg_seq_no was at the time. We do now! ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), - State #state { msg_id_status = dict:erase(MsgId, MS) }; + State1 #state { msg_id_status = dict:erase(MsgId, MS) }; {ok, {published, ChPid}} -> %% It was published to the BQ and we didn't know the %% msg_seq_no so couldn't confirm it at the time. 
- case needs_confirming(Delivery, State) of + case needs_confirming(Delivery, State1) of never -> - State #state { msg_id_status = dict:erase(MsgId, MS) }; + State1 #state { msg_id_status = dict:erase(MsgId, MS) }; eventually -> - State #state { + State1 #state { msg_id_status = dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS) }; immediately -> ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), - State #state { msg_id_status = dict:erase(MsgId, MS) } + State1 #state { msg_id_status = dict:erase(MsgId, MS) } end; {ok, discarded} -> %% We've already heard from GM that the msg is to be %% discarded. We won't see this again. - State #state { msg_id_status = dict:erase(MsgId, MS) } + State1 #state { msg_id_status = dict:erase(MsgId, MS) } end; maybe_enqueue_message(_Delivery, _EnqueueOnPromotion, State) -> %% We don't support txns in mirror queues. @@ -601,6 +671,7 @@ process_instruction( %% which means that we're going to have to hang on to the fact %% that we've seen the msg_id confirmed until we can associate it %% with a msg_seq_no. + State1 = ensure_monitoring(ChPid, State), MS1 = dict:store(MsgId, {published, ChPid}, MS), {SQ1, MS2} = case dict:find(ChPid, SQ) of @@ -618,7 +689,7 @@ process_instruction( %% first. Thus we need to deal with confirms %% here. 
{dict:store(ChPid, MQ1, SQ), - case needs_confirming(Delivery, State) of + case needs_confirming(Delivery, State1) of never -> MS; eventually -> @@ -639,19 +710,19 @@ process_instruction( end end, - State1 = State #state { sender_queues = SQ1, - msg_id_status = MS2 }, + State2 = State1 #state { sender_queues = SQ1, + msg_id_status = MS2 }, {ok, case Deliver of false -> BQS1 = BQ:publish(Msg, MsgProps, ChPid, BQS), - State1 #state { backing_queue_state = BQS1 }; + State2 #state { backing_queue_state = BQS1 }; {true, AckRequired} -> {AckTag, BQS1} = BQ:publish_delivered(AckRequired, Msg, MsgProps, ChPid, BQS), maybe_store_ack(AckRequired, MsgId, AckTag, - State1 #state { backing_queue_state = BQS1 }) + State2 #state { backing_queue_state = BQS1 }) end}; process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, State = #state { sender_queues = SQ, @@ -660,6 +731,7 @@ process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, msg_id_status = MS }) -> %% Many of the comments around the publish head above apply here %% too. 
+ State1 = ensure_monitoring(ChPid, State), MS1 = dict:store(MsgId, discarded, MS), {SQ1, MS2} = case dict:find(ChPid, SQ) of @@ -685,9 +757,9 @@ process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, end end, BQS1 = BQ:discard(Msg, ChPid, BQS), - {ok, State #state { sender_queues = SQ1, - msg_id_status = MS2, - backing_queue_state = BQS1 }}; + {ok, State1 #state { sender_queues = SQ1, + msg_id_status = MS2, + backing_queue_state = BQS1 }}; process_instruction({set_length, Length}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> @@ -746,6 +818,19 @@ process_instruction({requeue, MsgPropsFun, MsgIds}, State #state { msg_id_ack = dict:new(), backing_queue_state = BQS2 } end}; +process_instruction({sender_death, ChPid}, + State = #state { sender_queues = SQ, + known_senders = KS }) -> + rabbit_log:info("Slave received death of sender ~p~n", [ChPid]), + {ok, case dict:find(ChPid, KS) of + error -> + State; + {ok, MRef} -> + true = erlang:demonitor(MRef), + KS1 = dict:erase(ChPid, KS), + SQ1 = dict:erase(ChPid, SQ), + State #state { sender_queues = SQ1, known_senders = KS1} + end}; process_instruction(delete_and_terminate, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> -- cgit v1.2.1 From 1270b265d77799c97af7ec6f16a7637ed4caefda Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 20 May 2011 18:39:07 +0100 Subject: Critical bug fixed --- src/rabbit_mirror_queue_slave.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 7fc2c8cb..f065f667 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -829,7 +829,7 @@ process_instruction({sender_death, ChPid}, true = erlang:demonitor(MRef), KS1 = dict:erase(ChPid, KS), SQ1 = dict:erase(ChPid, SQ), - State #state { sender_queues = SQ1, known_senders = KS1} + State #state { sender_queues = SQ1, known_senders = KS1 } end}; 
process_instruction(delete_and_terminate, State = #state { backing_queue = BQ, -- cgit v1.2.1 From cc4012012d860425781ea1e8da3f8ec6ce8c9c39 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 23 May 2011 13:27:31 +0100 Subject: master must broadcast, esp if it doesn't know about the sender. Also rip out the varying priority run_backing_queue* stuff as it turns out it's not needed --- src/rabbit_amqqueue.erl | 19 ++-------------- src/rabbit_amqqueue_process.erl | 44 ++++++++++++++++++-------------------- src/rabbit_mirror_queue_master.erl | 16 ++++---------- src/rabbit_mirror_queue_slave.erl | 28 +++++++++++------------- 4 files changed, 40 insertions(+), 67 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 0550f13b..8c374ef3 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -33,7 +33,6 @@ %% internal -export([internal_declare/2, internal_delete/1, run_backing_queue/3, run_backing_queue_async/3, - run_backing_queue/4, run_backing_queue_async/4, sync_timeout/1, update_ram_duration/1, set_ram_duration_target/2, set_maximum_since_use/2, maybe_expire/1, drop_expired/1, emit_stats/1]). @@ -150,14 +149,6 @@ -spec(run_backing_queue_async/3 :: (pid(), atom(), (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) -> 'ok'). --spec(run_backing_queue/4 :: - (pid(), atom(), - (fun ((atom(), A) -> {[rabbit_types:msg_id()], A})), - integer() | 'default') -> 'ok'). --spec(run_backing_queue_async/4 :: - (pid(), atom(), - (fun ((atom(), A) -> {[rabbit_types:msg_id()], A})), - integer() | 'default') -> 'ok'). -spec(sync_timeout/1 :: (pid()) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). -spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). @@ -457,16 +448,10 @@ internal_delete(QueueName) -> end). run_backing_queue(QPid, Mod, Fun) -> - run_backing_queue(QPid, Mod, Fun, default). + gen_server2:call(QPid, {run_backing_queue, Mod, Fun}, infinity). 
run_backing_queue_async(QPid, Mod, Fun) -> - run_backing_queue_async(QPid, Mod, Fun, default). - -run_backing_queue(QPid, Mod, Fun, Priority) -> - gen_server2:call(QPid, {run_backing_queue, Mod, Fun, Priority}, infinity). - -run_backing_queue_async(QPid, Mod, Fun, Priority) -> - gen_server2:cast(QPid, {run_backing_queue, Mod, Fun, Priority}). + gen_server2:cast(QPid, {run_backing_queue, Mod, Fun}). sync_timeout(QPid) -> gen_server2:cast(QPid, sync_timeout). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 7daf869b..ea31ec13 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -843,31 +843,29 @@ emit_consumer_deleted(ChPid, ConsumerTag) -> prioritise_call(Msg, _From, _State) -> case Msg of - info -> 9; - {info, _Items} -> 9; - consumers -> 9; - {run_backing_queue, _Mod, _Fun, default} -> 6; - {run_backing_queue, _Mod, _Fun, Priority} -> Priority; - _ -> 0 + info -> 9; + {info, _Items} -> 9; + consumers -> 9; + {run_backing_queue, _Mod, _Fun} -> 6; + _ -> 0 end. prioritise_cast(Msg, _State) -> case Msg of - update_ram_duration -> 8; - delete_immediately -> 8; - {set_ram_duration_target, _Duration} -> 8; - {set_maximum_since_use, _Age} -> 8; - maybe_expire -> 8; - drop_expired -> 8; - emit_stats -> 7; - {ack, _Txn, _AckTags, _ChPid} -> 7; - {reject, _AckTags, _Requeue, _ChPid} -> 7; - {notify_sent, _ChPid} -> 7; - {unblock, _ChPid} -> 7; - {run_backing_queue, _Mod, _Fun, default} -> 6; - {run_backing_queue, _Mod, _Fun, Priority} -> Priority; - sync_timeout -> 6; - _ -> 0 + update_ram_duration -> 8; + delete_immediately -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + maybe_expire -> 8; + drop_expired -> 8; + emit_stats -> 7; + {ack, _Txn, _AckTags, _ChPid} -> 7; + {reject, _AckTags, _Requeue, _ChPid} -> 7; + {notify_sent, _ChPid} -> 7; + {unblock, _ChPid} -> 7; + {run_backing_queue, _Mod, _Fun} -> 6; + sync_timeout -> 6; + _ -> 0 end. 
prioritise_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, @@ -1081,11 +1079,11 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> noreply(requeue_and_run(AckTags, State)) end; -handle_call({run_backing_queue, Mod, Fun, _Priority}, _From, State) -> +handle_call({run_backing_queue, Mod, Fun}, _From, State) -> reply(ok, run_backing_queue(Mod, Fun, State)). -handle_cast({run_backing_queue, Mod, Fun, _Priority}, State) -> +handle_cast({run_backing_queue, Mod, Fun}, State) -> noreply(run_backing_queue(Mod, Fun, State)); handle_cast(sync_timeout, State) -> diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 0e7f32f0..78c771cc 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -62,22 +62,14 @@ stop() -> sender_death_fun() -> Self = self(), fun (DeadPid) -> - %% Purposefully set the priority to 0 here so that we - %% don't overtake any messages from DeadPid that are - %% already in the queue. rabbit_amqqueue:run_backing_queue_async( Self, ?MODULE, fun (?MODULE, State = #state { gm = GM, known_senders = KS }) -> rabbit_log:info("Master saw death of sender ~p~n", [DeadPid]), - case sets:is_element(DeadPid, KS) of - false -> - State; - true -> - ok = gm:broadcast(GM, {sender_death, DeadPid}), - KS1 = sets:del_element(DeadPid, KS), - State #state { known_senders = KS1 } - end - end, 0) + ok = gm:broadcast(GM, {sender_death, DeadPid}), + KS1 = sets:del_element(DeadPid, KS), + State #state { known_senders = KS1 } + end) end. 
init(#amqqueue { arguments = Args, name = QName } = Q, Recover, diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index f065f667..265657de 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -170,14 +170,14 @@ handle_call({gm_deaths, Deaths}, From, {stop, normal, State} end; -handle_call({run_backing_queue, Mod, Fun, _Priority}, _From, State) -> +handle_call({run_backing_queue, Mod, Fun}, _From, State) -> reply(ok, run_backing_queue(Mod, Fun, State)); handle_call({commit, _Txn, _ChPid}, _From, State) -> %% We don't support transactions in mirror queues reply(ok, State). -handle_cast({run_backing_queue, Mod, Fun, _Priority}, State) -> +handle_cast({run_backing_queue, Mod, Fun}, State) -> noreply(run_backing_queue(Mod, Fun, State)); handle_cast({gm, Instruction}, State) -> @@ -265,23 +265,21 @@ handle_pre_hibernate(State = #state { backing_queue = BQ, prioritise_call(Msg, _From, _State) -> case Msg of - {run_backing_queue, _Mod, _Fun, default} -> 6; - {run_backing_queue, _Mod, _Fun, Priority} -> Priority; - {gm_deaths, _Deaths} -> 5; - _ -> 0 + {run_backing_queue, _Mod, _Fun} -> 6; + {gm_deaths, _Deaths} -> 5; + _ -> 0 end. prioritise_cast(Msg, _State) -> case Msg of - update_ram_duration -> 8; - {set_ram_duration_target, _Duration} -> 8; - {set_maximum_since_use, _Age} -> 8; - {run_backing_queue, _Mod, _Fun, default} -> 6; - {run_backing_queue, _Mod, _Fun, Priority} -> Priority; - sync_timeout -> 6; - {gm, _Msg} -> 5; - {post_commit, _Txn, _AckTags} -> 4; - _ -> 0 + update_ram_duration -> 8; + {set_ram_duration_target, _Duration} -> 8; + {set_maximum_since_use, _Age} -> 8; + {run_backing_queue, _Mod, _Fun} -> 6; + sync_timeout -> 6; + {gm, _Msg} -> 5; + {post_commit, _Txn, _AckTags} -> 4; + _ -> 0 end. 
%% --------------------------------------------------------------------------- -- cgit v1.2.1 From 1dab0e6045a444b91da762286d4f164f050dd6c7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 23 May 2011 13:29:35 +0100 Subject: Remove debug log entries --- src/rabbit_mirror_queue_master.erl | 1 - src/rabbit_mirror_queue_slave.erl | 1 - 2 files changed, 2 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 78c771cc..1d2b1676 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -65,7 +65,6 @@ sender_death_fun() -> rabbit_amqqueue:run_backing_queue_async( Self, ?MODULE, fun (?MODULE, State = #state { gm = GM, known_senders = KS }) -> - rabbit_log:info("Master saw death of sender ~p~n", [DeadPid]), ok = gm:broadcast(GM, {sender_death, DeadPid}), KS1 = sets:del_element(DeadPid, KS), State #state { known_senders = KS1 } diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 265657de..5c0730dd 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -819,7 +819,6 @@ process_instruction({requeue, MsgPropsFun, MsgIds}, process_instruction({sender_death, ChPid}, State = #state { sender_queues = SQ, known_senders = KS }) -> - rabbit_log:info("Slave received death of sender ~p~n", [ChPid]), {ok, case dict:find(ChPid, KS) of error -> State; -- cgit v1.2.1 From 25b2dbe483f1e71efb080a0b7e8e92525b36adfb Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 23 May 2011 15:00:50 +0100 Subject: Updated documentation, and in the course of writing it, thought up another scenario I wasn't coping with. Fixed. 
However, not all documented causes of memory leaks are yet fixed in the code --- src/rabbit_mirror_queue_coordinator.erl | 109 ++++++++++++++++++++++++++++---- src/rabbit_mirror_queue_slave.erl | 69 ++++++++++---------- 2 files changed, 128 insertions(+), 50 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 5660112a..96d0e15b 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -158,20 +158,101 @@ %% as the bq, and the slave's bq as the master's bq. Thus the very %% same process that was the slave is now a full amqqueue_process. %% -%% In the event of channel failure, there is the possibility that a -%% msg that was being published only makes it to some of the -%% mirrors. If it makes it to the master, then the master will push -%% the entire message onto gm, and all the slaves will publish it to -%% their bq, even though they may not receive it directly from the -%% channel. This currently will create a small memory leak in the -%% slave's msg_id_status mapping as the slaves will expect that -%% eventually they'll receive the msg from the channel. If the message -%% does not make it to the master then the slaves that receive it will -%% hold onto the message, assuming it'll eventually appear via -%% gm. Again, this will currently result in a memory leak, though this -%% time, it's the entire message rather than tracking the status of -%% the message, which is potentially much worse. This may eventually -%% be solved by monitoring publishing channels in some way. +%% It is important that we avoid memory leaks due to the death of +%% senders (i.e. channels) and partial publications. A sender +%% publishing a message may fail mid way through the publish and thus +%% only some of the mirrors will receive the message. We need the +%% mirrors to be able to detect this and tidy up as necessary to avoid +%% leaks. 
If we just had the master monitoring all senders then we +%% would have the possibility that a sender appears and only sends the +%% message to a few of the slaves before dying. Those slaves would +%% then hold on to the message, assuming they'll receive some +%% instruction eventually from the master. Thus we have both slaves +%% and the master monitor all senders they become aware of. But there +%% is a race: if the slave receives a DOWN of a sender, how does it +%% know whether or not the master is going to send it instructions +%% regarding those messages? +%% +%% Whilst the master monitors senders, it can't access its mailbox +%% directly, so it delegates monitoring to the coordinator. When the +%% coordinator receives a DOWN message from a sender, it informs the +%% master via a callback. This allows the master to do any tidying +%% necessary, but more importantly allows the master to broadcast a +%% sender_death message to all the slaves, saying the sender has +%% died. Once the slaves receive the sender_death message, they know +%% that they're not going to receive any more instructions from the gm +%% regarding that sender, thus they throw away any publications from +%% the sender pending publication instructions. However, it is +%% possible that the coordinator receives the DOWN and communicates +%% that to the master before the master has finished receiving and +%% processing publishes from the sender. This turns out not to be a +%% problem: the sender has actually died, and so will not need to +%% receive confirms or other feedback, and should further messages be +%% "received" from the sender, the master will ask the coordinator to +%% set up a new monitor, and will continue to process the messages +%% normally. 
Slaves may thus receive publishes via gm from previously +%% declared "dead" senders, but again, this is fine: should the slave +%% have just thrown out the message it had received directly from the +%% sender (due to receiving a sender_death message via gm), it will be +%% able to cope with the publication purely from the master via gm. +%% +%% When a slave receives a DOWN message for a sender, if it has not +%% received the sender_death message from the master via gm already, +%% then it will wait 20 seconds before broadcasting a request for +%% confirmation from the master that the sender really has died. +%% Should a sender have only sent a publish to slaves, this allows +%% slaves to inform the master of the previous existence of the +%% sender. The master will thus monitor the sender, receive the DOWN, +%% and subsequently broadcast the sender_death message, allowing the +%% slaves to tidy up. This process can repeat for the same sender: +%% consider one slave receives the publication, then the DOWN, then +%% asks for confirmation of death, then the master broadcasts the +%% sender_death message. Only then does another slave receive the +%% publication and thus set up its monitoring. Eventually that slave +%% too will receive the DOWN, ask for confirmation and the master will +%% monitor the sender again, receive another DOWN, and send out +%% another sender_death message. Given the 20 second delay before +%% requesting death confirmation, this is highly unlikely, but it is a +%% possibility. +%% +%% When the 20 second timer expires, the slave first checks to see +%% whether it still needs confirmation of the death before requesting +%% it. This prevents unnecessary traffic on gm as it allows one +%% broadcast of the sender_death message to satisfy many slaves. 
+%% +%% If we consider the promotion of a slave at this point, we have two +%% possibilities: that of the slave that has received the DOWN and is +%% thus waiting for confirmation from the master that the sender +%% really is down; and that of the slave that has not received the +%% DOWN. In the first case, in the act of promotion to master, the new +%% master will monitor again the dead sender, and after it has +%% finished promoting itself, it should find another DOWN waiting, +%% which it will then broadcast. This will allow slaves to tidy up as +%% normal. In the second case, we have the possibility that +%% confirmation-of-sender-death request has been broadcast, but that +%% it was broadcast before the master failed, and that the slave being +%% promoted does not know anything about that sender, and so will not +%% monitor it on promotion. Thus a slave that broadcasts such a +%% request, at the point of broadcasting it, recurses, setting another +%% 20 second timer. As before, on expiry of the timer, the slaves +%% checks to see whether it still has not received a sender_death +%% message for the dead sender, and if not, broadcasts a death +%% confirmation request. Thus this ensures that even when a master +%% dies and the new slave has no knowledge of the dead sender, it will +%% eventually receive a death confirmation request, shall monitor the +%% dead sender, receive the DOWN and broadcast the sender_death +%% message. +%% +%% The preceding commentary deals with the possibility of slaves +%% receiving publications from senders which the master does not, and +%% the need to prevent memory leaks in such scenarios. The inverse is +%% also possible: a partial publication may cause only the master to +%% receive a publication. It will then publish the message via gm. The +%% slaves will receive it via gm, will publish it to their BQ and will +%% set up monitoring on the sender. 
They will then receive the DOWN +%% message and the master will eventually publish the corresponding +%% sender_death message. The slave will then be able to tidy up its +%% state as normal. %% %% We don't support transactions on mirror queues. To do so is %% challenging. The underlying bq is free to add the contents of the diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 5c0730dd..558e372e 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -569,44 +569,41 @@ ensure_monitoring(ChPid, State = #state { known_senders = KS }) -> end. local_sender_death(ChPid, State = #state { known_senders = KS }) -> - case dict:is_key(ChPid, KS) of - false -> - ok; - true -> - %% We have to deal with the possibility that we'll be - %% promoted to master before this thing gets - %% run. Consequently we set the module to - %% rabbit_mirror_queue_master so that if we do become a - %% rabbit_amqqueue_process before then, sane things will - %% happen. - Fun = - fun (?MODULE, State1 = #state { known_senders = KS1, - gm = GM }) -> - %% We're running still as a slave - ok = case dict:is_key(ChPid, KS1) of - false -> - ok; - true -> - gm:broadcast( - GM, {ensure_monitoring, [ChPid]}) - end, - State1; - (rabbit_mirror_queue_master, State1) -> - %% We've become a master. State1 is now opaque - %% to us. When we became master, if ChPid was - %% still known to us then we'd have set up - %% monitoring of it then, so this is now a - %% noop. - State1 - end, - %% Note that we do not remove our knowledge of this ChPid - %% until we get the sender_death from GM. - timer:apply_after( - ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue_async, - [self(), rabbit_mirror_queue_master, Fun]) - end, + ok = case dict:is_key(ChPid, KS) of + false -> ok; + true -> confirm_sender_death(ChPid) + end, State. 
+confirm_sender_death(Pid) -> + %% We have to deal with the possibility that we'll be promoted to + %% master before this thing gets run. Consequently we set the + %% module to rabbit_mirror_queue_master so that if we do become a + %% rabbit_amqqueue_process before then, sane things will happen. + Fun = + fun (?MODULE, State = #state { known_senders = KS, + gm = GM }) -> + %% We're running still as a slave + ok = case dict:is_key(Pid, KS) of + false -> ok; + true -> gm:broadcast(GM, {ensure_monitoring, [Pid]}), + confirm_sender_death(Pid) + end, + State; + (rabbit_mirror_queue_master, State) -> + %% We've become a master. State is now opaque to + %% us. When we became master, if Pid was still known + %% to us then we'd have set up monitoring of it then, + %% so this is now a noop. + State + end, + %% Note that we do not remove our knowledge of this ChPid until we + %% get the sender_death from GM. + {ok, _TRef} = timer:apply_after( + ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue_async, + [self(), rabbit_mirror_queue_master, Fun]), + ok. 
+ maybe_enqueue_message( Delivery = #delivery { message = #basic_message { id = MsgId }, msg_seq_no = MsgSeqNo, -- cgit v1.2.1 From 6195f8752ee4b2775c3697c0862969d9a4e7005a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 23 May 2011 16:45:27 +0100 Subject: Fix the remaining memory leak --- src/rabbit_mirror_queue_slave.erl | 172 +++++++++++++++++++++----------------- 1 file changed, 93 insertions(+), 79 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 558e372e..b6aaecb7 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -59,7 +59,7 @@ sync_timer_ref, rate_timer_ref, - sender_queues, %% :: Pid -> MsgQ + sender_queues, %% :: Pid -> {Q {Msg, Bool}, Set MsgId} msg_id_ack, %% :: MsgId -> AckTag ack_num, @@ -500,7 +500,7 @@ promote_me(From, #state { q = Q, {MsgId, {published, ChPid, MsgSeqNo}} <- dict:to_list(MS)]), NumAckTags = [NumAckTag || {_MsgId, NumAckTag} <- dict:to_list(MA)], AckTags = [AckTag || {_Num, AckTag} <- lists:sort(NumAckTags)], - Deliveries = [Delivery || {_ChPid, PubQ} <- dict:to_list(SQ), + Deliveries = [Delivery || {_ChPid, {PubQ, _PendCh}} <- dict:to_list(SQ), {Delivery, true} <- queue:to_list(PubQ)], QueueState = rabbit_amqqueue_process:init_with_backing_queue_state( Q1, rabbit_mirror_queue_master, MasterState, RateTRef, @@ -610,47 +610,65 @@ maybe_enqueue_message( sender = ChPid, txn = none }, EnqueueOnPromotion, - State = #state { sender_queues = SQ, - msg_id_status = MS }) -> + State = #state { sender_queues = SQ, msg_id_status = MS }) -> State1 = ensure_monitoring(ChPid, State), %% We will never see {published, ChPid, MsgSeqNo} here. 
case dict:find(MsgId, MS) of error -> - MQ = case dict:find(ChPid, SQ) of - {ok, MQ1} -> MQ1; - error -> queue:new() - end, - SQ1 = dict:store(ChPid, - queue:in({Delivery, EnqueueOnPromotion}, MQ), SQ), + {MQ, PendingCh} = get_sender_queue(ChPid, SQ), + MQ1 = queue:in({Delivery, EnqueueOnPromotion}, MQ), + SQ1 = dict:store(ChPid, {MQ1, PendingCh}, SQ), State1 #state { sender_queues = SQ1 }; {ok, {confirmed, ChPid}} -> %% BQ has confirmed it but we didn't know what the %% msg_seq_no was at the time. We do now! ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), - State1 #state { msg_id_status = dict:erase(MsgId, MS) }; + SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), + State1 #state { sender_queues = SQ1, + msg_id_status = dict:erase(MsgId, MS) }; {ok, {published, ChPid}} -> %% It was published to the BQ and we didn't know the %% msg_seq_no so couldn't confirm it at the time. case needs_confirming(Delivery, State1) of never -> - State1 #state { msg_id_status = dict:erase(MsgId, MS) }; + SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), + State1 #state { msg_id_status = dict:erase(MsgId, MS), + sender_queues = SQ1 }; eventually -> State1 #state { msg_id_status = dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS) }; immediately -> ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), - State1 #state { msg_id_status = dict:erase(MsgId, MS) } + SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), + State1 #state { msg_id_status = dict:erase(MsgId, MS), + sender_queues = SQ1 } end; {ok, discarded} -> %% We've already heard from GM that the msg is to be %% discarded. We won't see this again. - State1 #state { msg_id_status = dict:erase(MsgId, MS) } + SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), + State1 #state { msg_id_status = dict:erase(MsgId, MS), + sender_queues = SQ1 } end; maybe_enqueue_message(_Delivery, _EnqueueOnPromotion, State) -> %% We don't support txns in mirror queues. State. 
+get_sender_queue(ChPid, SQ) -> + case dict:find(ChPid, SQ) of + error -> {queue:new(), sets:new()}; + {ok, Val} -> Val + end. + +remove_from_pending_ch(MsgId, ChPid, SQ) -> + case dict:find(ChPid, SQ) of + error -> + SQ; + {ok, {MQ, PendingCh}} -> + dict:store(ChPid, {MQ, sets:del_element(MsgId, PendingCh)}, SQ) + end. + process_instruction( {publish, Deliver, ChPid, MsgProps, Msg = #basic_message { id = MsgId }}, State = #state { sender_queues = SQ, @@ -667,46 +685,39 @@ process_instruction( %% that we've seen the msg_id confirmed until we can associate it %% with a msg_seq_no. State1 = ensure_monitoring(ChPid, State), - MS1 = dict:store(MsgId, {published, ChPid}, MS), - {SQ1, MS2} = - case dict:find(ChPid, SQ) of - error -> - {SQ, MS1}; - {ok, MQ} -> - case queue:out(MQ) of - {empty, _MQ} -> - {SQ, MS1}; - {{value, {Delivery = #delivery { - msg_seq_no = MsgSeqNo, - message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, MQ1} -> - %% We received the msg from the channel - %% first. Thus we need to deal with confirms - %% here. - {dict:store(ChPid, MQ1, SQ), - case needs_confirming(Delivery, State1) of - never -> - MS; - eventually -> - dict:store( - MsgId, {published, ChPid, MsgSeqNo}, MS); - immediately -> - ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), - MS - end}; - {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ1} -> - %% The instruction was sent to us before we - %% were within the mirror_pids within the - %% #amqqueue{} record. We'll never receive the - %% message directly from the channel. And the - %% channel will not be expecting any confirms - %% from us. 
- {SQ, MS} - end + {MQ, PendingCh} = get_sender_queue(ChPid, SQ), + {MQ1, PendingCh1, MS1} = + case queue:out(MQ) of + {empty, _MQ2} -> + {MQ, sets:add_element(MsgId, PendingCh), + dict:store(MsgId, {published, ChPid}, MS)}; + {{value, {Delivery = #delivery { + msg_seq_no = MsgSeqNo, + message = #basic_message { id = MsgId } }, + _EnqueueOnPromotion}}, MQ2} -> + %% We received the msg from the channel first. Thus we + %% need to deal with confirms here. + case needs_confirming(Delivery, State1) of + never -> + {MQ2, PendingCh, MS}; + eventually -> + {MQ2, sets:add_element(MsgId, PendingCh), + dict:store(MsgId, {published, ChPid, MsgSeqNo}, MS)}; + immediately -> + ok = rabbit_channel:confirm(ChPid, [MsgSeqNo]), + {MQ2, PendingCh, MS} + end; + {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> + %% The instruction was sent to us before we were + %% within the mirror_pids within the #amqqueue{} + %% record. We'll never receive the message directly + %% from the channel. And the channel will not be + %% expecting any confirms from us. + {MQ, PendingCh, MS} end, - State2 = State1 #state { sender_queues = SQ1, - msg_id_status = MS2 }, + SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), + State2 = State1 #state { sender_queues = SQ1, msg_id_status = MS1 }, {ok, case Deliver of @@ -727,33 +738,28 @@ process_instruction({discard, ChPid, Msg = #basic_message { id = MsgId }}, %% Many of the comments around the publish head above apply here %% too. 
State1 = ensure_monitoring(ChPid, State), - MS1 = dict:store(MsgId, discarded, MS), - {SQ1, MS2} = - case dict:find(ChPid, SQ) of - error -> - {SQ, MS1}; - {ok, MQ} -> - case queue:out(MQ) of - {empty, _MQ} -> - {SQ, MS1}; - {{value, {#delivery { - message = #basic_message { id = MsgId } }, - _EnqueueOnPromotion}}, MQ1} -> - %% We've already seen it from the channel, - %% we're not going to see this again, so don't - %% add it to MS - {dict:store(ChPid, MQ1, SQ), MS}; - {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ1} -> - %% The instruction was sent to us before we - %% were within the mirror_pids within the - %% #amqqueue{} record. We'll never receive the - %% message directly from the channel. - {SQ, MS} - end + {MQ, PendingCh} = get_sender_queue(ChPid, SQ), + {MQ1, PendingCh1, MS1} = + case queue:out(MQ) of + {empty, _MQ} -> + {MQ, sets:add_element(MsgId, PendingCh), + dict:store(MsgId, discarded, MS)}; + {{value, {#delivery { message = #basic_message { id = MsgId } }, + _EnqueueOnPromotion}}, MQ2} -> + %% We've already seen it from the channel, we're not + %% going to see this again, so don't add it to MS + {MQ2, PendingCh, MS}; + {{value, {#delivery {}, _EnqueueOnPromotion}}, _MQ2} -> + %% The instruction was sent to us before we were + %% within the mirror_pids within the #amqqueue{} + %% record. We'll never receive the message directly + %% from the channel. 
+ {MQ, PendingCh, MS} end, + SQ1 = dict:store(ChPid, {MQ1, PendingCh1}, SQ), BQS1 = BQ:discard(Msg, ChPid, BQS), {ok, State1 #state { sender_queues = SQ1, - msg_id_status = MS2, + msg_id_status = MS1, backing_queue_state = BQS1 }}; process_instruction({set_length, Length}, State = #state { backing_queue = BQ, @@ -815,15 +821,23 @@ process_instruction({requeue, MsgPropsFun, MsgIds}, end}; process_instruction({sender_death, ChPid}, State = #state { sender_queues = SQ, + msg_id_status = MS, known_senders = KS }) -> {ok, case dict:find(ChPid, KS) of error -> State; {ok, MRef} -> true = erlang:demonitor(MRef), - KS1 = dict:erase(ChPid, KS), - SQ1 = dict:erase(ChPid, SQ), - State #state { sender_queues = SQ1, known_senders = KS1 } + MS1 = case dict:find(ChPid, SQ) of + error -> + MS; + {ok, {_MQ, PendingCh}} -> + lists:foldl(fun dict:erase/2, MS, + sets:to_list(PendingCh)) + end, + State #state { sender_queues = dict:erase(ChPid, SQ), + msg_id_status = MS1, + known_senders = dict:erase(ChPid, KS) } end}; process_instruction(delete_and_terminate, State = #state { backing_queue = BQ, -- cgit v1.2.1 From cbb2e20ff65999293be4bcbc08bdee588a731435 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 23 May 2011 16:48:34 +0100 Subject: Update comment --- src/rabbit_mirror_queue_slave.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index b6aaecb7..c7ff4480 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -337,7 +337,7 @@ bq_init(BQ, Q, Recover) -> run_backing_queue(rabbit_mirror_queue_master, Fun, State) -> %% Yes, this might look a little crazy, but see comments in - %% local_sender_death/2 + %% confirm_sender_death/1 Fun(?MODULE, State); run_backing_queue(Mod, Fun, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> -- cgit v1.2.1 From d99801020fa148777d1a3586eab0ea32e9119ab4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman 
Date: Mon, 23 May 2011 16:50:48 +0100 Subject: Add pointer back to bug given the value of the additional commentary in the bug --- src/rabbit_mirror_queue_coordinator.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index 96d0e15b..ee849088 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -304,6 +304,8 @@ %% the last seen state of the queue: checking length alone is not %% sufficient in this case. %% +%% For more documentation see the comments in bug 23554. +%% %%---------------------------------------------------------------------------- start_link(Queue, GM, DeathFun) -> -- cgit v1.2.1 From b4963dd7aae39e8f8c5306b9d39af6deb7623e63 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 24 May 2011 12:50:58 +0100 Subject: Whitespace --- src/rabbit_mirror_queue_master.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 1d2b1676..99de1b18 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -254,32 +254,32 @@ requeue(AckTags, MsgPropsFun, State = #state { gm = GM, ok = gm:broadcast(GM, {requeue, MsgPropsFun, MsgIds}), {MsgIds, State #state { backing_queue_state = BQS1 }}. -len(#state { backing_queue = BQ, backing_queue_state = BQS}) -> +len(#state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:len(BQS). -is_empty(#state { backing_queue = BQ, backing_queue_state = BQS}) -> +is_empty(#state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:is_empty(BQS). set_ram_duration_target(Target, State = #state { backing_queue = BQ, - backing_queue_state = BQS}) -> + backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:set_ram_duration_target(Target, BQS) }. 
-ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> +ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> {Result, BQS1} = BQ:ram_duration(BQS), {Result, State #state { backing_queue_state = BQS1 }}. -needs_timeout(#state { backing_queue = BQ, backing_queue_state = BQS}) -> +needs_timeout(#state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:needs_timeout(BQS). -timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS}) -> +timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:timeout(BQS) }. handle_pre_hibernate(State = #state { backing_queue = BQ, - backing_queue_state = BQS}) -> + backing_queue_state = BQS }) -> State #state { backing_queue_state = BQ:handle_pre_hibernate(BQS) }. -status(#state { backing_queue = BQ, backing_queue_state = BQS}) -> +status(#state { backing_queue = BQ, backing_queue_state = BQS }) -> BQ:status(BQS). invoke(?MODULE, Fun, State) -> -- cgit v1.2.1 From f0cb7e165ced609d9909b3ef9b528442f63dc658 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 24 May 2011 17:38:14 +0100 Subject: Permit dropping nodes of mirrored queues. This turns out to be much much messier than I'd hoped as the principle problem becomes ensuring an add after a drop works. Normally, an add would only occur on a node that has not seen said queue before: if it had, in a previous lifetime, then the booting of rabbit would have ripped out any locally stored files regarding that queue. But now this step may be missed. Having tried many different approaches, the simplest became expanding bq so that the shutdown reason is exposed to the BQ. Thus both slave and master can then detect that they're being dropped, and, in the case of master, it can convert a bq:terminate to a bq:delete_and_terminate. Every other approach I could think of turned out worse. 
--- docs/rabbitmqctl.1.xml | 43 ++++++++++++++++++ include/rabbit_backing_queue_spec.hrl | 4 +- src/rabbit_amqqueue_process.erl | 12 ++--- src/rabbit_backing_queue.erl | 4 +- src/rabbit_control.erl | 6 +++ src/rabbit_mirror_queue_master.erl | 27 ++++++++---- src/rabbit_mirror_queue_misc.erl | 83 ++++++++++++++++++++++------------- src/rabbit_mirror_queue_slave.erl | 11 ++++- src/rabbit_mirror_queue_slave_sup.erl | 2 +- src/rabbit_tests.erl | 8 ++-- src/rabbit_variable_queue.erl | 6 +-- 11 files changed, 147 insertions(+), 59 deletions(-) diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index 62869158..908ca973 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -1325,6 +1325,49 @@ + + + Mirrored Queue Management + + Mirrored queues can have slaves dynamically added, and slaves + or the master dynamically dropped. Refer to the High Availability + guide for further details about mirrored queues in + general. + + + + + add_queue_mirror queue_name node + + + Attempts to add a mirror of the queue + queue_name on + node. This will only succeed if the + queue was declared a mirrored queue and if there is no + mirror of the queue already on the node. If it succeeds, + the new mirror will start off as an empty slave. + + + + + + drop_queue_mirror queue_name node + + + Attempts to drop a mirror of the queue + queue_name on + node. This will only succeed if the + queue was declared a mirrored queue and if there is a + mirror of the queue already on the node. If the node + contains the master of the queue, a slave on some other + node will be promoted to become the new master. It is + not permitted to drop the only node of a mirrored-queue. + + + + + diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 1c2b94e2..295d9039 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -32,8 +32,8 @@ -spec(stop/0 :: () -> 'ok'). 
-spec(init/4 :: (rabbit_types:amqqueue(), attempt_recovery(), async_callback(), sync_callback()) -> state()). --spec(terminate/1 :: (state()) -> state()). --spec(delete_and_terminate/1 :: (state()) -> state()). +-spec(terminate/2 :: (any(), state()) -> state()). +-spec(delete_and_terminate/2 :: (any(), state()) -> state()). -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}). -spec(publish/4 :: (rabbit_types:basic_message(), rabbit_types:message_properties(), pid(), state()) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index ea31ec13..b1c95338 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -145,16 +145,16 @@ init_with_backing_queue_state(Q = #amqqueue{exclusive_owner = Owner}, BQ, BQS, fun (Delivery, StateN) -> deliver_or_enqueue(Delivery, StateN) end, State, Deliveries). -terminate(shutdown, State = #q{backing_queue = BQ}) -> - terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); -terminate({shutdown, _}, State = #q{backing_queue = BQ}) -> - terminate_shutdown(fun (BQS) -> BQ:terminate(BQS) end, State); -terminate(_Reason, State = #q{backing_queue = BQ}) -> +terminate(shutdown = R, State = #q{backing_queue = BQ}) -> + terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); +terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) -> + terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); +terminate(Reason, State = #q{backing_queue = BQ}) -> %% FIXME: How do we cancel active subscriptions? terminate_shutdown(fun (BQS) -> rabbit_event:notify( queue_deleted, [{pid, self()}]), - BQS1 = BQ:delete_and_terminate(BQS), + BQS1 = BQ:delete_and_terminate(Reason, BQS), %% don't care if the internal delete %% doesn't return 'ok'. 
rabbit_amqqueue:internal_delete(qname(State)), diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index addaabc5..217ad3eb 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -49,11 +49,11 @@ behaviour_info(callbacks) -> {init, 4}, %% Called on queue shutdown when queue isn't being deleted. - {terminate, 1}, + {terminate, 2}, %% Called when the queue is terminating and needs to delete all %% its content. - {delete_and_terminate, 1}, + {delete_and_terminate, 2}, %% Remove all messages in the queue, but not messages which have %% been fetched and are pending acks. diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 1140a2f0..b4b6255e 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -244,6 +244,12 @@ action(add_queue_mirror, Node, [Queue, MirrorNode], Opts, Inform) -> rpc_call(Node, rabbit_mirror_queue_misc, add_slave, [VHostArg, list_to_binary(Queue), list_to_atom(MirrorNode)]); +action(drop_queue_mirror, Node, [Queue, MirrorNode], Opts, Inform) -> + Inform("Dropping mirror of queue ~p on node ~p~n", [Queue, MirrorNode]), + VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), + rpc_call(Node, rabbit_mirror_queue_misc, drop_slave, + [VHostArg, list_to_binary(Queue), list_to_atom(MirrorNode)]); + action(list_exchanges, Node, Args, Opts, Inform) -> Inform("Listing exchanges", []), VHostArg = list_to_binary(proplists:get_value(?VHOST_OPT, Opts)), diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 99de1b18..9bd8565f 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -16,7 +16,7 @@ -module(rabbit_mirror_queue_master). 
--export([init/4, terminate/1, delete_and_terminate/1, +-export([init/4, terminate/2, delete_and_terminate/2, purge/1, publish/4, publish_delivered/5, fetch/2, ack/2, tx_publish/5, tx_ack/3, tx_rollback/2, tx_commit/4, requeue/3, len/1, is_empty/1, drain_confirmed/1, dropwhile/2, @@ -106,17 +106,28 @@ promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) -> ack_msg_id = dict:new(), known_senders = sets:from_list(KS) }. -terminate(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> +terminate({shutdown, dropped} = Reason, + State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> + %% Backing queue termination - this node has been explicitly + %% dropped. Normally, non-durable queues would be tidied up on + %% startup, but there's a possibility that we will be added back + %% in without this node being restarted. Thus we must do the full + %% blown delete_and_terminate now, but only locally: we do not + %% broadcast delete_and_terminate. + State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), + set_delivered = 0 }; +terminate(Reason, + State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> %% Backing queue termination. The queue is going down but %% shouldn't be deleted. Most likely safe shutdown of this %% node. Thus just let some other slave take over. - State #state { backing_queue_state = BQ:terminate(BQS) }. + State #state { backing_queue_state = BQ:terminate(Reason, BQS) }. -delete_and_terminate(State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, delete_and_terminate), - State #state { backing_queue_state = BQ:delete_and_terminate(BQS), +delete_and_terminate(Reason, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {delete_and_terminate, Reason}), + State #state { backing_queue_state = BQ:delete_and_terminate(Reason, BQS), set_delivered = 0 }. 
purge(State = #state { gm = GM, diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 5f180c5e..046d3380 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -16,7 +16,8 @@ -module(rabbit_mirror_queue_misc). --export([remove_from_queue/2, add_slave/2, add_slave/3, on_node_up/0]). +-export([remove_from_queue/2, on_node_up/0, + drop_slave/2, drop_slave/3, add_slave/2, add_slave/3]). -include("rabbit.hrl"). @@ -59,36 +60,6 @@ remove_from_queue(QueueName, DeadPids) -> end end). -add_slave(VHostPath, QueueName, MirrorNode) -> - add_slave(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). - -add_slave(Queue, MirrorNode) -> - rabbit_amqqueue:with( - Queue, - fun (#amqqueue { arguments = Args, name = Name, - pid = QPid, mirror_pids = MPids } = Q) -> - case rabbit_misc:table_lookup(Args, <<"x-mirror">>) of - undefined -> - ok; - _ -> - case [MirrorNode || Pid <- [QPid | MPids], - node(Pid) =:= MirrorNode] of - [] -> - Result = - rabbit_mirror_queue_slave_sup:start_child( - MirrorNode, [Q]), - rabbit_log:info("Adding slave node for ~s: ~p~n", - [rabbit_misc:rs(Name), Result]), - case Result of - {ok, _Pid} -> ok; - _ -> Result - end; - [_] -> - {error, queue_already_mirrored_on_node} - end - end - end). - on_node_up() -> Qs = rabbit_misc:execute_mnesia_transaction( @@ -113,3 +84,53 @@ on_node_up() -> end), [add_slave(Q, node()) || Q <- Qs], ok. + +drop_slave(VHostPath, QueueName, MirrorNode) -> + drop_slave(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). 
+ +drop_slave(Queue, MirrorNode) -> + if_mirrored_queue( + Queue, + fun (#amqqueue { name = Name, pid = QPid, mirror_pids = MPids }) -> + case [Pid || Pid <- [QPid | MPids], node(Pid) =:= MirrorNode] of + [] -> + {error, {queue_not_mirrored_on_node, MirrorNode}}; + [QPid | MPids] -> + {error, cannot_drop_only_mirror}; + [Pid] -> + rabbit_log:info("Dropping slave node on ~p for ~s~n", + [MirrorNode, rabbit_misc:rs(Name)]), + exit(Pid, {shutdown, dropped}), + ok + end + end). + +add_slave(VHostPath, QueueName, MirrorNode) -> + add_slave(rabbit_misc:r(VHostPath, queue, QueueName), MirrorNode). + +add_slave(Queue, MirrorNode) -> + if_mirrored_queue( + Queue, + fun (#amqqueue { name = Name, pid = QPid, mirror_pids = MPids } = Q) -> + case [Pid || Pid <- [QPid | MPids], node(Pid) =:= MirrorNode] of + [] -> Result = rabbit_mirror_queue_slave_sup:start_child( + MirrorNode, [Q]), + rabbit_log:info( + "Adding slave node for ~s on node ~p: ~p~n", + [rabbit_misc:rs(Name), MirrorNode, Result]), + case Result of + {ok, _Pid} -> ok; + _ -> Result + end; + [_] -> {error, {queue_already_mirrored_on_node, MirrorNode}} + end + end). + +if_mirrored_queue(Queue, Fun) -> + rabbit_amqqueue:with( + Queue, fun (#amqqueue { arguments = Args } = Q) -> + case rabbit_misc:table_lookup(Args, <<"x-mirror">>) of + undefined -> ok; + _ -> Fun(Q) + end + end). diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index c7ff4480..666687a5 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -226,6 +226,9 @@ handle_info({'DOWN', _MonitorRef, process, MPid, _Reason}, handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) -> noreply(local_sender_death(ChPid, State)); +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}; + handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. 
@@ -238,6 +241,10 @@ terminate(_Reason, #state { backing_queue_state = undefined }) -> %% We've received a delete_and_terminate from gm, thus nothing to %% do here. ok; +terminate({shutdown, dropped} = R, #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + %% See rabbit_mirror_queue_master:terminate/2 + BQ:delete_and_terminate(R, BQS); terminate(Reason, #state { q = Q, gm = GM, backing_queue = BQ, @@ -839,10 +846,10 @@ process_instruction({sender_death, ChPid}, msg_id_status = MS1, known_senders = dict:erase(ChPid, KS) } end}; -process_instruction(delete_and_terminate, +process_instruction({delete_and_terminate, Reason}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQ:delete_and_terminate(BQS), + BQ:delete_and_terminate(Reason, BQS), {stop, State #state { backing_queue_state = undefined }}. msg_ids_to_acktags(MsgIds, MA) -> diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl index 25ee1fd0..2ce5941e 100644 --- a/src/rabbit_mirror_queue_slave_sup.erl +++ b/src/rabbit_mirror_queue_slave_sup.erl @@ -40,7 +40,7 @@ start() -> {ok, _} = - supervisor:start_child( + supervisor2:start_child( rabbit_sup, {rabbit_mirror_queue_slave_sup, {rabbit_mirror_queue_slave_sup, start_link, []}, diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 1a37cdff..3f4aa54e 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -2116,7 +2116,7 @@ with_fresh_variable_queue(Fun) -> {delta, {delta, undefined, 0, undefined}}, {q3, 0}, {q4, 0}, {len, 0}]), - _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)), + _ = rabbit_variable_queue:delete_and_terminate(shutdown, Fun(VQ)), passed. 
test_variable_queue() -> @@ -2284,7 +2284,7 @@ test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> Count + Count, VQ3), {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, Count, VQ4), - _VQ6 = rabbit_variable_queue:terminate(VQ5), + _VQ6 = rabbit_variable_queue:terminate(shutdown, VQ5), VQ7 = variable_queue_init(test_amqqueue(true), true), {{_Msg1, true, _AckTag1, Count1}, VQ8} = rabbit_variable_queue:fetch(true, VQ7), @@ -2301,7 +2301,7 @@ test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> {_Guids, VQ4} = rabbit_variable_queue:requeue(AckTags, fun(X) -> X end, VQ3), VQ5 = rabbit_variable_queue:timeout(VQ4), - _VQ6 = rabbit_variable_queue:terminate(VQ5), + _VQ6 = rabbit_variable_queue:terminate(shutdown, VQ5), VQ7 = variable_queue_init(test_amqqueue(true), true), {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), VQ8. @@ -2336,7 +2336,7 @@ test_queue_recover() -> VQ1 = variable_queue_init(Q, true), {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), - _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2), + _VQ3 = rabbit_variable_queue:delete_and_terminate(shutdown, VQ2), rabbit_amqqueue:internal_delete(QName) end), passed. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 8ac3ad43..a167cca0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -16,7 +16,7 @@ -module(rabbit_variable_queue). --export([init/4, terminate/1, delete_and_terminate/1, +-export([init/4, terminate/2, delete_and_terminate/2, purge/1, publish/4, publish_delivered/5, drain_confirmed/1, fetch/2, ack/2, tx_publish/5, tx_ack/3, tx_rollback/2, tx_commit/4, requeue/3, len/1, is_empty/1, dropwhile/2, @@ -452,7 +452,7 @@ init(#amqqueue { name = QueueName, durable = true }, true, init(true, IndexState, DeltaCount, Terms1, AsyncCallback, SyncCallback, PersistentClient, TransientClient). 
-terminate(State) -> +terminate(_Reason, State) -> State1 = #vqstate { persistent_count = PCount, index_state = IndexState, msg_store_clients = {MSCStateP, MSCStateT} } = @@ -473,7 +473,7 @@ terminate(State) -> %% the only difference between purge and delete is that delete also %% needs to delete everything that's been delivered and not ack'd. -delete_and_terminate(State) -> +delete_and_terminate(_Reason, State) -> %% TODO: there is no need to interact with qi at all - which we do %% as part of 'purge' and 'remove_pending_ack', other than %% deleting it. -- cgit v1.2.1 From 0e0bcf22aad825811ccb7b633bfcfa134dde3d0e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 May 2011 12:05:08 +0100 Subject: Enforce checking of x-mirror arg --- src/rabbit_amqqueue.erl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 8c374ef3..50f5a9da 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -312,7 +312,8 @@ check_declare_arguments(QueueName, Args) -> [Key, rabbit_misc:rs(QueueName), Error]) end || {Key, Fun} <- [{<<"x-expires">>, fun check_integer_argument/1}, - {<<"x-message-ttl">>, fun check_integer_argument/1}]], + {<<"x-message-ttl">>, fun check_integer_argument/1}, + {<<"x-mirror">>, fun check_array_of_longstr_argument/1}]], ok. check_integer_argument(undefined) -> @@ -325,6 +326,18 @@ check_integer_argument({Type, Val}) when Val > 0 -> check_integer_argument({_Type, Val}) -> {error, {value_zero_or_less, Val}}. +check_array_of_longstr_argument(undefined) -> + ok; +check_array_of_longstr_argument({array, Array}) -> + case lists:all(fun ({longstr, _NodeName}) -> true; + (_) -> false + end, Array) of + true -> ok; + false -> {error, {array_contains_non_longstrs, Array}} + end; +check_array_of_longstr_argument({Type, _Val}) -> + {error, {unacceptable_type, Type}}. 
+ list(VHostPath) -> mnesia:dirty_match_object( rabbit_queue, -- cgit v1.2.1 From ed73a0d3c85aa3bd0fe7226f4c3c7de0f1452f02 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 May 2011 12:35:21 +0100 Subject: Not quite sure how I managed to get that quite so wrong... --- src/rabbit_amqqueue.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 50f5a9da..268199e5 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -259,8 +259,7 @@ with(Name, F, E) -> {ok, Q = #amqqueue{mirror_pids = []}} -> rabbit_misc:with_exit_handler(E, fun () -> F(Q) end); {ok, Q} -> - timer:sleep(25), - E1 = fun () -> with(Name, F, E) end, + E1 = fun () -> timer:sleep(25), with(Name, F, E) end, rabbit_misc:with_exit_handler(E1, fun () -> F(Q) end); {error, not_found} -> E() -- cgit v1.2.1 From 6a3be4ce63e9e92f47e8299d4282ab111818315a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 May 2011 13:23:21 +0100 Subject: correct use of nodes() --- src/rabbit_mirror_queue_master.erl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 9bd8565f..da12ea82 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -77,11 +77,11 @@ init(#amqqueue { arguments = Args, name = QName } = Q, Recover, Q, undefined, sender_death_fun()), GM = rabbit_mirror_queue_coordinator:get_gm(CPid), {_Type, Nodes} = rabbit_misc:table_lookup(Args, <<"x-mirror">>), - Nodes1 = case Nodes of - [] -> nodes(); - _ -> [list_to_atom(binary_to_list(Node)) || - {longstr, Node} <- Nodes] - end, + Nodes1 = (case Nodes of + [] -> rabbit_mnesia:all_clustered_nodes(); + _ -> [list_to_atom(binary_to_list(Node)) || + {longstr, Node} <- Nodes] + end) -- [node()], [rabbit_mirror_queue_misc:add_slave(QName, Node) || Node <- Nodes1], {ok, BQ} = application:get_env(backing_queue_module), BQS = BQ:init(Q, 
Recover, AsyncCallback, SyncCallback), -- cgit v1.2.1 From 36f68e916ff7319e027b7545d987ecd920284324 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 May 2011 13:32:06 +0100 Subject: enforce equivalence checking of x-mirror arg --- src/rabbit_amqqueue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 268199e5..f9e84443 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -300,7 +300,7 @@ with_exclusive_access_or_die(Name, ReaderPid, F) -> assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, RequiredArgs) -> rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName, - [<<"x-expires">>]). + [<<"x-expires">>, <<"x-mirror">>]). check_declare_arguments(QueueName, Args) -> [case Fun(rabbit_misc:table_lookup(Args, Key)) of -- cgit v1.2.1 From 6a8b341e4c4bd6a7f3c08f005416defc20077b91 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 May 2011 13:42:34 +0100 Subject: Work in gm table creation as part of the normal upgrade steps, and then assume that it'll continue to exist --- src/rabbit_mirror_queue_coordinator.erl | 1 - src/rabbit_mirror_queue_slave.erl | 1 - src/rabbit_upgrade_functions.erl | 22 ++++++++++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index ee849088..2727c1d0 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -324,7 +324,6 @@ ensure_monitoring(CPid, Pids) -> init([#amqqueue { name = QueueName } = Q, GM, DeathFun]) -> GM1 = case GM of undefined -> - ok = gm:create_tables(), {ok, GM2} = gm:start_link(QueueName, ?MODULE, [self()]), receive {joined, GM2, _Members} -> ok diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 666687a5..678926af 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl 
@@ -75,7 +75,6 @@ set_maximum_since_use(QPid, Age) -> init([#amqqueue { name = QueueName } = Q]) -> process_flag(trap_exit, true), %% amqqueue_process traps exits too. - ok = gm:create_tables(), {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]), receive {joined, GM} -> ok diff --git a/src/rabbit_upgrade_functions.erl b/src/rabbit_upgrade_functions.erl index a6f02a0e..04744aa4 100644 --- a/src/rabbit_upgrade_functions.erl +++ b/src/rabbit_upgrade_functions.erl @@ -30,21 +30,23 @@ -rabbit_upgrade({exchange_event_serial, mnesia, []}). -rabbit_upgrade({trace_exchanges, mnesia, []}). -rabbit_upgrade({mirror_pids, mnesia, []}). +-rabbit_upgrade({gm, mnesia, []}). %% ------------------------------------------------------------------- -ifdef(use_specs). --spec(remove_user_scope/0 :: () -> 'ok'). --spec(hash_passwords/0 :: () -> 'ok'). --spec(add_ip_to_listener/0 :: () -> 'ok'). --spec(internal_exchanges/0 :: () -> 'ok'). +-spec(remove_user_scope/0 :: () -> 'ok'). +-spec(hash_passwords/0 :: () -> 'ok'). +-spec(add_ip_to_listener/0 :: () -> 'ok'). +-spec(internal_exchanges/0 :: () -> 'ok'). -spec(user_to_internal_user/0 :: () -> 'ok'). --spec(topic_trie/0 :: () -> 'ok'). +-spec(topic_trie/0 :: () -> 'ok'). -spec(exchange_event_serial/0 :: () -> 'ok'). --spec(semi_durable_route/0 :: () -> 'ok'). --spec(trace_exchanges/0 :: () -> 'ok'). --spec(mirror_pids/0 :: () -> 'ok'). +-spec(semi_durable_route/0 :: () -> 'ok'). +-spec(trace_exchanges/0 :: () -> 'ok'). +-spec(mirror_pids/0 :: () -> 'ok'). +-spec(gm/0 :: () -> 'ok'). -endif. @@ -136,6 +138,10 @@ mirror_pids() -> || T <- Tables ], ok. +gm() -> + create(gm_group, [{record_name, gm_group}, + {attributes, [name, version, members]}]). 
+ %%-------------------------------------------------------------------- transform(TableName, Fun, FieldList) -> -- cgit v1.2.1 From cf7d7556ceb76ee2bdaa4a31cdd3bef129bac920 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 25 May 2011 17:01:39 +0100 Subject: Remove R13ism --- src/rabbit_mirror_queue_slave.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 678926af..c5f83c24 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -386,9 +386,8 @@ confirm_messages(MsgIds, State = #state { msg_id_status = MS }) -> Acc end end, {MS, gb_trees:empty()}, MsgIds), - gb_trees:map(fun (ChPid, MsgSeqNos) -> - ok = rabbit_channel:confirm(ChPid, MsgSeqNos) - end, CMs), + [ok = rabbit_channel:confirm(ChPid, MsgSeqNos) + || {ChPid, MsgSeqNos} <- gb_trees:to_list(CMs)], State #state { msg_id_status = MS1 }. gb_trees_cons(Key, Value, Tree) -> -- cgit v1.2.1 From da71b755e5da5eaebb732205d9ee43f8fef263fc Mon Sep 17 00:00:00 2001 From: Matthias Radestock Date: Thu, 2 Jun 2011 10:46:00 +0100 Subject: cosmetic: eliminate gratuitous diffs vs 'default' --- src/rabbit_amqqueue_process.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index b1c95338..17c35e90 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -35,7 +35,7 @@ -export([init_with_backing_queue_state/7]). -% Queue's state +%% Queue's state -record(q, {q, exclusive_consumer, has_had_consumers, @@ -843,11 +843,11 @@ emit_consumer_deleted(ChPid, ConsumerTag) -> prioritise_call(Msg, _From, _State) -> case Msg of - info -> 9; - {info, _Items} -> 9; - consumers -> 9; - {run_backing_queue, _Mod, _Fun} -> 6; - _ -> 0 + info -> 9; + {info, _Items} -> 9; + consumers -> 9; + {run_backing_queue, _Mod, _Fun} -> 6; + _ -> 0 end. 
prioritise_cast(Msg, _State) -> -- cgit v1.2.1 From 1d72bbec0dbd79af9efc0fb81d48ff216e04a2de Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 2 Jun 2011 11:08:52 +0100 Subject: Undo unneeded change to boots --- src/rabbit.erl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 02477b65..e6e80b4a 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -39,12 +39,6 @@ {requires, pre_boot}, {enables, external_infrastructure}]}). --rabbit_boot_step({rabbit_registry, - [{description, "plugin registry"}, - {mfa, {rabbit_sup, start_child, - [rabbit_registry]}}, - {enables, external_infrastructure}]}). - -rabbit_boot_step({database, [{mfa, {rabbit_mnesia, init, []}}, {requires, file_handle_cache}, @@ -66,6 +60,13 @@ -rabbit_boot_step({external_infrastructure, [{description, "external infrastructure ready"}]}). +-rabbit_boot_step({rabbit_registry, + [{description, "plugin registry"}, + {mfa, {rabbit_sup, start_child, + [rabbit_registry]}}, + {requires, external_infrastructure}, + {enables, kernel_ready}]}). + -rabbit_boot_step({rabbit_log, [{description, "logging server"}, {mfa, {rabbit_sup, start_restartable_child, -- cgit v1.2.1