diff options
author | Matthew Sackman <matthew@rabbitmq.com> | 2011-08-17 16:20:32 +0100 |
---|---|---|
committer | Matthew Sackman <matthew@rabbitmq.com> | 2011-08-17 16:20:32 +0100 |
commit | 8e7dc82bd3c5b5677feb830cef86d8eaef253425 (patch) | |
tree | 07507fbb0550f4b5d396a59c6da52b87640b854e | |
parent | cbe09896ffd9c29b6df1d8f1f810193fdec32bf3 (diff) | |
parent | 8b3ff283afea04bd8ae1a53e941e10b759e955ee (diff) | |
download | rabbitmq-server-8e7dc82bd3c5b5677feb830cef86d8eaef253425.tar.gz |
Merging bug24230 to default
41 files changed, 1835 insertions, 798 deletions
@@ -20,6 +20,8 @@ MANPAGES=$(patsubst %.xml, %.gz, $(wildcard $(DOCS_DIR)/*.[0-9].xml)) WEB_MANPAGES=$(patsubst %.xml, %.man.xml, $(wildcard $(DOCS_DIR)/*.[0-9].xml) $(DOCS_DIR)/rabbitmq-service.xml) USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml USAGES_ERL=$(foreach XML, $(USAGES_XML), $(call usage_xml_to_erl, $(XML))) +QC_MODULES := rabbit_backing_queue_qc +QC_TRIALS ?= 100 ifeq ($(shell python -c 'import simplejson' 2>/dev/null && echo yes),yes) PYTHON=python @@ -45,8 +47,14 @@ ifndef USE_SPECS USE_SPECS:=$(shell erl -noshell -eval 'io:format([list_to_integer(X) || X <- string:tokens(erlang:system_info(version), ".")] >= [5,8,4]), halt().') endif +ifndef USE_PROPER_QC +# PropEr needs to be installed for property checking +# http://proper.softlab.ntua.gr/ +USE_PROPER_QC:=$(shell erl -noshell -eval 'io:format({module, proper} =:= code:ensure_loaded(proper)), halt().') +endif + #other args: +native +"{hipe,[o3,verbose]}" -Ddebug=true +debug_info +no_strict_record_tests -ERLC_OPTS=-I $(INCLUDE_DIR) -o $(EBIN_DIR) -Wall -v +debug_info $(if $(filter true,$(USE_SPECS)),-Duse_specs) +ERLC_OPTS=-I $(INCLUDE_DIR) -o $(EBIN_DIR) -Wall -v +debug_info $(call boolean_macro,$(USE_SPECS),use_specs) $(call boolean_macro,$(USE_PROPER_QC),use_proper_qc) VERSION=0.0.0 TARBALL_NAME=rabbitmq-server-$(VERSION) @@ -69,6 +77,10 @@ define usage_dep $(call usage_xml_to_erl, $(1)): $(1) $(DOCS_DIR)/usage.xsl endef +define boolean_macro +$(if $(filter true,$(1)),-D$(2)) +endef + ifneq "$(SBIN_DIR)" "" ifneq "$(TARGET_DIR)" "" SCRIPTS_REL_PATH=$(shell ./calculate-relative $(TARGET_DIR)/sbin $(SBIN_DIR)) @@ -165,6 +177,9 @@ run-tests: all OUT=$$(echo "rabbit_tests:all_tests()." 
| $(ERL_CALL)) ; \ echo $$OUT ; echo $$OUT | grep '^{ok, passed}$$' > /dev/null +run-qc: all + $(foreach MOD,$(QC_MODULES),./quickcheck $(RABBITMQ_NODENAME) $(MOD) $(QC_TRIALS)) + start-background-node: $(BASIC_SCRIPT_ENVIRONMENT_SETTINGS) \ RABBITMQ_NODE_ONLY=true \ @@ -223,7 +238,7 @@ srcdist: distclean chmod 0755 $(TARGET_SRC_DIR)/scripts/* (cd dist; tar -zcf $(TARBALL_NAME).tar.gz $(TARBALL_NAME)) - (cd dist; zip -r $(TARBALL_NAME).zip $(TARBALL_NAME)) + (cd dist; zip -q -r $(TARBALL_NAME).zip $(TARBALL_NAME)) rm -rf $(TARGET_SRC_DIR) distclean: clean @@ -314,3 +329,4 @@ ifneq "$(strip $(patsubst clean%,,$(patsubst %clean,,$(TESTABLEGOALS))))" "" -include $(DEPS_FILE) endif +.PHONY: run-qc diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index ee000215..ba87c836 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -860,6 +860,17 @@ <listitem><para>Bytes of memory consumed by the Erlang process associated with the queue, including stack, heap and internal structures.</para></listitem> </varlistentry> + <varlistentry> + <term>slave_pids</term> + <listitem><para>If the queue is mirrored, this gives the IDs of the current slaves.</para></listitem> + </varlistentry> + <varlistentry> + <term>synchronised_slave_pids</term> + <listitem><para>If the queue is mirrored, this gives the IDs of + the current slaves which are synchronised with the master - + i.e. 
those which could take over from the master without + message loss.</para></listitem> + </varlistentry> </variablelist> <para> If no <command>queueinfoitem</command>s are specified then queue name and depth are diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec index ffc826eb..11f5f01c 100644 --- a/packaging/RPMS/Fedora/rabbitmq-server.spec +++ b/packaging/RPMS/Fedora/rabbitmq-server.spec @@ -120,6 +120,9 @@ done rm -rf %{buildroot} %changelog +* Mon Jun 27 2011 simon@rabbitmq.com 2.5.1-1 +- New Upstream Release + * Thu Jun 9 2011 jerryk@vmware.com 2.5.0-1 - New Upstream Release diff --git a/packaging/debs/Debian/Makefile b/packaging/debs/Debian/Makefile index 31979a8e..38c81134 100644 --- a/packaging/debs/Debian/Makefile +++ b/packaging/debs/Debian/Makefile @@ -19,7 +19,7 @@ all: package: clean cp $(TARBALL_DIR)/$(TARBALL) $(DEBIAN_ORIG_TARBALL) - tar -zxvf $(DEBIAN_ORIG_TARBALL) + tar -zxf $(DEBIAN_ORIG_TARBALL) cp -r debian $(UNPACKED_DIR) cp $(COMMON_DIR)/* $(UNPACKED_DIR)/debian/ # Debian and descendants differ from most other distros in that diff --git a/packaging/debs/Debian/debian/changelog b/packaging/debs/Debian/debian/changelog index 1cab4235..9063a6ed 100644 --- a/packaging/debs/Debian/debian/changelog +++ b/packaging/debs/Debian/debian/changelog @@ -1,3 +1,9 @@ +rabbitmq-server (2.5.1-1) lucid; urgency=low + + * New Upstream Release + + -- Simon MacMullen <simon@rabbitmq.com> Mon, 27 Jun 2011 11:21:49 +0100 + rabbitmq-server (2.5.0-1) lucid; urgency=low * New Upstream Release diff --git a/packaging/generic-unix/Makefile b/packaging/generic-unix/Makefile index c4e01f4a..b5c342aa 100644 --- a/packaging/generic-unix/Makefile +++ b/packaging/generic-unix/Makefile @@ -4,7 +4,7 @@ TARGET_DIR=rabbitmq_server-$(VERSION) TARGET_TARBALL=rabbitmq-server-generic-unix-$(VERSION) dist: - tar -zxvf ../../dist/$(SOURCE_DIR).tar.gz + tar -zxf ../../dist/$(SOURCE_DIR).tar.gz $(MAKE) -C $(SOURCE_DIR) \ 
TARGET_DIR=`pwd`/$(TARGET_DIR) \ diff --git a/packaging/windows-exe/Makefile b/packaging/windows-exe/Makefile index 59803f9c..ab50e30b 100644 --- a/packaging/windows-exe/Makefile +++ b/packaging/windows-exe/Makefile @@ -2,7 +2,7 @@ VERSION=0.0.0 ZIP=../windows/rabbitmq-server-windows-$(VERSION) dist: rabbitmq-$(VERSION).nsi rabbitmq_server-$(VERSION) - makensis rabbitmq-$(VERSION).nsi + makensis -V2 rabbitmq-$(VERSION).nsi rabbitmq-$(VERSION).nsi: rabbitmq_nsi.in sed \ @@ -10,7 +10,7 @@ rabbitmq-$(VERSION).nsi: rabbitmq_nsi.in $< > $@ rabbitmq_server-$(VERSION): - unzip $(ZIP) + unzip -q $(ZIP) clean: rm -rf rabbitmq-*.nsi rabbitmq_server-* rabbitmq-server-*.exe diff --git a/packaging/windows-exe/rabbitmq_nsi.in b/packaging/windows-exe/rabbitmq_nsi.in index 1ed4064e..27e4e1dc 100644 --- a/packaging/windows-exe/rabbitmq_nsi.in +++ b/packaging/windows-exe/rabbitmq_nsi.in @@ -113,17 +113,17 @@ Section "Start Menu" RabbitStartMenu CreateDirectory "$APPDATA\RabbitMQ\db" CreateDirectory "$SMPROGRAMS\RabbitMQ Server" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Uninstall.lnk" "$INSTDIR\uninstall.exe" "" "$INSTDIR\uninstall.exe" 0 - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Plugins Directory.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\plugins" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Log Directory.lnk" "$APPDATA\RabbitMQ\log" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Database Directory.lnk" "$APPDATA\RabbitMQ\db" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\(Re)Install Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "install" "$INSTDIR\rabbitmq.ico" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Remove Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "remove" "$INSTDIR\rabbitmq.ico" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Start Service.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "start" "$INSTDIR\rabbitmq.ico" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Stop Service.lnk" 
"$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "stop" "$INSTDIR\rabbitmq.ico" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Uninstall RabbitMQ.lnk" "$INSTDIR\uninstall.exe" "" "$INSTDIR\uninstall.exe" 0 + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Plugins.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\plugins" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Logs.lnk" "$APPDATA\RabbitMQ\log" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Database Directory.lnk" "$APPDATA\RabbitMQ\db" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - (re)install.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "install" "$INSTDIR\rabbitmq.ico" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - remove.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "remove" "$INSTDIR\rabbitmq.ico" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - start.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "start" "$INSTDIR\rabbitmq.ico" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Service - stop.lnk" "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin\rabbitmq-service.bat" "stop" "$INSTDIR\rabbitmq.ico" SetOutPath "$INSTDIR\rabbitmq_server-%%VERSION%%\sbin" - CreateShortCut "$SMPROGRAMS\RabbitMQ Server\Command Prompt (sbin dir).lnk" "$WINDIR\system32\cmd.exe" "" "$WINDIR\system32\cmd.exe" + CreateShortCut "$SMPROGRAMS\RabbitMQ Server\RabbitMQ Command Prompt (sbin dir).lnk" "$WINDIR\system32\cmd.exe" "" "$WINDIR\system32\cmd.exe" SetOutPath $INSTDIR SectionEnd diff --git a/packaging/windows/Makefile b/packaging/windows/Makefile index dacfa620..a0be8d89 100644 --- a/packaging/windows/Makefile +++ b/packaging/windows/Makefile @@ -4,7 +4,7 @@ TARGET_DIR=rabbitmq_server-$(VERSION) TARGET_ZIP=rabbitmq-server-windows-$(VERSION) dist: - tar -zxvf ../../dist/$(SOURCE_DIR).tar.gz + tar -zxf ../../dist/$(SOURCE_DIR).tar.gz $(MAKE) -C $(SOURCE_DIR) mkdir $(SOURCE_DIR)/sbin 
@@ -24,7 +24,7 @@ dist: elinks -dump -no-references -no-numbering rabbitmq-service.html \ > $(TARGET_DIR)/readme-service.txt todos $(TARGET_DIR)/readme-service.txt - zip -r $(TARGET_ZIP).zip $(TARGET_DIR) + zip -q -r $(TARGET_ZIP).zip $(TARGET_DIR) rm -rf $(TARGET_DIR) rabbitmq-service.html clean: clean_partial diff --git a/quickcheck b/quickcheck new file mode 100755 index 00000000..a36cf3ed --- /dev/null +++ b/quickcheck @@ -0,0 +1,36 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -sname quickcheck +-mode(compile). + +%% A helper to test quickcheck properties on a running broker +%% NodeStr is a local broker node name +%% ModStr is the module containing quickcheck properties +%% The number of trials is optional +main([NodeStr, ModStr | TrialsStr]) -> + {ok, Hostname} = inet:gethostname(), + Node = list_to_atom(NodeStr ++ "@" ++ Hostname), + Mod = list_to_atom(ModStr), + Trials = lists:map(fun erlang:list_to_integer/1, TrialsStr), + case rpc:call(Node, code, ensure_loaded, [proper]) of + {module, proper} -> + case rpc:call(Node, proper, module, [Mod] ++ Trials) of + [] -> ok; + _ -> quit(1) + end; + {badrpc, Reason} -> + io:format("Could not contact node ~p: ~p.~n", [Node, Reason]), + quit(2); + {error,nofile} -> + io:format("Module PropEr was not found on node ~p~n", [Node]), + quit(2) + end; +main([]) -> + io:format("This script requires a node name and a module.~n"). + +quit(Status) -> + case os:type() of + {unix, _} -> halt(Status); + {win32, _} -> init:stop(Status) + end. + diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 61b08d49..3c2111dc 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -44,7 +44,6 @@ %% 4) You can find out what your 'real' offset is, and what your %% 'virtual' offset is (i.e. where the hdl really is, and where it %% would be after the write buffer is written out). -%% 5) You can find out what the offset was when you last sync'd. 
%% %% There is also a server component which serves to limit the number %% of open file descriptors. This is a hard limit: the server @@ -144,8 +143,8 @@ -export([register_callback/3]). -export([open/3, close/1, read/2, append/2, sync/1, position/2, truncate/1, - last_sync_offset/1, current_virtual_offset/1, current_raw_offset/1, - flush/1, copy/3, set_maximum_since_use/1, delete/1, clear/1]). + current_virtual_offset/1, current_raw_offset/1, flush/1, copy/3, + set_maximum_since_use/1, delete/1, clear/1]). -export([obtain/0, transfer/1, set_limit/1, get_limit/0, info_keys/0, info/0, info/1]). -export([ulimit/0]). @@ -172,7 +171,6 @@ -record(handle, { hdl, offset, - trusted_offset, is_dirty, write_buffer_size, write_buffer_size_limit, @@ -240,7 +238,6 @@ -spec(sync/1 :: (ref()) -> ok_or_error()). -spec(position/2 :: (ref(), position()) -> val_or_error(offset())). -spec(truncate/1 :: (ref()) -> ok_or_error()). --spec(last_sync_offset/1 :: (ref()) -> val_or_error(offset())). -spec(current_virtual_offset/1 :: (ref()) -> val_or_error(offset())). -spec(current_raw_offset/1 :: (ref()) -> val_or_error(offset())). -spec(flush/1 :: (ref()) -> ok_or_error()). @@ -365,11 +362,10 @@ sync(Ref) -> [Ref], fun ([#handle { is_dirty = false, write_buffer = [] }]) -> ok; - ([Handle = #handle { hdl = Hdl, offset = Offset, + ([Handle = #handle { hdl = Hdl, is_dirty = true, write_buffer = [] }]) -> case file:sync(Hdl) of - ok -> {ok, [Handle #handle { trusted_offset = Offset, - is_dirty = false }]}; + ok -> {ok, [Handle #handle { is_dirty = false }]}; Error -> {Error, [Handle]} end end). 
@@ -384,21 +380,13 @@ position(Ref, NewOffset) -> truncate(Ref) -> with_flushed_handles( [Ref], - fun ([Handle1 = #handle { hdl = Hdl, offset = Offset, - trusted_offset = TOffset }]) -> + fun ([Handle1 = #handle { hdl = Hdl }]) -> case file:truncate(Hdl) of - ok -> TOffset1 = lists:min([Offset, TOffset]), - {ok, [Handle1 #handle { trusted_offset = TOffset1, - at_eof = true }]}; + ok -> {ok, [Handle1 #handle { at_eof = true }]}; Error -> {Error, [Handle1]} end end). -last_sync_offset(Ref) -> - with_handles([Ref], fun ([#handle { trusted_offset = TOffset }]) -> - {ok, TOffset} - end). - current_virtual_offset(Ref) -> with_handles([Ref], fun ([#handle { at_eof = true, is_write = true, offset = Offset, @@ -456,8 +444,7 @@ clear(Ref) -> write_buffer_size = 0 }) of {{ok, 0}, Handle1 = #handle { hdl = Hdl }} -> case file:truncate(Hdl) of - ok -> {ok, [Handle1 #handle {trusted_offset = 0, - at_eof = true }]}; + ok -> {ok, [Handle1 #handle { at_eof = true }]}; Error -> {Error, [Handle1]} end; {{error, _} = Error, Handle1} -> @@ -585,14 +572,13 @@ reopen([{Ref, NewOrReopen, Handle = #handle { hdl = closed, end) of {ok, Hdl} -> Now = now(), - {{ok, Offset1}, Handle1} = + {{ok, _Offset}, Handle1} = maybe_seek(Offset, Handle #handle { hdl = Hdl, offset = 0, last_used_at = Now }), - Handle2 = Handle1 #handle { trusted_offset = Offset1 }, - put({Ref, fhc_handle}, Handle2), + put({Ref, fhc_handle}, Handle1), reopen(RefNewOrReopenHdls, gb_trees:insert(Now, Ref, Tree), - [{Ref, Handle2} | RefHdls]); + [{Ref, Handle1} | RefHdls]); Error -> %% NB: none of the handles in ToOpen are in the age tree Oldest = oldest(Tree, fun () -> undefined end), @@ -677,7 +663,6 @@ new_closed_handle(Path, Mode, Options) -> Ref = make_ref(), put({Ref, fhc_handle}, #handle { hdl = closed, offset = 0, - trusted_offset = 0, is_dirty = false, write_buffer_size = 0, write_buffer_size_limit = WriteBufferSize, @@ -705,7 +690,6 @@ soft_close(Handle = #handle { hdl = closed }) -> soft_close(Handle) -> case 
write_buffer(Handle) of {ok, #handle { hdl = Hdl, - offset = Offset, is_dirty = IsDirty, last_used_at = Then } = Handle1 } -> ok = case IsDirty of @@ -715,7 +699,6 @@ soft_close(Handle) -> ok = file:close(Hdl), age_tree_delete(Then), {ok, Handle1 #handle { hdl = closed, - trusted_offset = Offset, is_dirty = false, last_used_at = undefined }}; {_Error, _Handle} = Result -> @@ -925,10 +908,10 @@ handle_cast({transfer, FromPid, ToPid}, State) -> ok = track_client(ToPid, State#fhc_state.clients), {noreply, process_pending( update_counts(obtain, ToPid, +1, - update_counts(obtain, FromPid, -1, State)))}; + update_counts(obtain, FromPid, -1, State)))}. -handle_cast(check_counts, State) -> - {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}. +handle_info(check_counts, State) -> + {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}; handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #fhc_state { elders = Elders, @@ -1133,9 +1116,9 @@ reduce(State = #fhc_state { open_pending = OpenPending, end end, case TRef of - undefined -> {ok, TRef1} = timer:apply_after( - ?FILE_HANDLES_CHECK_INTERVAL, - gen_server, cast, [?SERVER, check_counts]), + undefined -> TRef1 = erlang:send_after( + ?FILE_HANDLES_CHECK_INTERVAL, ?SERVER, + check_counts), State #fhc_state { timer_ref = TRef1 }; _ -> State end. diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 43e0a8f5..35258139 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -67,6 +67,11 @@ %% module. Note there is no form also encompassing a reply, thus if %% you wish to reply in handle_call/3 and change the callback module, %% you need to use gen_server2:reply/2 to issue the reply manually. +%% +%% 8) The callback module can optionally implement +%% format_message_queue/2 which is the equivalent of format_status/2 +%% but where the second argument is specifically the priority_queue +%% which contains the prioritised message_queue. 
%% All modifications are (C) 2009-2011 VMware, Inc. @@ -593,41 +598,35 @@ adjust_timeout_state(SleptAt, AwokeAt, {backoff, CurrentTO, MinimumTO, CurrentTO1 = Base + Extra, {backoff, CurrentTO1, MinimumTO, DesiredHibPeriod, RandomState1}. -in({'$gen_cast', Msg}, GS2State = #gs2_state { prioritise_cast = PC, - queue = Queue }) -> - GS2State #gs2_state { queue = priority_queue:in( - {'$gen_cast', Msg}, - PC(Msg, GS2State), Queue) }; -in({'$gen_call', From, Msg}, GS2State = #gs2_state { prioritise_call = PC, - queue = Queue }) -> - GS2State #gs2_state { queue = priority_queue:in( - {'$gen_call', From, Msg}, - PC(Msg, From, GS2State), Queue) }; -in(Input, GS2State = #gs2_state { prioritise_info = PI, queue = Queue }) -> - GS2State #gs2_state { queue = priority_queue:in( - Input, PI(Input, GS2State), Queue) }. - -process_msg(Msg, - GS2State = #gs2_state { parent = Parent, - name = Name, - debug = Debug }) -> - case Msg of - {system, From, Req} -> - sys:handle_system_msg( - Req, From, Parent, ?MODULE, Debug, - GS2State); - %% gen_server puts Hib on the end as the 7th arg, but that - %% version of the function seems not to be documented so - %% leaving out for now. - {'EXIT', Parent, Reason} -> - terminate(Reason, Msg, GS2State); - _Msg when Debug =:= [] -> - handle_msg(Msg, GS2State); - _Msg -> - Debug1 = sys:handle_debug(Debug, fun print_event/3, - Name, {in, Msg}), - handle_msg(Msg, GS2State #gs2_state { debug = Debug1 }) - end. 
+in({'$gen_cast', Msg} = Input, + GS2State = #gs2_state { prioritise_cast = PC }) -> + in(Input, PC(Msg, GS2State), GS2State); +in({'$gen_call', From, Msg} = Input, + GS2State = #gs2_state { prioritise_call = PC }) -> + in(Input, PC(Msg, From, GS2State), GS2State); +in({'EXIT', Parent, _R} = Input, GS2State = #gs2_state { parent = Parent }) -> + in(Input, infinity, GS2State); +in({system, _From, _Req} = Input, GS2State) -> + in(Input, infinity, GS2State); +in(Input, GS2State = #gs2_state { prioritise_info = PI }) -> + in(Input, PI(Input, GS2State), GS2State). + +in(Input, Priority, GS2State = #gs2_state { queue = Queue }) -> + GS2State # gs2_state { queue = priority_queue:in(Input, Priority, Queue) }. + +process_msg({system, From, Req}, + GS2State = #gs2_state { parent = Parent, debug = Debug }) -> + sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, GS2State); +process_msg({'EXIT', Parent, Reason} = Msg, + GS2State = #gs2_state { parent = Parent }) -> + %% gen_server puts Hib on the end as the 7th arg, but that version + %% of the fun seems not to be documented so leaving out for now. + terminate(Reason, Msg, GS2State); +process_msg(Msg, GS2State = #gs2_state { debug = [] }) -> + handle_msg(Msg, GS2State); +process_msg(Msg, GS2State = #gs2_state { name = Name, debug = Debug }) -> + Debug1 = sys:handle_debug(Debug, fun print_event/3, Name, {in, Msg}), + handle_msg(Msg, GS2State #gs2_state { debug = Debug1 }). 
%%% --------------------------------------------------- %%% Send/recive functions @@ -1161,17 +1160,22 @@ format_status(Opt, StatusData) -> end, Header = lists:concat(["Status for generic server ", NameTag]), Log = sys:get_debug(log, Debug, []), - Specfic = - case erlang:function_exported(Mod, format_status, 2) of - true -> case catch Mod:format_status(Opt, [PDict, State]) of - {'EXIT', _} -> [{data, [{"State", State}]}]; - Else -> Else - end; - _ -> [{data, [{"State", State}]}] - end, + Specfic = callback(Mod, format_status, [Opt, [PDict, State]], + fun () -> [{data, [{"State", State}]}] end), + Messages = callback(Mod, format_message_queue, [Opt, Queue], + fun () -> priority_queue:to_list(Queue) end), [{header, Header}, {data, [{"Status", SysState}, {"Parent", Parent}, {"Logged events", Log}, - {"Queued messages", priority_queue:to_list(Queue)}]} | + {"Queued messages", Messages}]} | Specfic]. + +callback(Mod, FunName, Args, DefaultThunk) -> + case erlang:function_exported(Mod, FunName, length(Args)) of + true -> case catch apply(Mod, FunName, Args) of + {'EXIT', _} -> DefaultThunk(); + Success -> Success + end; + false -> DefaultThunk() + end. @@ -376,11 +376,11 @@ confirmed_broadcast/2, group_members/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, prioritise_cast/2, prioritise_info/2]). + code_change/3, prioritise_info/2]). -export([behaviour_info/1]). --export([table_definitions/0, flush/1]). +-export([table_definitions/0]). -define(GROUP_TABLE, gm_group). -define(HIBERNATE_AFTER_MIN, 1000). @@ -511,9 +511,6 @@ confirmed_broadcast(Server, Msg) -> group_members(Server) -> gen_server2:call(Server, group_members, infinity). -flush(Server) -> - gen_server2:cast(Server, flush). 
- init([GroupName, Module, Args]) -> {MegaSecs, Secs, MicroSecs} = now(), @@ -629,12 +626,12 @@ handle_cast(join, State = #state { self = Self, {Module:joined(Args, all_known_members(View)), State1}); handle_cast(leave, State) -> - {stop, normal, State}; + {stop, normal, State}. -handle_cast(flush, State) -> - noreply( - flush_broadcast_buffer(State #state { broadcast_timer = undefined })). +handle_info(flush, State) -> + noreply( + flush_broadcast_buffer(State #state { broadcast_timer = undefined })); handle_info({'DOWN', MRef, process, _Pid, _Reason}, State = #state { self = Self, @@ -684,9 +681,7 @@ terminate(Reason, State = #state { module = Module, code_change(_OldVsn, State, _Extra) -> {ok, State}. -prioritise_cast(flush, _State) -> 1; -prioritise_cast(_ , _State) -> 0. - +prioritise_info(flush, _State) -> 1; prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _State) -> 1; prioritise_info(_ , _State) -> 0. @@ -808,10 +803,10 @@ ensure_broadcast_timer(State = #state { broadcast_buffer = [], State; ensure_broadcast_timer(State = #state { broadcast_buffer = [], broadcast_timer = TRef }) -> - timer:cancel(TRef), + erlang:cancel_timer(TRef), State #state { broadcast_timer = undefined }; ensure_broadcast_timer(State = #state { broadcast_timer = undefined }) -> - {ok, TRef} = timer:apply_after(?BROADCAST_TIMER, ?MODULE, flush, [self()]), + TRef = erlang:send_after(?BROADCAST_TIMER, self(), flush), State #state { broadcast_timer = TRef }; ensure_broadcast_timer(State) -> State. diff --git a/src/priority_queue.erl b/src/priority_queue.erl index 4a94b24b..4fc8b469 100644 --- a/src/priority_queue.erl +++ b/src/priority_queue.erl @@ -47,7 +47,10 @@ -ifdef(use_specs). --type(priority() :: integer()). +-export_type([q/0]). + +-type(q() :: pqueue()). +-type(priority() :: integer() | 'infinity'). -type(squeue() :: {queue, [any()], [any()]}). -type(pqueue() :: squeue() | {pqueue, [{priority(), squeue()}]}). 
@@ -71,8 +74,9 @@ new() -> is_queue({queue, R, F}) when is_list(R), is_list(F) -> true; is_queue({pqueue, Queues}) when is_list(Queues) -> - lists:all(fun ({P, Q}) -> is_integer(P) andalso is_queue(Q) end, - Queues); + lists:all(fun ({infinity, Q}) -> is_queue(Q); + ({P, Q}) -> is_integer(P) andalso is_queue(Q) + end, Queues); is_queue(_) -> false. @@ -89,7 +93,8 @@ len({pqueue, Queues}) -> to_list({queue, In, Out}) when is_list(In), is_list(Out) -> [{0, V} || V <- Out ++ lists:reverse(In, [])]; to_list({pqueue, Queues}) -> - [{-P, V} || {P, Q} <- Queues, {0, V} <- to_list(Q)]. + [{maybe_negate_priority(P), V} || {P, Q} <- Queues, + {0, V} <- to_list(Q)]. in(Item, Q) -> in(Item, 0, Q). @@ -103,12 +108,20 @@ in(X, Priority, _Q = {queue, [], []}) -> in(X, Priority, Q = {queue, _, _}) -> in(X, Priority, {pqueue, [{0, Q}]}); in(X, Priority, {pqueue, Queues}) -> - P = -Priority, + P = maybe_negate_priority(Priority), {pqueue, case lists:keysearch(P, 1, Queues) of {value, {_, Q}} -> lists:keyreplace(P, 1, Queues, {P, in(X, Q)}); + false when P == infinity -> + [{P, {queue, [X], []}} | Queues]; false -> - lists:keysort(1, [{P, {queue, [X], []}} | Queues]) + case Queues of + [{infinity, InfQueue} | Queues1] -> + [{infinity, InfQueue} | + lists:keysort(1, [{P, {queue, [X], []}} | Queues1])]; + _ -> + lists:keysort(1, [{P, {queue, [X], []}} | Queues]) + end end}. 
out({queue, [], []} = Q) -> @@ -141,7 +154,8 @@ join({queue, [], []}, B) -> join({queue, AIn, AOut}, {queue, BIn, BOut}) -> {queue, BIn, AOut ++ lists:reverse(AIn, BOut)}; join(A = {queue, _, _}, {pqueue, BPQ}) -> - {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, BPQ), + {Pre, Post} = + lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, BPQ), Post1 = case Post of [] -> [ {0, A} ]; [ {0, ZeroQueue} | Rest ] -> [ {0, join(A, ZeroQueue)} | Rest ]; @@ -149,7 +163,8 @@ join(A = {queue, _, _}, {pqueue, BPQ}) -> end, {pqueue, Pre ++ Post1}; join({pqueue, APQ}, B = {queue, _, _}) -> - {Pre, Post} = lists:splitwith(fun ({P, _}) -> P < 0 end, APQ), + {Pre, Post} = + lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, APQ), Post1 = case Post of [] -> [ {0, B} ]; [ {0, ZeroQueue} | Rest ] -> [ {0, join(ZeroQueue, B)} | Rest ]; @@ -165,7 +180,7 @@ merge(APQ, [], Acc) -> lists:reverse(Acc, APQ); merge([{P, A}|As], [{P, B}|Bs], Acc) -> merge(As, Bs, [ {P, join(A, B)} | Acc ]); -merge([{PA, A}|As], Bs = [{PB, _}|_], Acc) when PA < PB -> +merge([{PA, A}|As], Bs = [{PB, _}|_], Acc) when PA < PB orelse PA == infinity -> merge(As, Bs, [ {PA, A} | Acc ]); merge(As = [{_, _}|_], [{PB, B}|Bs], Acc) -> merge(As, Bs, [ {PB, B} | Acc ]). @@ -174,3 +189,6 @@ r2f([]) -> {queue, [], []}; r2f([_] = R) -> {queue, [], R}; r2f([X,Y]) -> {queue, [X], [Y]}; r2f([X,Y|R]) -> {queue, [X,Y], lists:reverse(R, [])}. + +maybe_negate_priority(infinity) -> infinity; +maybe_negate_priority(P) -> -P. diff --git a/src/rabbit.erl b/src/rabbit.erl index 6ef816c0..8cae7fde 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -19,7 +19,7 @@ -behaviour(application). -export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, environment/0, - rotate_logs/1]). + rotate_logs/1, force_event_refresh/0]). -export([start/2, stop/1]). @@ -134,6 +134,18 @@ {requires, empty_db_check}, {enables, routing_ready}]}). 
+-rabbit_boot_step({mirror_queue_slave_sup, + [{description, "mirror queue slave sup"}, + {mfa, {rabbit_mirror_queue_slave_sup, start, []}}, + {requires, recovery}, + {enables, routing_ready}]}). + +-rabbit_boot_step({mirrored_queues, + [{description, "adding mirrors to queues"}, + {mfa, {rabbit_mirror_queue_misc, on_node_up, []}}, + {requires, mirror_queue_slave_sup}, + {enables, routing_ready}]}). + -rabbit_boot_step({routing_ready, [{description, "message delivery logic ready"}, {requires, core_initialized}]}). @@ -175,8 +187,9 @@ -spec(prepare/0 :: () -> 'ok'). -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). --spec(stop_and_halt/0 :: () -> 'ok'). +-spec(stop_and_halt/0 :: () -> no_return()). -spec(rotate_logs/1 :: (file_suffix()) -> rabbit_types:ok_or_error(any())). +-spec(force_event_refresh/0 :: () -> 'ok'). -spec(status/0 :: () -> [{pid, integer()} | {running_applications, [{atom(), string(), string()}]} | @@ -500,6 +513,12 @@ log_rotation_result(ok, {error, SaslLogError}) -> log_rotation_result(ok, ok) -> ok. +force_event_refresh() -> + rabbit_direct:force_event_refresh(), + rabbit_networking:force_connection_event_refresh(), + rabbit_channel:force_event_refresh(), + rabbit_amqqueue:force_event_refresh(). + %%--------------------------------------------------------------------------- %% misc diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index e9d01d12..88ff26cc 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -21,7 +21,8 @@ -export([lookup/1, with/2, with_or_die/2, assert_equivalence/5, check_exclusive_access/2, with_exclusive_access_or_die/3, stat/1, deliver/2, requeue/3, ack/3, reject/4]). --export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). +-export([list/0, list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). +-export([force_event_refresh/0]). -export([consumers/1, consumers_all/1, consumer_info_keys/0]). -export([basic_get/3, basic_consume/7, basic_cancel/4]). 
-export([notify_sent/2, unblock/2, flush_all/2]). @@ -32,9 +33,7 @@ %% internal -export([internal_declare/2, internal_delete/1, run_backing_queue/3, - sync_timeout/1, update_ram_duration/1, set_ram_duration_target/2, - set_maximum_since_use/2, maybe_expire/1, drop_expired/1, - emit_stats/1]). + set_ram_duration_target/2, set_maximum_since_use/2]). -include("rabbit.hrl"). -include_lib("stdlib/include/qlc.hrl"). @@ -81,6 +80,7 @@ -> 'ok' | rabbit_types:channel_exit()). -spec(with_exclusive_access_or_die/3 :: (name(), pid(), qfun(A)) -> A | rabbit_types:channel_exit()). +-spec(list/0 :: () -> [rabbit_types:amqqueue()]). -spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:amqqueue()]). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). -spec(info/1 :: (rabbit_types:amqqueue()) -> rabbit_types:infos()). @@ -90,6 +90,7 @@ -spec(info_all/1 :: (rabbit_types:vhost()) -> [rabbit_types:infos()]). -spec(info_all/2 :: (rabbit_types:vhost(), rabbit_types:info_keys()) -> [rabbit_types:infos()]). +-spec(force_event_refresh/0 :: () -> 'ok'). -spec(consumers/1 :: (rabbit_types:amqqueue()) -> [{pid(), rabbit_types:ctag(), boolean()}]). @@ -100,7 +101,6 @@ -spec(stat/1 :: (rabbit_types:amqqueue()) -> {'ok', non_neg_integer(), non_neg_integer()}). --spec(emit_stats/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(delete_immediately/1 :: (rabbit_types:amqqueue()) -> 'ok'). -spec(delete/3 :: (rabbit_types:amqqueue(), 'false', 'false') @@ -119,12 +119,13 @@ -spec(ack/3 :: (pid(), [msg_id()], pid()) -> 'ok'). -spec(reject/4 :: (pid(), [msg_id()], boolean(), pid()) -> 'ok'). -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()). --spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()). +-spec(limit_all/3 :: ([pid()], pid(), rabbit_limiter:token()) -> + ok_or_errors()). -spec(basic_get/3 :: (rabbit_types:amqqueue(), pid(), boolean()) -> {'ok', non_neg_integer(), qmsg()} | 'empty'). 
-spec(basic_consume/7 :: - (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined', - rabbit_types:ctag(), boolean(), any()) + (rabbit_types:amqqueue(), boolean(), pid(), + rabbit_limiter:token(), rabbit_types:ctag(), boolean(), any()) -> rabbit_types:ok_or_error('exclusive_consume_unavailable')). -spec(basic_cancel/4 :: (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), any()) -> 'ok'). @@ -142,11 +143,8 @@ -spec(run_backing_queue/3 :: (pid(), atom(), (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) -> 'ok'). --spec(sync_timeout/1 :: (pid()) -> 'ok'). --spec(update_ram_duration/1 :: (pid()) -> 'ok'). -spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). --spec(maybe_expire/1 :: (pid()) -> 'ok'). -spec(on_node_down/1 :: (node()) -> 'ok'). -spec(pseudo_queue/2 :: (name(), pid()) -> rabbit_types:amqqueue()). @@ -230,7 +228,7 @@ internal_declare(Q = #amqqueue{name = QueueName}, false) -> end). store_queue(Q = #amqqueue{durable = true}) -> - ok = mnesia:write(rabbit_durable_queue, Q, write), + ok = mnesia:write(rabbit_durable_queue, Q#amqqueue{slave_pids = []}, write), ok = mnesia:write(rabbit_queue, Q, write), ok; store_queue(Q = #amqqueue{durable = false}) -> @@ -365,6 +363,9 @@ check_ha_policy_argument({longstr, Policy}, _Args) -> check_ha_policy_argument({Type, _}, _Args) -> {error, {unacceptable_type, Type}}. +list() -> + mnesia:dirty_match_object(rabbit_queue, #amqqueue{_ = '_'}). + list(VHostPath) -> mnesia:dirty_match_object( rabbit_queue, @@ -387,6 +388,10 @@ info_all(VHostPath) -> map(VHostPath, fun (Q) -> info(Q) end). info_all(VHostPath, Items) -> map(VHostPath, fun (Q) -> info(Q, Items) end). +force_event_refresh() -> + [gen_server2:cast(Q#amqqueue.pid, force_event_refresh) || Q <- list()], + ok. + consumers(#amqqueue{ pid = QPid }) -> delegate_call(QPid, consumers). 
@@ -405,9 +410,6 @@ consumers_all(VHostPath) -> stat(#amqqueue{pid = QPid}) -> delegate_call(QPid, stat). -emit_stats(#amqqueue{pid = QPid}) -> - delegate_cast(QPid, emit_stats). - delete_immediately(#amqqueue{ pid = QPid }) -> gen_server2:cast(QPid, delete_immediately). @@ -439,19 +441,17 @@ notify_down_all(QPids, ChPid) -> fun (QPid) -> gen_server2:call(QPid, {notify_down, ChPid}, infinity) end, QPids). -limit_all(QPids, ChPid, LimiterPid) -> +limit_all(QPids, ChPid, Limiter) -> delegate:invoke_no_result( - QPids, fun (QPid) -> - gen_server2:cast(QPid, {limit, ChPid, LimiterPid}) - end). + QPids, fun (QPid) -> gen_server2:cast(QPid, {limit, ChPid, Limiter}) end). basic_get(#amqqueue{pid = QPid}, ChPid, NoAck) -> delegate_call(QPid, {basic_get, ChPid, NoAck}). -basic_consume(#amqqueue{pid = QPid}, NoAck, ChPid, LimiterPid, +basic_consume(#amqqueue{pid = QPid}, NoAck, ChPid, Limiter, ConsumerTag, ExclusiveConsume, OkMsg) -> delegate_call(QPid, {basic_consume, NoAck, ChPid, - LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg}). + Limiter, ConsumerTag, ExclusiveConsume, OkMsg}). basic_cancel(#amqqueue{pid = QPid}, ChPid, ConsumerTag, OkMsg) -> ok = delegate_call(QPid, {basic_cancel, ChPid, ConsumerTag, OkMsg}). @@ -486,24 +486,12 @@ internal_delete(QueueName) -> run_backing_queue(QPid, Mod, Fun) -> gen_server2:cast(QPid, {run_backing_queue, Mod, Fun}). -sync_timeout(QPid) -> - gen_server2:cast(QPid, sync_timeout). - -update_ram_duration(QPid) -> - gen_server2:cast(QPid, update_ram_duration). - set_ram_duration_target(QPid, Duration) -> gen_server2:cast(QPid, {set_ram_duration_target, Duration}). set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). -maybe_expire(QPid) -> - gen_server2:cast(QPid, maybe_expire). - -drop_expired(QPid) -> - gen_server2:cast(QPid, drop_expired). 
- on_node_down(Node) -> rabbit_misc:execute_mnesia_tx_with_tail( fun () -> Dels = qlc:e(qlc:q([delete_queue(QueueName) || diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index fcd6cc24..11a95a62 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -31,7 +31,7 @@ -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1, prioritise_call/3, - prioritise_cast/2, prioritise_info/2]). + prioritise_cast/2, prioritise_info/2, format_message_queue/2]). -export([init_with_backing_queue_state/7]). @@ -58,7 +58,7 @@ %% These are held in our process dictionary -record(cr, {consumer_count, ch_pid, - limiter_pid, + limiter, monitor_ref, acktags, is_limit_active, @@ -73,8 +73,8 @@ messages, consumers, memory, - backing_queue_status, - slave_pids + slave_pids, + backing_queue_status ]). -define(CREATION_EVENT_KEYS, @@ -84,10 +84,12 @@ auto_delete, arguments, owner_pid, - mirror_nodes + slave_pids, + synchronised_slave_pids ]). --define(INFO_KEYS, ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid]). +-define(INFO_KEYS, + ?CREATION_EVENT_KEYS ++ ?STATISTICS_KEYS -- [pid, slave_pids]). %%---------------------------------------------------------------------------- @@ -149,11 +151,13 @@ terminate(shutdown = R, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); terminate({shutdown, _} = R, State = #q{backing_queue = BQ}) -> terminate_shutdown(fun (BQS) -> BQ:terminate(R, BQS) end, State); -terminate(Reason, State = #q{backing_queue = BQ}) -> +terminate(Reason, State = #q{q = #amqqueue{name = QName}, + backing_queue = BQ}) -> %% FIXME: How do we cancel active subscriptions? terminate_shutdown(fun (BQS) -> rabbit_event:notify( - queue_deleted, [{pid, self()}]), + queue_deleted, [{pid, self()}, + {name, QName}]), BQS1 = BQ:delete_and_terminate(Reason, BQS), %% don't care if the internal delete %% doesn't return 'ok'. 
@@ -249,8 +253,7 @@ backing_queue_module(#amqqueue{arguments = Args}) -> end. ensure_sync_timer(State = #q{sync_timer_ref = undefined}) -> - {ok, TRef} = timer:apply_after( - ?SYNC_INTERVAL, rabbit_amqqueue, sync_timeout, [self()]), + TRef = erlang:send_after(?SYNC_INTERVAL, self(), sync_timeout), State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> State. @@ -258,14 +261,12 @@ ensure_sync_timer(State) -> stop_sync_timer(State = #q{sync_timer_ref = undefined}) -> State; stop_sync_timer(State = #q{sync_timer_ref = TRef}) -> - {ok, cancel} = timer:cancel(TRef), + erlang:cancel_timer(TRef), State#q{sync_timer_ref = undefined}. ensure_rate_timer(State = #q{rate_timer_ref = undefined}) -> - {ok, TRef} = timer:apply_after( - ?RAM_DURATION_UPDATE_INTERVAL, - rabbit_amqqueue, update_ram_duration, - [self()]), + TRef = erlang:send_after( + ?RAM_DURATION_UPDATE_INTERVAL, self(), update_ram_duration), State#q{rate_timer_ref = TRef}; ensure_rate_timer(State = #q{rate_timer_ref = just_measured}) -> State#q{rate_timer_ref = undefined}; @@ -277,13 +278,13 @@ stop_rate_timer(State = #q{rate_timer_ref = undefined}) -> stop_rate_timer(State = #q{rate_timer_ref = just_measured}) -> State#q{rate_timer_ref = undefined}; stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> - {ok, cancel} = timer:cancel(TRef), + erlang:cancel_timer(TRef), State#q{rate_timer_ref = undefined}. stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) -> State; stop_expiry_timer(State = #q{expiry_timer_ref = TRef}) -> - {ok, cancel} = timer:cancel(TRef), + erlang:cancel_timer(TRef), State#q{expiry_timer_ref = undefined}. 
%% We wish to expire only when there are no consumers *and* the expiry @@ -295,18 +296,16 @@ ensure_expiry_timer(State = #q{expires = Expires}) -> case is_unused(State) of true -> NewState = stop_expiry_timer(State), - {ok, TRef} = timer:apply_after( - Expires, rabbit_amqqueue, maybe_expire, [self()]), + TRef = erlang:send_after(Expires, self(), maybe_expire), NewState#q{expiry_timer_ref = TRef}; false -> State end. ensure_stats_timer(State = #q{stats_timer = StatsTimer, - q = Q}) -> + q = #amqqueue{pid = QPid}}) -> State#q{stats_timer = rabbit_event:ensure_stats_timer( - StatsTimer, - fun() -> rabbit_amqqueue:emit_stats(Q) end)}. + StatsTimer, QPid, emit_stats)}. assert_invariant(#q{active_consumers = AC, backing_queue = BQ, backing_queue_state = BQS}) -> @@ -327,6 +326,7 @@ ch_record(ChPid) -> monitor_ref = MonitorRef, acktags = sets:new(), is_limit_active = false, + limiter = rabbit_limiter:make_token(), unsent_message_count = 0}, put(Key, C), C; @@ -347,9 +347,9 @@ maybe_store_ch_record(C = #cr{consumer_count = ConsumerCount, end. erase_ch_record(#cr{ch_pid = ChPid, - limiter_pid = LimiterPid, + limiter = Limiter, monitor_ref = MonitorRef}) -> - ok = rabbit_limiter:unregister(LimiterPid, self()), + ok = rabbit_limiter:unregister(Limiter, self()), erlang:demonitor(MonitorRef), erase({ch, ChPid}), ok. 
@@ -374,12 +374,12 @@ deliver_msgs_to_consumers(Funs = {PredFun, DeliverFun}, FunAcc, {{value, QEntry = {ChPid, #consumer{tag = ConsumerTag, ack_required = AckRequired}}}, ActiveConsumersTail} -> - C = #cr{limiter_pid = LimiterPid, + C = #cr{limiter = Limiter, unsent_message_count = Count, acktags = ChAckTags} = ch_record(ChPid), IsMsgReady = PredFun(FunAcc, State), case (IsMsgReady andalso - rabbit_limiter:can_send( LimiterPid, self(), AckRequired )) of + rabbit_limiter:can_send(Limiter, self(), AckRequired)) of true -> {{Message, IsDelivered, AckTag}, FunAcc1, State1} = DeliverFun(AckRequired, FunAcc, State), @@ -700,8 +700,7 @@ ensure_ttl_timer(State = #q{backing_queue = BQ, when TTL =/= undefined -> case BQ:is_empty(BQS) of true -> State; - false -> TRef = timer:apply_after(TTL, rabbit_amqqueue, drop_expired, - [self()]), + false -> TRef = erlang:send_after(TTL, self(), drop_expired), State#q{ttl_timer_ref = TRef} end; ensure_ttl_timer(State) -> @@ -709,7 +708,40 @@ ensure_ttl_timer(State) -> now_micros() -> timer:now_diff(now(), {0,0,0}). -infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. +infos(Items, State) -> + {Prefix, Items1} = + case lists:member(synchronised_slave_pids, Items) of + true -> Prefix1 = slaves_status(State), + case lists:member(slave_pids, Items) of + true -> {Prefix1, Items -- [slave_pids]}; + false -> {proplists:delete(slave_pids, Prefix1), Items} + end; + false -> {[], Items} + end, + Prefix ++ [{Item, i(Item, State)} + || Item <- (Items1 -- [synchronised_slave_pids])]. 
+ +slaves_status(#q{q = #amqqueue{name = Name}}) -> + {ok, #amqqueue{mirror_nodes = MNodes, slave_pids = SPids}} = + rabbit_amqqueue:lookup(Name), + case MNodes of + undefined -> + [{slave_pids, ''}, {synchronised_slave_pids, ''}]; + _ -> + {Results, _Bad} = + delegate:invoke( + SPids, fun (Pid) -> rabbit_mirror_queue_slave:info(Pid) end), + {SPids1, SSPids} = + lists:foldl( + fun ({Pid, Infos}, {SPidsN, SSPidsN}) -> + {[Pid | SPidsN], + case proplists:get_bool(is_synchronised, Infos) of + true -> [Pid | SSPidsN]; + false -> SSPidsN + end} + end, {[], []}, Results), + [{slave_pids, SPids1}, {synchronised_slave_pids, SSPids}] + end. i(name, #q{q = #amqqueue{name = Name}}) -> Name; i(durable, #q{q = #amqqueue{durable = Durable}}) -> Durable; @@ -741,14 +773,15 @@ i(consumers, State) -> i(memory, _) -> {memory, M} = process_info(self(), memory), M; +i(slave_pids, #q{q = #amqqueue{name = Name}}) -> + {ok, #amqqueue{mirror_nodes = MNodes, + slave_pids = SPids}} = rabbit_amqqueue:lookup(Name), + case MNodes of + undefined -> []; + _ -> SPids + end; i(backing_queue_status, #q{backing_queue_state = BQS, backing_queue = BQ}) -> BQ:status(BQS); -i(slave_pids, #q{q = #amqqueue{name = Name}}) -> - {ok, #amqqueue{slave_pids = SPids}} = rabbit_amqqueue:lookup(Name), - SPids; -i(mirror_nodes, #q{q = #amqqueue{name = Name}}) -> - {ok, #amqqueue{mirror_nodes = MNodes}} = rabbit_amqqueue:lookup(Name), - MNodes; i(Item, _) -> throw({bad_argument, Item}). @@ -792,25 +825,27 @@ prioritise_call(Msg, _From, _State) -> prioritise_cast(Msg, _State) -> case Msg of - update_ram_duration -> 8; delete_immediately -> 8; {set_ram_duration_target, _Duration} -> 8; {set_maximum_since_use, _Age} -> 8; - maybe_expire -> 8; - drop_expired -> 8; - emit_stats -> 7; {ack, _AckTags, _ChPid} -> 7; {reject, _AckTags, _Requeue, _ChPid} -> 7; {notify_sent, _ChPid} -> 7; {unblock, _ChPid} -> 7; {run_backing_queue, _Mod, _Fun} -> 6; - sync_timeout -> 6; _ -> 0 end. 
-prioritise_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, - #q{q = #amqqueue{exclusive_owner = DownPid}}) -> 8; -prioritise_info(_Msg, _State) -> 0. +prioritise_info(Msg, #q{q = #amqqueue{exclusive_owner = DownPid}}) -> + case Msg of + {'DOWN', _, process, DownPid, _} -> 8; + update_ram_duration -> 8; + maybe_expire -> 8; + drop_expired -> 8; + emit_stats -> 7; + sync_timeout -> 6; + _ -> 0 + end. handle_call({init, Recover}, From, State = #q{q = #amqqueue{exclusive_owner = none}}) -> @@ -904,7 +939,7 @@ handle_call({basic_get, ChPid, NoAck}, _From, reply({ok, Remaining, Msg}, State3) end; -handle_call({basic_consume, NoAck, ChPid, LimiterPid, +handle_call({basic_consume, NoAck, ChPid, Limiter, ConsumerTag, ExclusiveConsume, OkMsg}, _From, State = #q{exclusive_consumer = ExistingHolder}) -> case check_exclusive_access(ExistingHolder, ExclusiveConsume, @@ -915,10 +950,11 @@ handle_call({basic_consume, NoAck, ChPid, LimiterPid, C = #cr{consumer_count = ConsumerCount} = ch_record(ChPid), Consumer = #consumer{tag = ConsumerTag, ack_required = not NoAck}, - true = maybe_store_ch_record(C#cr{consumer_count = ConsumerCount +1, - limiter_pid = LimiterPid}), + true = maybe_store_ch_record( + C#cr{consumer_count = ConsumerCount +1, + limiter = Limiter}), ok = case ConsumerCount of - 0 -> rabbit_limiter:register(LimiterPid, self()); + 0 -> rabbit_limiter:register(Limiter, self()); _ -> ok end, ExclusiveConsumer = if ExclusiveConsume -> {ChPid, ConsumerTag}; @@ -951,12 +987,12 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, ok = maybe_send_reply(ChPid, OkMsg), reply(ok, State); C = #cr{consumer_count = ConsumerCount, - limiter_pid = LimiterPid} -> + limiter = Limiter} -> C1 = C#cr{consumer_count = ConsumerCount -1}, maybe_store_ch_record( case ConsumerCount of - 1 -> ok = rabbit_limiter:unregister(LimiterPid, self()), - C1#cr{limiter_pid = undefined}; + 1 -> ok = rabbit_limiter:unregister(Limiter, self()), + C1#cr{limiter = 
rabbit_limiter:make_token()}; _ -> C1 end), emit_consumer_deleted(ChPid, ConsumerTag), @@ -1015,9 +1051,6 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> handle_cast({run_backing_queue, Mod, Fun}, State) -> noreply(run_backing_queue(Mod, Fun, State)); -handle_cast(sync_timeout, State) -> - noreply(backing_queue_timeout(State#q{sync_timer_ref = undefined})); - handle_cast({deliver, Delivery}, State) -> %% Asynchronous, non-"mandatory", non-"immediate" deliver mode. noreply(deliver_or_enqueue(Delivery, State)); @@ -1065,35 +1098,26 @@ handle_cast({notify_sent, ChPid}, State) -> C#cr{unsent_message_count = Count - 1} end)); -handle_cast({limit, ChPid, LimiterPid}, State) -> +handle_cast({limit, ChPid, Limiter}, State) -> noreply( possibly_unblock( State, ChPid, - fun (C = #cr{consumer_count = ConsumerCount, - limiter_pid = OldLimiterPid, - is_limit_active = Limited}) -> - if ConsumerCount =/= 0 andalso OldLimiterPid == undefined -> - ok = rabbit_limiter:register(LimiterPid, self()); - true -> - ok + fun (C = #cr{consumer_count = ConsumerCount, + limiter = OldLimiter, + is_limit_active = OldLimited}) -> + case (ConsumerCount =/= 0 andalso + not rabbit_limiter:is_enabled(OldLimiter)) of + true -> ok = rabbit_limiter:register(Limiter, self()); + false -> ok end, - NewLimited = Limited andalso LimiterPid =/= undefined, - C#cr{limiter_pid = LimiterPid, is_limit_active = NewLimited} + Limited = OldLimited andalso rabbit_limiter:is_enabled(Limiter), + C#cr{limiter = Limiter, is_limit_active = Limited} end)); handle_cast({flush, ChPid}, State) -> ok = rabbit_channel:flushed(ChPid, self()), noreply(State); -handle_cast(update_ram_duration, State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - {RamDuration, BQS1} = BQ:ram_duration(BQS), - DesiredDuration = - rabbit_memory_monitor:report_ram_duration(self(), RamDuration), - BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - noreply(State#q{rate_timer_ref = just_measured, - backing_queue_state = 
BQS2}); - handle_cast({set_ram_duration_target, Duration}, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> BQS1 = BQ:set_ram_duration_target(Duration, BQS), @@ -1103,22 +1127,32 @@ handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), noreply(State); -handle_cast(maybe_expire, State) -> +handle_cast(force_event_refresh, State = #q{exclusive_consumer = Exclusive}) -> + rabbit_event:notify(queue_created, infos(?CREATION_EVENT_KEYS, State)), + case Exclusive of + none -> [emit_consumer_created(Ch, CTag, false, AckRequired) || + {Ch, CTag, AckRequired} <- consumers(State)]; + {Ch, CTag} -> [{Ch, CTag, AckRequired}] = consumers(State), + emit_consumer_created(Ch, CTag, true, AckRequired) + end, + noreply(State). + +handle_info(maybe_expire, State) -> case is_unused(State) of true -> ?LOGDEBUG("Queue lease expired for ~p~n", [State#q.q]), {stop, normal, State}; false -> noreply(ensure_expiry_timer(State)) end; -handle_cast(drop_expired, State) -> +handle_info(drop_expired, State) -> noreply(drop_expired_messages(State#q{ttl_timer_ref = undefined})); -handle_cast(emit_stats, State = #q{stats_timer = StatsTimer}) -> +handle_info(emit_stats, State = #q{stats_timer = StatsTimer}) -> %% Do not invoke noreply as it would see no timer and create a new one. emit_stats(State), State1 = State#q{stats_timer = rabbit_event:reset_stats_timer(StatsTimer)}, assert_invariant(State1), - {noreply, State1, hibernate}. 
+ {noreply, State1, hibernate}; handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State = #q{q = #amqqueue{exclusive_owner = DownPid}}) -> @@ -1135,6 +1169,18 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> {stop, NewState} -> {stop, normal, NewState} end; +handle_info(update_ram_duration, State = #q{backing_queue = BQ, + backing_queue_state = BQS}) -> + {RamDuration, BQS1} = BQ:ram_duration(BQS), + DesiredDuration = + rabbit_memory_monitor:report_ram_duration(self(), RamDuration), + BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), + noreply(State#q{rate_timer_ref = just_measured, + backing_queue_state = BQS2}); + +handle_info(sync_timeout, State) -> + noreply(backing_queue_timeout(State#q{sync_timer_ref = undefined})); + handle_info(timeout, State) -> noreply(backing_queue_timeout(State)); @@ -1155,10 +1201,11 @@ handle_pre_hibernate(State = #q{backing_queue = BQ, rabbit_memory_monitor:report_ram_duration(self(), RamDuration), BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), BQS3 = BQ:handle_pre_hibernate(BQS2), - rabbit_event:if_enabled(StatsTimer, - fun () -> - emit_stats(State, [{idle_since, now()}]) - end), + rabbit_event:if_enabled( + StatsTimer, + fun () -> emit_stats(State, [{idle_since, now()}]) end), State1 = State#q{stats_timer = rabbit_event:stop_stats_timer(StatsTimer), backing_queue_state = BQS3}, {hibernate, stop_rate_timer(State1)}. + +format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). diff --git a/src/rabbit_backing_queue_qc.erl b/src/rabbit_backing_queue_qc.erl new file mode 100644 index 00000000..22691ef9 --- /dev/null +++ b/src/rabbit_backing_queue_qc.erl @@ -0,0 +1,392 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. 
You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2011-2011 VMware, Inc. All rights reserved. +%% + +-module(rabbit_backing_queue_qc). +-ifdef(use_proper_qc). +-include("rabbit.hrl"). +-include("rabbit_framing.hrl"). +-include_lib("proper/include/proper.hrl"). + +-behaviour(proper_statem). + +-define(BQMOD, rabbit_variable_queue). +-define(QUEUE_MAXLEN, 10000). +-define(TIMEOUT_LIMIT, 100). + +-define(RECORD_INDEX(Key, Record), + proplists:get_value(Key, lists:zip( + record_info(fields, Record), lists:seq(2, record_info(size, Record))))). + +-export([initial_state/0, command/1, precondition/2, postcondition/3, + next_state/3]). + +-export([prop_backing_queue_test/0, publish_multiple/4, timeout/2]). + +-record(state, {bqstate, + len, %% int + messages, %% queue of {msg_props, basic_msg} + acks, %% dict of acktag => {msg_props, basic_msg} + confirms}). %% set of msgid + +%% Initialise model + +initial_state() -> + #state{bqstate = qc_variable_queue_init(qc_test_queue()), + len = 0, + messages = queue:new(), + acks = orddict:new(), + confirms = gb_sets:new()}. + +%% Property + +prop_backing_queue_test() -> + ?FORALL(Cmds, commands(?MODULE, initial_state()), + backing_queue_test(Cmds)). 
+ +backing_queue_test(Cmds) -> + {ok, FileSizeLimit} = + application:get_env(rabbit, msg_store_file_size_limit), + application:set_env(rabbit, msg_store_file_size_limit, 512, + infinity), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + application:set_env(rabbit, queue_index_max_journal_entries, 128, + infinity), + + {_H, #state{bqstate = BQ}, Res} = run_commands(?MODULE, Cmds), + + application:set_env(rabbit, msg_store_file_size_limit, + FileSizeLimit, infinity), + application:set_env(rabbit, queue_index_max_journal_entries, + MaxJournal, infinity), + + ?BQMOD:delete_and_terminate(shutdown, BQ), + ?WHENFAIL( + io:format("Result: ~p~n", [Res]), + aggregate(command_names(Cmds), Res =:= ok)). + +%% Commands + +%% Command frequencies are tuned so that queues are normally reasonably +%% short, but they may sometimes exceed ?QUEUE_MAXLEN. Publish-multiple +%% and purging cause extreme queue lengths, so these have lower probabilities. +%% Fetches are sufficiently frequent so that commands that need acktags +%% get decent coverage. + +command(S) -> + frequency([{10, qc_publish(S)}, + {1, qc_publish_delivered(S)}, + {1, qc_publish_multiple(S)}, %% very slow + {15, qc_fetch(S)}, %% needed for ack and requeue + {15, qc_ack(S)}, + {15, qc_requeue(S)}, + {3, qc_set_ram_duration_target(S)}, + {1, qc_ram_duration(S)}, + {1, qc_drain_confirmed(S)}, + {1, qc_dropwhile(S)}, + {1, qc_is_empty(S)}, + {1, qc_timeout(S)}, + {1, qc_purge(S)}]). + +qc_publish(#state{bqstate = BQ}) -> + {call, ?BQMOD, publish, + [qc_message(), + #message_properties{needs_confirming = frequency([{1, true}, + {20, false}]), + expiry = oneof([undefined | lists:seq(1, 10)])}, + self(), BQ]}. + +qc_publish_multiple(#state{bqstate = BQ}) -> + {call, ?MODULE, publish_multiple, + [qc_message(), #message_properties{}, BQ, + resize(?QUEUE_MAXLEN, pos_integer())]}. 
+ +qc_publish_delivered(#state{bqstate = BQ}) -> + {call, ?BQMOD, publish_delivered, + [boolean(), qc_message(), #message_properties{}, self(), BQ]}. + +qc_fetch(#state{bqstate = BQ}) -> + {call, ?BQMOD, fetch, [boolean(), BQ]}. + +qc_ack(#state{bqstate = BQ, acks = Acks}) -> + {call, ?BQMOD, ack, [rand_choice(orddict:fetch_keys(Acks)), BQ]}. + +qc_requeue(#state{bqstate = BQ, acks = Acks}) -> + {call, ?BQMOD, requeue, + [rand_choice(orddict:fetch_keys(Acks)), fun(MsgOpts) -> MsgOpts end, BQ]}. + +qc_set_ram_duration_target(#state{bqstate = BQ}) -> + {call, ?BQMOD, set_ram_duration_target, + [oneof([0, 1, 2, resize(1000, pos_integer()), infinity]), BQ]}. + +qc_ram_duration(#state{bqstate = BQ}) -> + {call, ?BQMOD, ram_duration, [BQ]}. + +qc_drain_confirmed(#state{bqstate = BQ}) -> + {call, ?BQMOD, drain_confirmed, [BQ]}. + +qc_dropwhile(#state{bqstate = BQ}) -> + {call, ?BQMOD, dropwhile, [fun dropfun/1, BQ]}. + +qc_is_empty(#state{bqstate = BQ}) -> + {call, ?BQMOD, is_empty, [BQ]}. + +qc_timeout(#state{bqstate = BQ}) -> + {call, ?MODULE, timeout, [BQ, ?TIMEOUT_LIMIT]}. + +qc_purge(#state{bqstate = BQ}) -> + {call, ?BQMOD, purge, [BQ]}. + +%% Preconditions + +precondition(#state{acks = Acks}, {call, ?BQMOD, Fun, _Arg}) + when Fun =:= ack; Fun =:= requeue -> + orddict:size(Acks) > 0; +precondition(#state{messages = Messages}, + {call, ?BQMOD, publish_delivered, _Arg}) -> + queue:is_empty(Messages); +precondition(_S, {call, ?BQMOD, _Fun, _Arg}) -> + true; +precondition(_S, {call, ?MODULE, timeout, _Arg}) -> + true; +precondition(#state{len = Len}, {call, ?MODULE, publish_multiple, _Arg}) -> + Len < ?QUEUE_MAXLEN. 
+ +%% Model updates + +next_state(S, BQ, {call, ?BQMOD, publish, [Msg, MsgProps, _Pid, _BQ]}) -> + #state{len = Len, messages = Messages, confirms = Confirms} = S, + MsgId = {call, erlang, element, [?RECORD_INDEX(id, basic_message), Msg]}, + NeedsConfirm = + {call, erlang, element, + [?RECORD_INDEX(needs_confirming, message_properties), MsgProps]}, + S#state{bqstate = BQ, + len = Len + 1, + messages = queue:in({MsgProps, Msg}, Messages), + confirms = case eval(NeedsConfirm) of + true -> gb_sets:add(MsgId, Confirms); + _ -> Confirms + end}; + +next_state(S, BQ, {call, _, publish_multiple, [Msg, MsgProps, _BQ, Count]}) -> + #state{len = Len, messages = Messages} = S, + Messages1 = repeat(Messages, fun(Msgs) -> + queue:in({MsgProps, Msg}, Msgs) + end, Count), + S#state{bqstate = BQ, + len = Len + Count, + messages = Messages1}; + +next_state(S, Res, + {call, ?BQMOD, publish_delivered, + [AckReq, Msg, MsgProps, _Pid, _BQ]}) -> + #state{confirms = Confirms, acks = Acks} = S, + AckTag = {call, erlang, element, [1, Res]}, + BQ1 = {call, erlang, element, [2, Res]}, + MsgId = {call, erlang, element, [?RECORD_INDEX(id, basic_message), Msg]}, + NeedsConfirm = + {call, erlang, element, + [?RECORD_INDEX(needs_confirming, message_properties), MsgProps]}, + S#state{bqstate = BQ1, + confirms = case eval(NeedsConfirm) of + true -> gb_sets:add(MsgId, Confirms); + _ -> Confirms + end, + acks = case AckReq of + true -> orddict:append(AckTag, {MsgProps, Msg}, Acks); + false -> Acks + end + }; + +next_state(S, Res, {call, ?BQMOD, fetch, [AckReq, _BQ]}) -> + #state{len = Len, messages = Messages, acks = Acks} = S, + ResultInfo = {call, erlang, element, [1, Res]}, + BQ1 = {call, erlang, element, [2, Res]}, + AckTag = {call, erlang, element, [3, ResultInfo]}, + S1 = S#state{bqstate = BQ1}, + case queue:out(Messages) of + {empty, _M2} -> + S1; + {{value, MsgProp_Msg}, M2} -> + S2 = S1#state{len = Len - 1, messages = M2}, + case AckReq of + true -> + S2#state{acks = orddict:append(AckTag, 
MsgProp_Msg, Acks)}; + false -> + S2 + end + end; + +next_state(S, Res, {call, ?BQMOD, ack, [AcksArg, _BQ]}) -> + #state{acks = AcksState} = S, + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1, + acks = lists:foldl(fun orddict:erase/2, AcksState, AcksArg)}; + +next_state(S, Res, {call, ?BQMOD, requeue, [AcksArg, _F, _V]}) -> + #state{len = Len, messages = Messages, acks = AcksState} = S, + BQ1 = {call, erlang, element, [2, Res]}, + RequeueMsgs = lists:append([orddict:fetch(Key, AcksState) || + Key <- AcksArg]), + S#state{bqstate = BQ1, + len = Len + length(RequeueMsgs), + messages = queue:join(Messages, queue:from_list(RequeueMsgs)), + acks = lists:foldl(fun orddict:erase/2, AcksState, AcksArg)}; + +next_state(S, BQ, {call, ?BQMOD, set_ram_duration_target, _Args}) -> + S#state{bqstate = BQ}; + +next_state(S, Res, {call, ?BQMOD, ram_duration, _Args}) -> + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1}; + +next_state(S, Res, {call, ?BQMOD, drain_confirmed, _Args}) -> + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1}; + +next_state(S, BQ1, {call, ?BQMOD, dropwhile, _Args}) -> + #state{messages = Messages} = S, + Messages1 = drop_messages(Messages), + S#state{bqstate = BQ1, len = queue:len(Messages1), messages = Messages1}; + +next_state(S, _Res, {call, ?BQMOD, is_empty, _Args}) -> + S; + +next_state(S, BQ, {call, ?MODULE, timeout, _Args}) -> + S#state{bqstate = BQ}; + +next_state(S, Res, {call, ?BQMOD, purge, _Args}) -> + BQ1 = {call, erlang, element, [2, Res]}, + S#state{bqstate = BQ1, len = 0, messages = queue:new()}. 
+ +%% Postconditions + +postcondition(S, {call, ?BQMOD, fetch, _Args}, Res) -> + #state{messages = Messages, len = Len, acks = Acks, confirms = Confrms} = S, + case Res of + {{MsgFetched, _IsDelivered, AckTag, RemainingLen}, _BQ} -> + {_MsgProps, Msg} = queue:head(Messages), + MsgFetched =:= Msg andalso + not orddict:is_key(AckTag, Acks) andalso + not gb_sets:is_element(AckTag, Confrms) andalso + RemainingLen =:= Len - 1; + {empty, _BQ} -> + Len =:= 0 + end; + +postcondition(S, {call, ?BQMOD, publish_delivered, _Args}, {AckTag, _BQ}) -> + #state{acks = Acks, confirms = Confrms} = S, + not orddict:is_key(AckTag, Acks) andalso + not gb_sets:is_element(AckTag, Confrms); + +postcondition(#state{len = Len}, {call, ?BQMOD, purge, _Args}, Res) -> + {PurgeCount, _BQ} = Res, + Len =:= PurgeCount; + +postcondition(#state{len = Len}, + {call, ?BQMOD, is_empty, _Args}, Res) -> + (Len =:= 0) =:= Res; + +postcondition(S, {call, ?BQMOD, drain_confirmed, _Args}, Res) -> + #state{confirms = Confirms} = S, + {ReportedConfirmed, _BQ} = Res, + lists:all(fun (M) -> + gb_sets:is_element(M, Confirms) + end, ReportedConfirmed); + +postcondition(#state{bqstate = BQ, len = Len}, {call, _M, _F, _A}, _Res) -> + ?BQMOD:len(BQ) =:= Len. + +%% Helpers + +repeat(Result, _Fun, 0) -> + Result; +repeat(Result, Fun, Times) -> + repeat(Fun(Result), Fun, Times - 1). + +publish_multiple(Msg, MsgProps, BQ, Count) -> + repeat(BQ, fun(BQ1) -> + ?BQMOD:publish(Msg, MsgProps, self(), BQ1) + end, Count). + +timeout(BQ, 0) -> + BQ; +timeout(BQ, AtMost) -> + case ?BQMOD:needs_timeout(BQ) of + false -> BQ; + _ -> timeout(?BQMOD:timeout(BQ), AtMost - 1) + end. + +qc_message_payload() -> + ?SIZED(Size, resize(Size * Size, binary())). + +qc_routing_key() -> + noshrink(binary(10)). + +qc_delivery_mode() -> + oneof([1, 2]). + +qc_message() -> + qc_message(qc_delivery_mode()). 
+ +qc_message(DeliveryMode) -> + {call, rabbit_basic, message, [ + qc_default_exchange(), + qc_routing_key(), + #'P_basic'{delivery_mode = DeliveryMode}, + qc_message_payload()]}. + +qc_default_exchange() -> + {call, rabbit_misc, r, [<<>>, exchange, <<>>]}. + +qc_variable_queue_init(Q) -> + {call, ?BQMOD, init, + [Q, false, function(2, ok)]}. + +qc_test_q() -> + {call, rabbit_misc, r, [<<"/">>, queue, noshrink(binary(16))]}. + +qc_test_queue() -> + qc_test_queue(boolean()). + +qc_test_queue(Durable) -> + #amqqueue{name = qc_test_q(), + durable = Durable, + auto_delete = false, + arguments = [], + pid = self()}. + +rand_choice([]) -> []; +rand_choice(List) -> [lists:nth(random:uniform(length(List)), List)]. + +dropfun(Props) -> + Expiry = eval({call, erlang, element, + [?RECORD_INDEX(expiry, message_properties), Props]}), + Expiry =/= 1. + +drop_messages(Messages) -> + case queue:out(Messages) of + {empty, _} -> + Messages; + {{value, MsgProps_Msg}, M2} -> + MsgProps = {call, erlang, element, [1, MsgProps_Msg]}, + case dropfun(MsgProps) of + true -> drop_messages(M2); + false -> Messages + end + end. + +-endif. diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 8310bd8e..dfe84644 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -23,14 +23,17 @@ -export([start_link/10, do/2, do/3, flush/1, shutdown/1]). -export([send_command/2, deliver/4, flushed/2, confirm/2]). -export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1]). --export([refresh_config_all/0, emit_stats/1, ready_for_close/1]). +-export([refresh_config_local/0, ready_for_close/1]). +-export([force_event_refresh/0]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1, prioritise_call/3, - prioritise_cast/2]). + prioritise_cast/2, prioritise_info/2, format_message_queue/2]). +%% Internal +-export([list_local/0]). 
-record(ch, {state, protocol, channel, reader_pid, writer_pid, conn_pid, - limiter_pid, start_limiter_fun, tx_status, next_tag, + limiter, tx_status, next_tag, unacked_message_q, uncommitted_message_q, uncommitted_ack_q, user, virtual_host, most_recently_declared_queue, consumer_mapping, blocking, consumer_monitors, queue_collector_pid, @@ -71,8 +74,7 @@ -spec(start_link/10 :: (channel_number(), pid(), pid(), pid(), rabbit_types:protocol(), rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), - pid(), fun ((non_neg_integer()) -> rabbit_types:ok(pid()))) -> - rabbit_types:ok_pid_or_error()). + pid(), rabbit_limiter:token()) -> rabbit_types:ok_pid_or_error()). -spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). -spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(), rabbit_types:maybe(rabbit_types:content())) -> 'ok'). @@ -85,24 +87,25 @@ -spec(flushed/2 :: (pid(), pid()) -> 'ok'). -spec(confirm/2 ::(pid(), [non_neg_integer()]) -> 'ok'). -spec(list/0 :: () -> [pid()]). +-spec(list_local/0 :: () -> [pid()]). -spec(info_keys/0 :: () -> rabbit_types:info_keys()). -spec(info/1 :: (pid()) -> rabbit_types:infos()). -spec(info/2 :: (pid(), rabbit_types:info_keys()) -> rabbit_types:infos()). -spec(info_all/0 :: () -> [rabbit_types:infos()]). -spec(info_all/1 :: (rabbit_types:info_keys()) -> [rabbit_types:infos()]). --spec(refresh_config_all/0 :: () -> 'ok'). --spec(emit_stats/1 :: (pid()) -> 'ok'). +-spec(refresh_config_local/0 :: () -> 'ok'). -spec(ready_for_close/1 :: (pid()) -> 'ok'). +-spec(force_event_refresh/0 :: () -> 'ok'). -endif. %%---------------------------------------------------------------------------- start_link(Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, VHost, - Capabilities, CollectorPid, StartLimiterFun) -> + Capabilities, CollectorPid, Limiter) -> gen_server2:start_link( ?MODULE, [Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, - VHost, Capabilities, CollectorPid, StartLimiterFun], []). 
+ VHost, Capabilities, CollectorPid, Limiter], []). do(Pid, Method) -> do(Pid, Method, none). @@ -129,6 +132,10 @@ confirm(Pid, MsgSeqNos) -> gen_server2:cast(Pid, {confirm, MsgSeqNos, self()}). list() -> + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_channel, list_local, []). + +list_local() -> pg_local:get_members(rabbit_channels). info_keys() -> ?INFO_KEYS. @@ -148,21 +155,22 @@ info_all() -> info_all(Items) -> rabbit_misc:filter_exit_map(fun (C) -> info(C, Items) end, list()). -refresh_config_all() -> +refresh_config_local() -> rabbit_misc:upmap( - fun (C) -> gen_server2:call(C, refresh_config) end, list()), + fun (C) -> gen_server2:call(C, refresh_config) end, list_local()), ok. -emit_stats(Pid) -> - gen_server2:cast(Pid, emit_stats). - ready_for_close(Pid) -> gen_server2:cast(Pid, ready_for_close). +force_event_refresh() -> + [gen_server2:cast(C, force_event_refresh) || C <- list()], + ok. + %%--------------------------------------------------------------------------- init([Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, VHost, - Capabilities, CollectorPid, StartLimiterFun]) -> + Capabilities, CollectorPid, Limiter]) -> process_flag(trap_exit, true), ok = pg_local:join(rabbit_channels, self()), StatsTimer = rabbit_event:init_stats_timer(), @@ -172,8 +180,7 @@ init([Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, VHost, reader_pid = ReaderPid, writer_pid = WriterPid, conn_pid = ConnPid, - limiter_pid = undefined, - start_limiter_fun = StartLimiterFun, + limiter = Limiter, tx_status = none, next_tag = 1, unacked_message_q = queue:new(), @@ -196,7 +203,7 @@ init([Channel, ReaderPid, WriterPid, ConnPid, Protocol, User, VHost, trace_state = rabbit_trace:init(VHost)}, rabbit_event:notify(channel_created, infos(?CREATION_EVENT_KEYS, State)), rabbit_event:if_enabled(StatsTimer, - fun() -> internal_emit_stats(State) end), + fun() -> emit_stats(State) end), {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, 
?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -209,11 +216,16 @@ prioritise_call(Msg, _From, _State) -> prioritise_cast(Msg, _State) -> case Msg of - emit_stats -> 7; {confirm, _MsgSeqNos, _QPid} -> 5; _ -> 0 end. +prioritise_info(Msg, _State) -> + case Msg of + emit_stats -> 7; + _ -> 0 + end. + handle_call(flush, _From, State) -> reply(ok, State); @@ -295,11 +307,10 @@ handle_cast({deliver, ConsumerTag, AckRequired, rabbit_trace:tap_trace_out(Msg, TraceState), noreply(State1#ch{next_tag = DeliveryTag + 1}); -handle_cast(emit_stats, State = #ch{stats_timer = StatsTimer}) -> - internal_emit_stats(State), - noreply([ensure_stats_timer], - State#ch{stats_timer = rabbit_event:reset_stats_timer(StatsTimer)}); +handle_cast(force_event_refresh, State) -> + rabbit_event:notify(channel_created, infos(?CREATION_EVENT_KEYS, State)), + noreply(State); handle_cast({confirm, MsgSeqNos, From}, State) -> State1 = #ch{confirmed = C} = confirm(MsgSeqNos, From, State), noreply([send_confirms], State1, case C of [] -> hibernate; _ -> 0 end). @@ -307,6 +318,11 @@ handle_cast({confirm, MsgSeqNos, From}, State) -> handle_info(timeout, State) -> noreply(State); +handle_info(emit_stats, State = #ch{stats_timer = StatsTimer}) -> + emit_stats(State), + noreply([ensure_stats_timer], + State#ch{stats_timer = rabbit_event:reset_stats_timer(StatsTimer)}); + handle_info({'DOWN', MRef, process, QPid, Reason}, State = #ch{consumer_monitors = ConsumerMonitors}) -> noreply( @@ -322,11 +338,8 @@ handle_info({'EXIT', _Pid, Reason}, State) -> handle_pre_hibernate(State = #ch{stats_timer = StatsTimer}) -> ok = clear_permission_cache(), - rabbit_event:if_enabled(StatsTimer, - fun () -> - internal_emit_stats( - State, [{idle_since, now()}]) - end), + rabbit_event:if_enabled( + StatsTimer, fun () -> emit_stats(State, [{idle_since, now()}]) end), StatsTimer1 = rabbit_event:stop_stats_timer(StatsTimer), {hibernate, State#ch{stats_timer = StatsTimer1}}. 
@@ -344,6 +357,8 @@ terminate(Reason, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). + %%--------------------------------------------------------------------------- reply(Reply, NewState) -> reply(Reply, [], NewState). @@ -368,8 +383,7 @@ next_state(Mask, State) -> ensure_stats_timer(State = #ch{stats_timer = StatsTimer}) -> ChPid = self(), State#ch{stats_timer = rabbit_event:ensure_stats_timer( - StatsTimer, - fun() -> emit_stats(ChPid) end)}. + StatsTimer, ChPid, emit_stats)}. return_ok(State, true, _Msg) -> {noreply, State}; return_ok(State, false, Msg) -> {reply, Msg, State}. @@ -705,7 +719,7 @@ handle_method(#'basic.consume'{queue = QueueNameBin, exclusive = ExclusiveConsume, nowait = NoWait}, _, State = #ch{conn_pid = ConnPid, - limiter_pid = LimiterPid, + limiter = Limiter, consumer_mapping = ConsumerMapping}) -> case dict:find(ConsumerTag, ConsumerMapping) of error -> @@ -724,7 +738,7 @@ handle_method(#'basic.consume'{queue = QueueNameBin, QueueName, ConnPid, fun (Q) -> {rabbit_amqqueue:basic_consume( - Q, NoAck, self(), LimiterPid, + Q, NoAck, self(), Limiter, ActualConsumerTag, ExclusiveConsume, ok_msg(NoWait, #'basic.consume_ok'{ consumer_tag = ActualConsumerTag})), @@ -798,22 +812,23 @@ handle_method(#'basic.qos'{prefetch_size = Size}, _, _State) when Size /= 0 -> rabbit_misc:protocol_error(not_implemented, "prefetch_size!=0 (~w)", [Size]); -handle_method(#'basic.qos'{prefetch_count = PrefetchCount}, - _, State = #ch{limiter_pid = LimiterPid}) -> - LimiterPid1 = case {LimiterPid, PrefetchCount} of - {undefined, 0} -> undefined; - {undefined, _} -> start_limiter(State); - {_, _} -> LimiterPid - end, - LimiterPid2 = case rabbit_limiter:limit(LimiterPid1, PrefetchCount) of - ok -> LimiterPid1; - stopped -> unlimit_queues(State) - end, - {reply, #'basic.qos_ok'{}, State#ch{limiter_pid = LimiterPid2}}; +handle_method(#'basic.qos'{prefetch_count = PrefetchCount}, _, + 
State = #ch{limiter = Limiter}) -> + Limiter1 = case {rabbit_limiter:is_enabled(Limiter), PrefetchCount} of + {false, 0} -> Limiter; + {false, _} -> enable_limiter(State); + {_, _} -> Limiter + end, + Limiter3 = case rabbit_limiter:limit(Limiter1, PrefetchCount) of + ok -> Limiter1; + {disabled, Limiter2} -> ok = limit_queues(Limiter2, State), + Limiter2 + end, + {reply, #'basic.qos_ok'{}, State#ch{limiter = Limiter3}}; handle_method(#'basic.recover_async'{requeue = true}, _, State = #ch{unacked_message_q = UAMQ, - limiter_pid = LimiterPid}) -> + limiter = Limiter}) -> OkFun = fun () -> ok end, ok = fold_per_queue( fun (QPid, MsgIds, ok) -> @@ -827,7 +842,7 @@ handle_method(#'basic.recover_async'{requeue = true}, QPid, lists:reverse(MsgIds), self()) end) end, ok, UAMQ), - ok = notify_limiter(LimiterPid, UAMQ), + ok = notify_limiter(Limiter, UAMQ), %% No answer required - basic.recover is the newer, synchronous %% variant of this method {noreply, State#ch{unacked_message_q = queue:new()}}; @@ -1074,23 +1089,23 @@ handle_method(#'confirm.select'{nowait = NoWait}, _, State) -> NoWait, #'confirm.select_ok'{}); handle_method(#'channel.flow'{active = true}, _, - State = #ch{limiter_pid = LimiterPid}) -> - LimiterPid1 = case rabbit_limiter:unblock(LimiterPid) of - ok -> LimiterPid; - stopped -> unlimit_queues(State) - end, - {reply, #'channel.flow_ok'{active = true}, - State#ch{limiter_pid = LimiterPid1}}; + State = #ch{limiter = Limiter}) -> + Limiter2 = case rabbit_limiter:unblock(Limiter) of + ok -> Limiter; + {disabled, Limiter1} -> ok = limit_queues(Limiter1, State), + Limiter1 + end, + {reply, #'channel.flow_ok'{active = true}, State#ch{limiter = Limiter2}}; handle_method(#'channel.flow'{active = false}, _, - State = #ch{limiter_pid = LimiterPid, - consumer_mapping = Consumers}) -> - LimiterPid1 = case LimiterPid of - undefined -> start_limiter(State); - Other -> Other - end, - State1 = State#ch{limiter_pid = LimiterPid1}, - ok = rabbit_limiter:block(LimiterPid1), + 
State = #ch{consumer_mapping = Consumers, + limiter = Limiter}) -> + Limiter1 = case rabbit_limiter:is_enabled(Limiter) of + true -> Limiter; + false -> enable_limiter(State) + end, + State1 = State#ch{limiter = Limiter1}, + ok = rabbit_limiter:block(Limiter1), case consumer_queues(Consumers) of [] -> {reply, #'channel.flow_ok'{active = false}, State1}; QPids -> Queues = [{QPid, erlang:monitor(process, QPid)} || @@ -1131,10 +1146,16 @@ handle_publishing_queue_down(QPid, Reason, State = #ch{unconfirmed_qm = UQM}) -> %% process_confirms to prevent each MsgSeqNo being removed from %% the set one by one which would be inefficient State1 = State#ch{unconfirmed_qm = gb_trees:delete_any(QPid, UQM)}, - {Nack, SendFun} = case Reason of - normal -> {false, fun record_confirms/2}; - _ -> {true, fun send_nacks/2} - end, + {Nack, SendFun} = + case Reason of + Reason when Reason =:= noproc; Reason =:= noconnection; + Reason =:= normal; Reason =:= shutdown -> + {false, fun record_confirms/2}; + {shutdown, _} -> + {false, fun record_confirms/2}; + _ -> + {true, fun send_nacks/2} + end, {MXs, State2} = process_confirms(MsgSeqNos, QPid, Nack, State1), erase_queue_stats(QPid), State3 = SendFun(MXs, State2), @@ -1214,7 +1235,7 @@ reject(DeliveryTag, Requeue, Multiple, State = #ch{unacked_message_q = UAMQ}) -> fun (QPid, MsgIds, ok) -> rabbit_amqqueue:reject(QPid, MsgIds, Requeue, self()) end, ok, Acked), - ok = notify_limiter(State#ch.limiter_pid, Acked), + ok = notify_limiter(State#ch.limiter, Acked), {noreply, State#ch{unacked_message_q = queue:new()}}. ack_record(DeliveryTag, ConsumerTag, @@ -1251,7 +1272,7 @@ ack(Acked, State) -> [{QPid, length(MsgIds)} | L] end, [], Acked), maybe_incr_stats(QIncs, ack, State), - ok = notify_limiter(State#ch.limiter_pid, Acked), + ok = notify_limiter(State#ch.limiter, Acked), State. 
new_tx(State) -> State#ch{uncommitted_message_q = queue:new(), @@ -1275,17 +1296,14 @@ fold_per_queue(F, Acc0, UAQ) -> dict:fold(fun (QPid, MsgIds, Acc) -> F(QPid, MsgIds, Acc) end, Acc0, D). -start_limiter(State = #ch{unacked_message_q = UAMQ, start_limiter_fun = SLF}) -> - {ok, LPid} = SLF(queue:len(UAMQ)), - ok = limit_queues(LPid, State), - LPid. - -unlimit_queues(State) -> - ok = limit_queues(undefined, State), - undefined. +enable_limiter(State = #ch{unacked_message_q = UAMQ, + limiter = Limiter}) -> + Limiter1 = rabbit_limiter:enable(Limiter, queue:len(UAMQ)), + ok = limit_queues(Limiter1, State), + Limiter1. -limit_queues(LPid, #ch{consumer_mapping = Consumers}) -> - rabbit_amqqueue:limit_all(consumer_queues(Consumers), self(), LPid). +limit_queues(Limiter, #ch{consumer_mapping = Consumers}) -> + rabbit_amqqueue:limit_all(consumer_queues(Consumers), self(), Limiter). consumer_queues(Consumers) -> lists:usort([QPid || @@ -1296,14 +1314,15 @@ consumer_queues(Consumers) -> %% for messages delivered to subscribed consumers, but not acks for %% messages sent in a response to a basic.get (identified by their %% 'none' consumer tag) -notify_limiter(undefined, _Acked) -> - ok; -notify_limiter(LimiterPid, Acked) -> - case rabbit_misc:queue_fold(fun ({_, none, _}, Acc) -> Acc; - ({_, _, _}, Acc) -> Acc + 1 - end, 0, Acked) of - 0 -> ok; - Count -> rabbit_limiter:ack(LimiterPid, Count) +notify_limiter(Limiter, Acked) -> + case rabbit_limiter:is_enabled(Limiter) of + false -> ok; + true -> case rabbit_misc:queue_fold(fun ({_, none, _}, Acc) -> Acc; + ({_, _, _}, Acc) -> Acc + 1 + end, 0, Acked) of + 0 -> ok; + Count -> rabbit_limiter:ack(Limiter, Count) + end end. 
deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{ @@ -1441,10 +1460,10 @@ i(messages_uncommitted, #ch{uncommitted_message_q = TMQ}) -> queue:len(TMQ); i(acks_uncommitted, #ch{uncommitted_ack_q = TAQ}) -> queue:len(TAQ); -i(prefetch_count, #ch{limiter_pid = LimiterPid}) -> - rabbit_limiter:get_limit(LimiterPid); -i(client_flow_blocked, #ch{limiter_pid = LimiterPid}) -> - rabbit_limiter:is_blocked(LimiterPid); +i(prefetch_count, #ch{limiter = Limiter}) -> + rabbit_limiter:get_limit(Limiter); +i(client_flow_blocked, #ch{limiter = Limiter}) -> + rabbit_limiter:is_blocked(Limiter); i(Item, _) -> throw({bad_argument, Item}). @@ -1487,10 +1506,10 @@ update_measures(Type, QX, Inc, Measure) -> put({Type, QX}, orddict:store(Measure, Cur + Inc, Measures)). -internal_emit_stats(State) -> - internal_emit_stats(State, []). +emit_stats(State) -> + emit_stats(State, []). -internal_emit_stats(State = #ch{stats_timer = StatsTimer}, Extra) -> +emit_stats(State = #ch{stats_timer = StatsTimer}, Extra) -> CoarseStats = infos(?STATISTICS_KEYS, State), case rabbit_event:stats_level(StatsTimer) of coarse -> diff --git a/src/rabbit_channel_sup.erl b/src/rabbit_channel_sup.erl index 65ccca02..a19b6bfd 100644 --- a/src/rabbit_channel_sup.erl +++ b/src/rabbit_channel_sup.erl @@ -47,47 +47,44 @@ start_link({tcp, Sock, Channel, FrameMax, ReaderPid, Protocol, User, VHost, Capabilities, Collector}) -> - {ok, SupPid} = supervisor2:start_link(?MODULE, []), - {ok, WriterPid} = - supervisor2:start_child( - SupPid, - {writer, {rabbit_writer, start_link, - [Sock, Channel, FrameMax, Protocol, ReaderPid]}, - intrinsic, ?MAX_WAIT, worker, [rabbit_writer]}), + {ok, SupPid} = supervisor2:start_link(?MODULE, + {tcp, Sock, Channel, FrameMax, + ReaderPid, Protocol}), + [LimiterPid] = supervisor2:find_child(SupPid, limiter), + [WriterPid] = supervisor2:find_child(SupPid, writer), {ok, ChannelPid} = supervisor2:start_child( SupPid, {channel, {rabbit_channel, start_link, [Channel, 
ReaderPid, WriterPid, ReaderPid, Protocol, User, VHost, Capabilities, Collector, - start_limiter_fun(SupPid)]}, + rabbit_limiter:make_token(LimiterPid)]}, intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), {ok, AState} = rabbit_command_assembler:init(Protocol), {ok, SupPid, {ChannelPid, AState}}; start_link({direct, Channel, ClientChannelPid, ConnPid, Protocol, User, VHost, Capabilities, Collector}) -> - {ok, SupPid} = supervisor2:start_link(?MODULE, []), + {ok, SupPid} = supervisor2:start_link(?MODULE, direct), + [LimiterPid] = supervisor2:find_child(SupPid, limiter), {ok, ChannelPid} = supervisor2:start_child( SupPid, {channel, {rabbit_channel, start_link, [Channel, ClientChannelPid, ClientChannelPid, ConnPid, Protocol, User, VHost, Capabilities, Collector, - start_limiter_fun(SupPid)]}, + rabbit_limiter:make_token(LimiterPid)]}, intrinsic, ?MAX_WAIT, worker, [rabbit_channel]}), {ok, SupPid, {ChannelPid, none}}. %%---------------------------------------------------------------------------- -init([]) -> - {ok, {{one_for_all, 0, 1}, []}}. - -start_limiter_fun(SupPid) -> - fun (UnackedCount) -> - Me = self(), - {ok, _Pid} = - supervisor2:start_child( - SupPid, - {limiter, {rabbit_limiter, start_link, [Me, UnackedCount]}, - transient, ?MAX_WAIT, worker, [rabbit_limiter]}) - end. +init(Type) -> + {ok, {{one_for_all, 0, 1}, child_specs(Type)}}. + +child_specs({tcp, Sock, Channel, FrameMax, ReaderPid, Protocol}) -> + [{writer, {rabbit_writer, start_link, + [Sock, Channel, FrameMax, Protocol, ReaderPid]}, + intrinsic, ?MAX_WAIT, worker, [rabbit_writer]} | child_specs(direct)]; +child_specs(direct) -> + [{limiter, {rabbit_limiter, start_link, []}, + transient, ?MAX_WAIT, worker, [rabbit_limiter]}]. diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 6eb1aaba..e8afed0c 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -17,7 +17,7 @@ -module(rabbit_control). -include("rabbit.hrl"). --export([start/0, stop/0, action/5, diagnostics/1]). 
+-export([start/0, stop/0, action/5, diagnostics/1, log_action/3]). -define(RPC_TIMEOUT, infinity). -define(WAIT_FOR_VM_ATTEMPTS, 5). @@ -51,6 +51,7 @@ -> 'ok'). -spec(diagnostics/1 :: (node()) -> [{string(), [any()]}]). -spec(usage/0 :: () -> no_return()). +-spec(log_action/3 :: (node(), string(), [term()]) -> ok). -endif. @@ -73,6 +74,7 @@ start() -> Command = list_to_atom(Command0), Quiet = proplists:get_bool(?QUIET_OPT, Opts1), Node = proplists:get_value(?NODE_OPT, Opts1), + rpc_call(Node, rabbit_control, log_action, [node(), Command0, Args]), Inform = case Quiet of true -> fun (_Format, _Args1) -> ok end; false -> fun (Format, Args1) -> @@ -474,3 +476,22 @@ quit(Status) -> {unix, _} -> halt(Status); {win32, _} -> init:stop(Status) end. + +log_action(Node, Command, Args) -> + rabbit_misc:with_local_io( + fun () -> + error_logger:info_msg("~p executing~n rabbitmqctl ~s ~s~n", + [Node, Command, + format_args(mask_args(Command, Args))]) + end). + +%% Mask passwords and other sensitive info before logging. +mask_args("add_user", [Name, _Password | Args]) -> + [Name, "****" | Args]; +mask_args("change_password", [Name, _Password | Args]) -> + [Name, "****" | Args]; +mask_args(_, Args) -> + Args. + +format_args(Args) -> + string:join([io_lib:format("~p", [A]) || A <- Args], " "). diff --git a/src/rabbit_direct.erl b/src/rabbit_direct.erl index 7ff534ee..68afaf5d 100644 --- a/src/rabbit_direct.erl +++ b/src/rabbit_direct.erl @@ -16,7 +16,10 @@ -module(rabbit_direct). --export([boot/0, connect/4, start_channel/8, disconnect/1]). +-export([boot/0, force_event_refresh/0, list/0, connect/5, + start_channel/8, disconnect/2]). +%% Internal +-export([list_local/0]). -include("rabbit.hrl"). @@ -25,8 +28,12 @@ -ifdef(use_specs). -spec(boot/0 :: () -> 'ok'). --spec(connect/4 :: (rabbit_types:username(), rabbit_types:vhost(), - rabbit_types:protocol(), rabbit_event:event_props()) -> +-spec(force_event_refresh/0 :: () -> 'ok'). +-spec(list/0 :: () -> [pid()]). 
+-spec(list_local/0 :: () -> [pid()]). +-spec(connect/5 :: (rabbit_types:username(), rabbit_types:vhost(), + rabbit_types:protocol(), pid(), + rabbit_event:event_props()) -> {'ok', {rabbit_types:user(), rabbit_framing:amqp_table()}}). -spec(start_channel/8 :: @@ -34,7 +41,7 @@ rabbit_types:user(), rabbit_types:vhost(), rabbit_framing:amqp_table(), pid()) -> {'ok', pid()}). --spec(disconnect/1 :: (rabbit_event:event_props()) -> 'ok'). +-spec(disconnect/2 :: (pid(), rabbit_event:event_props()) -> 'ok'). -endif. @@ -51,15 +58,27 @@ boot() -> transient, infinity, supervisor, [rabbit_client_sup]}), ok. +force_event_refresh() -> + [Pid ! force_event_refresh || Pid<- list()], + ok. + +list_local() -> + pg_local:get_members(rabbit_direct). + +list() -> + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_direct, list_local, []). + %%---------------------------------------------------------------------------- -connect(Username, VHost, Protocol, Infos) -> +connect(Username, VHost, Protocol, Pid, Infos) -> case lists:keymember(rabbit, 1, application:which_applications()) of true -> case rabbit_access_control:check_user_login(Username, []) of {ok, User} -> try rabbit_access_control:check_vhost_access(User, VHost) of - ok -> rabbit_event:notify(connection_created, Infos), + ok -> ok = pg_local:join(rabbit_direct, Pid), + rabbit_event:notify(connection_created, Infos), {ok, {User, rabbit_reader:server_properties(Protocol)}} catch @@ -82,5 +101,6 @@ start_channel(Number, ClientChannelPid, ConnPid, Protocol, User, VHost, Capabilities, Collector}]), {ok, ChannelPid}. -disconnect(Infos) -> +disconnect(Pid, Infos) -> + pg_local:leave(rabbit_direct, Pid), rabbit_event:notify(connection_closed, Infos). diff --git a/src/rabbit_event.erl b/src/rabbit_event.erl index 468f9293..bb765566 100644 --- a/src/rabbit_event.erl +++ b/src/rabbit_event.erl @@ -19,7 +19,7 @@ -include("rabbit.hrl"). -export([start_link/0]). 
--export([init_stats_timer/0, ensure_stats_timer/2, stop_stats_timer/1]). +-export([init_stats_timer/0, ensure_stats_timer/3, stop_stats_timer/1]). -export([reset_stats_timer/1]). -export([stats_level/1, if_enabled/2]). -export([notify/2, notify_if/3]). @@ -57,7 +57,7 @@ -spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). -spec(init_stats_timer/0 :: () -> state()). --spec(ensure_stats_timer/2 :: (state(), timer_fun()) -> state()). +-spec(ensure_stats_timer/3 :: (state(), pid(), term()) -> state()). -spec(stop_stats_timer/1 :: (state()) -> state()). -spec(reset_stats_timer/1 :: (state()) -> state()). -spec(stats_level/1 :: (state()) -> level()). @@ -80,7 +80,7 @@ start_link() -> %% if_enabled(internal_emit_stats) - so we immediately send something %% %% On wakeup: -%% ensure_stats_timer(Timer, emit_stats) +%% ensure_stats_timer(Timer, Pid, emit_stats) %% (Note we can't emit stats immediately, the timer may have fired 1ms ago.) %% %% emit_stats: @@ -99,13 +99,13 @@ init_stats_timer() -> {ok, Interval} = application:get_env(rabbit, collect_statistics_interval), #state{level = StatsLevel, interval = Interval, timer = undefined}. -ensure_stats_timer(State = #state{level = none}, _Fun) -> +ensure_stats_timer(State = #state{level = none}, _Pid, _Msg) -> State; ensure_stats_timer(State = #state{interval = Interval, - timer = undefined}, Fun) -> - {ok, TRef} = timer:apply_after(Interval, erlang, apply, [Fun, []]), + timer = undefined}, Pid, Msg) -> + TRef = erlang:send_after(Interval, Pid, Msg), State#state{timer = TRef}; -ensure_stats_timer(State, _Fun) -> +ensure_stats_timer(State, _Pid, _Msg) -> State. stop_stats_timer(State = #state{level = none}) -> @@ -113,7 +113,7 @@ stop_stats_timer(State = #state{level = none}) -> stop_stats_timer(State = #state{timer = undefined}) -> State; stop_stats_timer(State = #state{timer = TRef}) -> - {ok, cancel} = timer:cancel(TRef), + erlang:cancel_timer(TRef), State#state{timer = undefined}. 
reset_stats_timer(State) -> diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index e79583fa..66a4f89b 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -20,27 +20,36 @@ -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, prioritise_call/3]). --export([start_link/2]). +-export([start_link/0, make_token/0, make_token/1, is_enabled/1, enable/2, + disable/1]). -export([limit/2, can_send/3, ack/2, register/2, unregister/2]). -export([get_limit/1, block/1, unblock/1, is_blocked/1]). %%---------------------------------------------------------------------------- --ifdef(use_specs). +-record(token, {pid, enabled}). --type(maybe_pid() :: pid() | 'undefined'). +-ifdef(use_specs). --spec(start_link/2 :: (pid(), non_neg_integer()) -> - rabbit_types:ok_pid_or_error()). --spec(limit/2 :: (maybe_pid(), non_neg_integer()) -> 'ok' | 'stopped'). --spec(can_send/3 :: (maybe_pid(), pid(), boolean()) -> boolean()). --spec(ack/2 :: (maybe_pid(), non_neg_integer()) -> 'ok'). --spec(register/2 :: (maybe_pid(), pid()) -> 'ok'). --spec(unregister/2 :: (maybe_pid(), pid()) -> 'ok'). --spec(get_limit/1 :: (maybe_pid()) -> non_neg_integer()). --spec(block/1 :: (maybe_pid()) -> 'ok'). --spec(unblock/1 :: (maybe_pid()) -> 'ok' | 'stopped'). --spec(is_blocked/1 :: (maybe_pid()) -> boolean()). +-export_type([token/0]). + +-opaque(token() :: #token{}). + +-spec(start_link/0 :: () -> rabbit_types:ok_pid_or_error()). +-spec(make_token/0 :: () -> token()). +-spec(make_token/1 :: ('undefined' | pid()) -> token()). +-spec(is_enabled/1 :: (token()) -> boolean()). +-spec(enable/2 :: (token(), non_neg_integer()) -> token()). +-spec(disable/1 :: (token()) -> token()). +-spec(limit/2 :: (token(), non_neg_integer()) -> 'ok' | {'disabled', token()}). +-spec(can_send/3 :: (token(), pid(), boolean()) -> boolean()). +-spec(ack/2 :: (token(), non_neg_integer()) -> 'ok'). +-spec(register/2 :: (token(), pid()) -> 'ok'). 
+-spec(unregister/2 :: (token(), pid()) -> 'ok'). +-spec(get_limit/1 :: (token()) -> non_neg_integer()). +-spec(block/1 :: (token()) -> 'ok'). +-spec(unblock/1 :: (token()) -> 'ok' | {'disabled', token()}). +-spec(is_blocked/1 :: (token()) -> boolean()). -endif. @@ -49,7 +58,7 @@ -record(lim, {prefetch_count = 0, ch_pid, blocked = false, - queues = dict:new(), % QPid -> {MonitorRef, Notify} + queues = orddict:new(), % QPid -> {MonitorRef, Notify} volume = 0}). %% 'Notify' is a boolean that indicates whether a queue should be %% notified of a change in the limit or volume that may allow it to @@ -59,63 +68,63 @@ %% API %%---------------------------------------------------------------------------- -start_link(ChPid, UnackedMsgCount) -> - gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []). +start_link() -> gen_server2:start_link(?MODULE, [], []). + +make_token() -> make_token(undefined). +make_token(Pid) -> #token{pid = Pid, enabled = false}. + +is_enabled(#token{enabled = Enabled}) -> Enabled. + +enable(#token{pid = Pid} = Token, Volume) -> + gen_server2:call(Pid, {enable, Token, self(), Volume}, infinity). -limit(undefined, 0) -> - ok; -limit(LimiterPid, PrefetchCount) -> - gen_server2:call(LimiterPid, {limit, PrefetchCount}, infinity). +disable(#token{pid = Pid} = Token) -> + gen_server2:call(Pid, {disable, Token}, infinity). + +limit(Limiter, PrefetchCount) -> + maybe_call(Limiter, {limit, PrefetchCount, Limiter}, ok). %% Ask the limiter whether the queue can deliver a message without -%% breaching a limit -can_send(undefined, _QPid, _AckRequired) -> - true; -can_send(LimiterPid, QPid, AckRequired) -> +%% breaching a limit. Note that we don't use maybe_call here in order +%% to avoid always going through with_exit_handler/2, even when the +%% limiter is disabled. 
+can_send(#token{pid = Pid, enabled = true}, QPid, AckRequired) -> rabbit_misc:with_exit_handler( fun () -> true end, - fun () -> gen_server2:call(LimiterPid, {can_send, QPid, AckRequired}, - infinity) end). + fun () -> + gen_server2:call(Pid, {can_send, QPid, AckRequired}, infinity) + end); +can_send(_, _, _) -> + true. %% Let the limiter know that the channel has received some acks from a %% consumer -ack(undefined, _Count) -> ok; -ack(LimiterPid, Count) -> gen_server2:cast(LimiterPid, {ack, Count}). +ack(Limiter, Count) -> maybe_cast(Limiter, {ack, Count}). -register(undefined, _QPid) -> ok; -register(LimiterPid, QPid) -> gen_server2:cast(LimiterPid, {register, QPid}). +register(Limiter, QPid) -> maybe_cast(Limiter, {register, QPid}). -unregister(undefined, _QPid) -> ok; -unregister(LimiterPid, QPid) -> gen_server2:cast(LimiterPid, {unregister, QPid}). +unregister(Limiter, QPid) -> maybe_cast(Limiter, {unregister, QPid}). -get_limit(undefined) -> - 0; -get_limit(Pid) -> +get_limit(Limiter) -> rabbit_misc:with_exit_handler( fun () -> 0 end, - fun () -> gen_server2:call(Pid, get_limit, infinity) end). + fun () -> maybe_call(Limiter, get_limit, ok) end). -block(undefined) -> - ok; -block(LimiterPid) -> - gen_server2:call(LimiterPid, block, infinity). +block(Limiter) -> + maybe_call(Limiter, block, ok). -unblock(undefined) -> - ok; -unblock(LimiterPid) -> - gen_server2:call(LimiterPid, unblock, infinity). +unblock(Limiter) -> + maybe_call(Limiter, {unblock, Limiter}, ok). -is_blocked(undefined) -> - false; -is_blocked(LimiterPid) -> - gen_server2:call(LimiterPid, is_blocked, infinity). +is_blocked(Limiter) -> + maybe_call(Limiter, is_blocked, false). %%---------------------------------------------------------------------------- %% gen_server callbacks %%---------------------------------------------------------------------------- -init([ChPid, UnackedMsgCount]) -> - {ok, #lim{ch_pid = ChPid, volume = UnackedMsgCount}}. +init([]) -> + {ok, #lim{}}. 
prioritise_call(get_limit, _From, _State) -> 9; prioritise_call(_Msg, _From, _State) -> 0. @@ -135,23 +144,33 @@ handle_call({can_send, QPid, AckRequired}, _From, handle_call(get_limit, _From, State = #lim{prefetch_count = PrefetchCount}) -> {reply, PrefetchCount, State}; -handle_call({limit, PrefetchCount}, _From, State) -> +handle_call({limit, PrefetchCount, Token}, _From, State) -> case maybe_notify(State, State#lim{prefetch_count = PrefetchCount}) of - {cont, State1} -> {reply, ok, State1}; - {stop, State1} -> {stop, normal, stopped, State1} + {cont, State1} -> + {reply, ok, State1}; + {stop, State1} -> + {reply, {disabled, Token#token{enabled = false}}, State1} end; handle_call(block, _From, State) -> {reply, ok, State#lim{blocked = true}}; -handle_call(unblock, _From, State) -> +handle_call({unblock, Token}, _From, State) -> case maybe_notify(State, State#lim{blocked = false}) of - {cont, State1} -> {reply, ok, State1}; - {stop, State1} -> {stop, normal, stopped, State1} + {cont, State1} -> + {reply, ok, State1}; + {stop, State1} -> + {reply, {disabled, Token#token{enabled = false}}, State1} end; handle_call(is_blocked, _From, State) -> - {reply, blocked(State), State}. + {reply, blocked(State), State}; + +handle_call({enable, Token, Channel, Volume}, _From, State) -> + {reply, Token#token{enabled = true}, + State#lim{ch_pid = Channel, volume = Volume}}; +handle_call({disable, Token}, _From, State) -> + {reply, Token#token{enabled = false}, State}. handle_cast({ack, Count}, State = #lim{volume = Volume}) -> NewVolume = if Volume == 0 -> 0; @@ -190,37 +209,46 @@ maybe_notify(OldState, NewState) -> false -> {cont, NewState} end. +maybe_call(#token{pid = Pid, enabled = true}, Call, _Default) -> + gen_server2:call(Pid, Call, infinity); +maybe_call(_, _Call, Default) -> + Default. + +maybe_cast(#token{pid = Pid, enabled = true}, Cast) -> + gen_server2:cast(Pid, Cast); +maybe_cast(_, _Call) -> + ok. 
+ limit_reached(#lim{prefetch_count = Limit, volume = Volume}) -> Limit =/= 0 andalso Volume >= Limit. blocked(#lim{blocked = Blocked}) -> Blocked. remember_queue(QPid, State = #lim{queues = Queues}) -> - case dict:is_key(QPid, Queues) of + case orddict:is_key(QPid, Queues) of false -> MRef = erlang:monitor(process, QPid), - State#lim{queues = dict:store(QPid, {MRef, false}, Queues)}; + State#lim{queues = orddict:store(QPid, {MRef, false}, Queues)}; true -> State end. forget_queue(QPid, State = #lim{ch_pid = ChPid, queues = Queues}) -> - case dict:find(QPid, Queues) of - {ok, {MRef, _}} -> - true = erlang:demonitor(MRef), - ok = rabbit_amqqueue:unblock(QPid, ChPid), - State#lim{queues = dict:erase(QPid, Queues)}; - error -> State + case orddict:find(QPid, Queues) of + {ok, {MRef, _}} -> true = erlang:demonitor(MRef), + ok = rabbit_amqqueue:unblock(QPid, ChPid), + State#lim{queues = orddict:erase(QPid, Queues)}; + error -> State end. limit_queue(QPid, State = #lim{queues = Queues}) -> UpdateFun = fun ({MRef, _}) -> {MRef, true} end, - State#lim{queues = dict:update(QPid, UpdateFun, Queues)}. + State#lim{queues = orddict:update(QPid, UpdateFun, Queues)}. notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) -> {QList, NewQueues} = - dict:fold(fun (_QPid, {_, false}, Acc) -> Acc; - (QPid, {MRef, true}, {L, D}) -> - {[QPid | L], dict:store(QPid, {MRef, false}, D)} - end, {[], Queues}, Queues), + orddict:fold(fun (_QPid, {_, false}, Acc) -> Acc; + (QPid, {MRef, true}, {L, D}) -> + {[QPid | L], orddict:store(QPid, {MRef, false}, D)} + end, {[], Queues}, Queues), case length(QList) of 0 -> ok; L -> @@ -228,7 +256,8 @@ notify_queues(State = #lim{ch_pid = ChPid, queues = Queues}) -> %% thus ensuring that each queue has an equal chance of %% being notified first. 
{L1, L2} = lists:split(random:uniform(L), QList), - [ok = rabbit_amqqueue:unblock(Q, ChPid) || Q <- L2 ++ L1], + [[ok = rabbit_amqqueue:unblock(Q, ChPid) || Q <- L3] + || L3 <- [L2, L1]], ok end, State#lim{queues = NewQueues}. diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl index a347904c..8ed2bede 100644 --- a/src/rabbit_mirror_queue_coordinator.erl +++ b/src/rabbit_mirror_queue_coordinator.erl @@ -11,12 +11,12 @@ %% The Original Code is RabbitMQ. %% %% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. %% -module(rabbit_mirror_queue_coordinator). --export([start_link/3, get_gm/1, ensure_monitoring/2]). +-export([start_link/4, get_gm/1, ensure_monitoring/2]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). @@ -32,15 +32,17 @@ -record(state, { q, gm, monitors, - death_fun + death_fun, + length_fun }). -define(ONE_SECOND, 1000). -ifdef(use_specs). --spec(start_link/3 :: (rabbit_types:amqqueue(), pid() | 'undefined', - rabbit_mirror_queue_master:death_fun()) -> +-spec(start_link/4 :: (rabbit_types:amqqueue(), pid() | 'undefined', + rabbit_mirror_queue_master:death_fun(), + rabbit_mirror_queue_master:length_fun()) -> rabbit_types:ok_pid_or_error()). -spec(get_gm/1 :: (pid()) -> pid()). -spec(ensure_monitoring/2 :: (pid(), [pid()]) -> 'ok'). @@ -53,7 +55,7 @@ %% %% A queue with mirrors consists of the following: %% -%% #amqqueue{ pid, mirror_pids } +%% #amqqueue{ pid, slave_pids } %% | | %% +----------+ +-------+--------------+-----------...etc... %% | | | @@ -138,9 +140,28 @@ %% state of the master. The detection of the sync-status of a slave is %% done entirely based on length: if the slave and the master both %% agree on the length of the queue after the fetch of the head of the -%% queue, then the queues must be in sync. 
The only other possibility -%% is that the slave's queue is shorter, and thus the fetch should be -%% ignored. +%% queue (or a 'set_length' results in a slave having to drop some +%% messages from the head of its queue), then the queues must be in +%% sync. The only other possibility is that the slave's queue is +%% shorter, and thus the fetch should be ignored. In case slaves are +%% joined to an empty queue which only goes on to receive publishes, +%% they start by asking the master to broadcast its length. This is +%% enough for slaves to always be able to work out when their head +%% does not differ from the master (and is much simpler and cheaper +%% than getting the master to hang on to the guid of the msg at the +%% head of its queue). When a slave is promoted to a master, it +%% unilaterally broadcasts its length, in order to solve the problem +%% of length requests from new slaves being unanswered by a dead +%% master. +%% +%% Obviously, due to the async nature of communication across gm, the +%% slaves can fall behind. This does not matter from a sync pov: if +%% they fall behind and the master dies then a) no publishes are lost +%% because all publishes go to all mirrors anyway; b) the worst that +%% happens is that acks get lost and so messages come back to +%% life. This is no worse than normal given you never get confirmation +%% that an ack has been received (not quite true with QoS-prefetch, +%% but close enough for jazz). %% %% Because acktags are issued by the bq independently, and because %% there is no requirement for the master and all slaves to use the @@ -279,8 +300,8 @@ %% %%---------------------------------------------------------------------------- -start_link(Queue, GM, DeathFun) -> - gen_server2:start_link(?MODULE, [Queue, GM, DeathFun], []). +start_link(Queue, GM, DeathFun, LengthFun) -> + gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, LengthFun], []). get_gm(CPid) -> gen_server2:call(CPid, get_gm, infinity). 
@@ -292,7 +313,7 @@ ensure_monitoring(CPid, Pids) -> %% gen_server %% --------------------------------------------------------------------------- -init([#amqqueue { name = QueueName } = Q, GM, DeathFun]) -> +init([#amqqueue { name = QueueName } = Q, GM, DeathFun, LengthFun]) -> GM1 = case GM of undefined -> {ok, GM2} = gm:start_link(QueueName, ?MODULE, [self()]), @@ -306,10 +327,11 @@ init([#amqqueue { name = QueueName } = Q, GM, DeathFun]) -> end, {ok, _TRef} = timer:apply_interval(?ONE_SECOND, gm, broadcast, [GM1, heartbeat]), - {ok, #state { q = Q, - gm = GM1, - monitors = dict:new(), - death_fun = DeathFun }, + {ok, #state { q = Q, + gm = GM1, + monitors = dict:new(), + death_fun = DeathFun, + length_fun = LengthFun }, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -317,18 +339,21 @@ handle_call(get_gm, _From, State = #state { gm = GM }) -> reply(GM, State). handle_cast({gm_deaths, Deaths}, - State = #state { q = #amqqueue { name = QueueName } }) -> - rabbit_log:info("Mirrored-queue (~s): Master ~s saw deaths of mirrors ~s~n", - [rabbit_misc:rs(QueueName), - rabbit_misc:pid_to_string(self()), - [[rabbit_misc:pid_to_string(Pid), $ ] || Pid <- Deaths]]), + State = #state { q = #amqqueue { name = QueueName, pid = MPid } }) + when node(MPid) =:= node() -> case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of - {ok, Pid} when node(Pid) =:= node() -> + {ok, MPid, DeadPids} -> + rabbit_mirror_queue_misc:report_deaths(MPid, true, QueueName, + DeadPids), noreply(State); {error, not_found} -> {stop, normal, State} end; +handle_cast(request_length, State = #state { length_fun = LengthFun }) -> + ok = LengthFun(), + noreply(State); + handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Monitors }) -> Monitors1 = @@ -343,13 +368,12 @@ handle_cast({ensure_monitoring, Pids}, handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, State = #state { monitors = Monitors, - death_fun = Fun }) -> - noreply( 
- case dict:is_key(Pid, Monitors) of - false -> State; - true -> ok = Fun(Pid), - State #state { monitors = dict:erase(Pid, Monitors) } - end); + death_fun = DeathFun }) -> + noreply(case dict:is_key(Pid, Monitors) of + false -> State; + true -> ok = DeathFun(Pid), + State #state { monitors = dict:erase(Pid, Monitors) } + end); handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}. @@ -379,6 +403,8 @@ members_changed([CPid], _Births, Deaths) -> handle_msg([_CPid], _From, heartbeat) -> ok; +handle_msg([CPid], _From, request_length = Msg) -> + ok = gen_server2:cast(CPid, Msg); handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) -> ok = gen_server2:cast(CPid, Msg); handle_msg([_CPid], _From, _Msg) -> diff --git a/src/rabbit_mirror_queue_master.erl b/src/rabbit_mirror_queue_master.erl index 69be6ecd..ad5fd28f 100644 --- a/src/rabbit_mirror_queue_master.erl +++ b/src/rabbit_mirror_queue_master.erl @@ -11,7 +11,7 @@ %% The Original Code is RabbitMQ. %% %% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. %% -module(rabbit_mirror_queue_master). @@ -25,7 +25,7 @@ -export([start/1, stop/0]). --export([promote_backing_queue_state/6, sender_death_fun/0]). +-export([promote_backing_queue_state/6, sender_death_fun/0, length_fun/0]). -behaviour(rabbit_backing_queue). @@ -44,9 +44,10 @@ -ifdef(use_specs). --export_type([death_fun/0]). +-export_type([death_fun/0, length_fun/0]). -type(death_fun() :: fun ((pid()) -> 'ok')). +-type(length_fun() :: fun (() -> 'ok')). -type(master_state() :: #state { gm :: pid(), coordinator :: pid(), backing_queue :: atom(), @@ -61,6 +62,7 @@ -spec(promote_backing_queue_state/6 :: (pid(), atom(), any(), pid(), dict(), [pid()]) -> master_state()). -spec(sender_death_fun/0 :: () -> death_fun()). +-spec(length_fun/0 :: () -> length_fun()). -endif. 
@@ -83,7 +85,7 @@ stop() -> init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover, AsyncCallback) -> {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( - Q, undefined, sender_death_fun()), + Q, undefined, sender_death_fun(), length_fun()), GM = rabbit_mirror_queue_coordinator:get_gm(CPid), MNodes1 = (case MNodes of @@ -94,6 +96,7 @@ init(#amqqueue { name = QName, mirror_nodes = MNodes } = Q, Recover, [rabbit_mirror_queue_misc:add_mirror(QName, Node) || Node <- MNodes1], {ok, BQ} = application:get_env(backing_queue_module), BQS = BQ:init(Q, Recover, AsyncCallback), + ok = gm:broadcast(GM, {length, BQ:len(BQS)}), #state { gm = GM, coordinator = CPid, backing_queue = BQ, @@ -349,11 +352,13 @@ discard(Msg = #basic_message { id = MsgId }, ChPid, %% --------------------------------------------------------------------------- promote_backing_queue_state(CPid, BQ, BQS, GM, SeenStatus, KS) -> + Len = BQ:len(BQS), + ok = gm:broadcast(GM, {length, Len}), #state { gm = GM, coordinator = CPid, backing_queue = BQ, backing_queue_state = BQS, - set_delivered = BQ:len(BQS), + set_delivered = Len, seen_status = SeenStatus, confirmed = [], ack_msg_id = dict:new(), @@ -371,9 +376,18 @@ sender_death_fun() -> end) end. -%% --------------------------------------------------------------------------- -%% Helpers -%% --------------------------------------------------------------------------- +length_fun() -> + Self = self(), + fun () -> + rabbit_amqqueue:run_backing_queue( + Self, ?MODULE, + fun (?MODULE, State = #state { gm = GM, + backing_queue = BQ, + backing_queue_state = BQS }) -> + ok = gm:broadcast(GM, {length, BQ:len(BQS)}), + State + end) + end. maybe_store_acktag(undefined, _MsgId, AM) -> AM; diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl index 4761f79e..cf8e9484 100644 --- a/src/rabbit_mirror_queue_misc.erl +++ b/src/rabbit_mirror_queue_misc.erl @@ -11,13 +11,14 @@ %% The Original Code is RabbitMQ. 
%% %% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. %% -module(rabbit_mirror_queue_misc). -export([remove_from_queue/2, on_node_up/0, - drop_mirror/2, drop_mirror/3, add_mirror/2, add_mirror/3]). + drop_mirror/2, drop_mirror/3, add_mirror/2, add_mirror/3, + report_deaths/4]). -include("rabbit.hrl"). @@ -28,6 +29,7 @@ %% become the new master, which is bad because it could then mean the %% slave (now master) receives messages it's not ready for (for %% example, new consumers). +%% Returns {ok, NewMPid, DeadPids} remove_from_queue(QueueName, DeadPids) -> DeadNodes = [node(DeadPid) || DeadPid <- DeadPids], rabbit_misc:execute_mnesia_transaction( @@ -38,27 +40,27 @@ remove_from_queue(QueueName, DeadPids) -> [] -> {error, not_found}; [Q = #amqqueue { pid = QPid, slave_pids = SPids }] -> - [QPid1 | SPids1] = + [QPid1 | SPids1] = Alive = [Pid || Pid <- [QPid | SPids], not lists:member(node(Pid), DeadNodes)], case {{QPid, SPids}, {QPid1, SPids1}} of {Same, Same} -> - ok; + {ok, QPid1, []}; _ when QPid =:= QPid1 orelse node(QPid1) =:= node() -> %% Either master hasn't changed, so %% we're ok to update mnesia; or we have %% become the master. Q1 = Q #amqqueue { pid = QPid1, slave_pids = SPids1 }, - ok = rabbit_amqqueue:store_queue(Q1); + ok = rabbit_amqqueue:store_queue(Q1), + {ok, QPid1, [QPid | SPids] -- Alive}; _ -> %% Master has changed, and we're not it, %% so leave alone to allow the promoted %% slave to find it and make its %% promotion atomic. - ok - end, - {ok, QPid1} + {ok, QPid1, []} + end end end). @@ -133,3 +135,17 @@ if_mirrored_queue(Queue, Fun) -> _ -> Fun(Q) end end). 
+ +report_deaths(_MirrorPid, _IsMaster, _QueueName, []) -> + ok; +report_deaths(MirrorPid, IsMaster, QueueName, DeadPids) -> + rabbit_event:notify(queue_mirror_deaths, [{name, QueueName}, + {pids, DeadPids}]), + rabbit_log:info("Mirrored-queue (~s): ~s ~s saw deaths of mirrors ~s~n", + [rabbit_misc:rs(QueueName), + case IsMaster of + true -> "Master"; + false -> "Slave" + end, + rabbit_misc:pid_to_string(MirrorPid), + [[rabbit_misc:pid_to_string(P), $ ] || P <- DeadPids]]). diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl index 93340ba8..3c453981 100644 --- a/src/rabbit_mirror_queue_slave.erl +++ b/src/rabbit_mirror_queue_slave.erl @@ -11,7 +11,7 @@ %% The Original Code is RabbitMQ. %% %% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. %% -module(rabbit_mirror_queue_slave). @@ -33,11 +33,11 @@ %% All instructions from the GM group must be processed in the order %% in which they're received. --export([start_link/1, set_maximum_since_use/2]). +-export([start_link/1, set_maximum_since_use/2, info/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, handle_pre_hibernate/1, prioritise_call/3, - prioritise_cast/2]). + prioritise_cast/2, prioritise_info/2]). -export([joined/2, members_changed/3, handle_msg/3]). @@ -47,6 +47,15 @@ -include("rabbit.hrl"). -include("gm_specs.hrl"). +-define(CREATION_EVENT_KEYS, + [pid, + name, + master_pid, + is_synchronised + ]). + +-define(INFO_KEYS, ?CREATION_EVENT_KEYS). + -define(SYNC_INTERVAL, 25). %% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). -define(DEATH_TIMEOUT, 20000). %% 20 seconds @@ -64,7 +73,9 @@ ack_num, msg_id_status, - known_senders + known_senders, + + synchronised }). 
start_link(Q) -> @@ -73,6 +84,9 @@ start_link(Q) -> set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). +info(QPid) -> + gen_server2:call(QPid, info, infinity). + init([#amqqueue { name = QueueName } = Q]) -> process_flag(trap_exit, true), %% amqqueue_process traps exits too. {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]), @@ -89,33 +103,38 @@ init([#amqqueue { name = QueueName } = Q]) -> %% ASSERTION [] = [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node], MPids1 = MPids ++ [Self], - mnesia:write(rabbit_queue, - Q1 #amqqueue { slave_pids = MPids1 }, - write), + ok = rabbit_amqqueue:store_queue( + Q1 #amqqueue { slave_pids = MPids1 }), {ok, QPid} end), erlang:monitor(process, MPid), ok = file_handle_cache:register_callback( - rabbit_amqqueue, set_maximum_since_use, [self()]), + rabbit_amqqueue, set_maximum_since_use, [Self]), ok = rabbit_memory_monitor:register( - self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), + Self, {rabbit_amqqueue, set_ram_duration_target, [Self]}), {ok, BQ} = application:get_env(backing_queue_module), BQS = bq_init(BQ, Q, false), - {ok, #state { q = Q, - gm = GM, - master_pid = MPid, - backing_queue = BQ, - backing_queue_state = BQS, - rate_timer_ref = undefined, - sync_timer_ref = undefined, - - sender_queues = dict:new(), - msg_id_ack = dict:new(), - ack_num = 0, - - msg_id_status = dict:new(), - known_senders = dict:new() - }, hibernate, + State = #state { q = Q, + gm = GM, + master_pid = MPid, + backing_queue = BQ, + backing_queue_state = BQS, + rate_timer_ref = undefined, + sync_timer_ref = undefined, + + sender_queues = dict:new(), + msg_id_ack = dict:new(), + ack_num = 0, + + msg_id_status = dict:new(), + known_senders = dict:new(), + + synchronised = false + }, + rabbit_event:notify(queue_slave_created, + infos(?CREATION_EVENT_KEYS, State)), + ok = gm:broadcast(GM, request_length), + {ok, State, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, 
?DESIRED_HIBERNATE}}. handle_call({deliver_immediately, Delivery = #delivery {}}, From, State) -> @@ -145,29 +164,32 @@ handle_call({gm_deaths, Deaths}, From, State = #state { q = #amqqueue { name = QueueName }, gm = GM, master_pid = MPid }) -> - rabbit_log:info("Mirrored-queue (~s): Slave ~s saw deaths of mirrors ~s~n", - [rabbit_misc:rs(QueueName), - rabbit_misc:pid_to_string(self()), - [[rabbit_misc:pid_to_string(Pid), $ ] || Pid <- Deaths]]), %% The GM has told us about deaths, which means we're not going to %% receive any more messages from GM case rabbit_mirror_queue_misc:remove_from_queue(QueueName, Deaths) of - {ok, Pid} when node(Pid) =:= node(MPid) -> - %% master hasn't changed - reply(ok, State); - {ok, Pid} when node(Pid) =:= node() -> - %% we've become master - promote_me(From, State); - {ok, Pid} -> - %% master has changed to not us. - gen_server2:reply(From, ok), - erlang:monitor(process, Pid), - ok = gm:broadcast(GM, heartbeat), - noreply(State #state { master_pid = Pid }); {error, not_found} -> gen_server2:reply(From, ok), - {stop, normal, State} - end. + {stop, normal, State}; + {ok, Pid, DeadPids} -> + rabbit_mirror_queue_misc:report_deaths(self(), false, QueueName, + DeadPids), + if node(Pid) =:= node(MPid) -> + %% master hasn't changed + reply(ok, State); + node(Pid) =:= node() -> + %% we've become master + promote_me(From, State); + true -> + %% master has changed to not us. + gen_server2:reply(From, ok), + erlang:monitor(process, Pid), + ok = gm:broadcast(GM, heartbeat), + noreply(State #state { master_pid = Pid }) + end + end; + +handle_call(info, _From, State) -> + reply(infos(?INFO_KEYS, State), State). 
handle_cast({run_backing_queue, Mod, Fun}, State) -> noreply(run_backing_queue(Mod, Fun, State)); @@ -187,9 +209,9 @@ handle_cast({set_ram_duration_target, Duration}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> BQS1 = BQ:set_ram_duration_target(Duration, BQS), - noreply(State #state { backing_queue_state = BQS1 }); + noreply(State #state { backing_queue_state = BQS1 }). -handle_cast(update_ram_duration, +handle_info(update_ram_duration, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> {RamDuration, BQS1} = BQ:ram_duration(BQS), @@ -199,9 +221,9 @@ handle_cast(update_ram_duration, noreply(State #state { rate_timer_ref = just_measured, backing_queue_state = BQS2 }); -handle_cast(sync_timeout, State) -> +handle_info(sync_timeout, State) -> noreply(backing_queue_timeout( - State #state { sync_timer_ref = undefined })). + State #state { sync_timer_ref = undefined })); handle_info(timeout, State) -> noreply(backing_queue_timeout(State)); @@ -260,22 +282,28 @@ handle_pre_hibernate(State = #state { backing_queue = BQ, prioritise_call(Msg, _From, _State) -> case Msg of + info -> 9; {gm_deaths, _Deaths} -> 5; _ -> 0 end. prioritise_cast(Msg, _State) -> case Msg of - update_ram_duration -> 8; {set_ram_duration_target, _Duration} -> 8; {set_maximum_since_use, _Age} -> 8; {run_backing_queue, _Mod, _Fun} -> 6; - sync_timeout -> 6; {gm, _Msg} -> 5; {post_commit, _Txn, _AckTags} -> 4; _ -> 0 end. +prioritise_info(Msg, _State) -> + case Msg of + update_ram_duration -> 8; + sync_timeout -> 6; + _ -> 0 + end. 
+ %% --------------------------------------------------------------------------- %% GM %% --------------------------------------------------------------------------- @@ -291,6 +319,9 @@ members_changed([SPid], _Births, Deaths) -> handle_msg([_SPid], _From, heartbeat) -> ok; +handle_msg([_SPid], _From, request_length) -> + %% This is only of value to the master + ok; handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) -> %% This is only of value to the master ok; @@ -315,6 +346,14 @@ inform_deaths(SPid, Deaths) -> %% Others %% --------------------------------------------------------------------------- +infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. + +i(pid, _State) -> self(); +i(name, #state { q = #amqqueue { name = Name } }) -> Name; +i(master_pid, #state { master_pid = MPid }) -> MPid; +i(is_synchronised, #state { synchronised = Synchronised }) -> Synchronised; +i(Item, _State) -> throw({bad_argument, Item}). + bq_init(BQ, Q, Recover) -> Self = self(), BQ:init(Q, Recover, @@ -380,7 +419,7 @@ gb_trees_cons(Key, Value, Tree) -> handle_process_result({ok, State}) -> noreply(State); handle_process_result({stop, State}) -> {stop, normal, State}. 
-promote_me(From, #state { q = Q, +promote_me(From, #state { q = Q = #amqqueue { name = QName }, gm = GM, backing_queue = BQ, backing_queue_state = BQS, @@ -389,12 +428,14 @@ promote_me(From, #state { q = Q, msg_id_ack = MA, msg_id_status = MS, known_senders = KS }) -> + rabbit_event:notify(queue_slave_promoted, [{pid, self()}, + {name, QName}]), rabbit_log:info("Mirrored-queue (~s): Promoting slave ~s to master~n", - [rabbit_misc:rs(Q #amqqueue.name), - rabbit_misc:pid_to_string(self())]), + [rabbit_misc:rs(QName), rabbit_misc:pid_to_string(self())]), Q1 = Q #amqqueue { pid = self() }, {ok, CPid} = rabbit_mirror_queue_coordinator:start_link( - Q1, GM, rabbit_mirror_queue_master:sender_death_fun()), + Q1, GM, rabbit_mirror_queue_master:sender_death_fun(), + rabbit_mirror_queue_master:length_fun()), true = unlink(GM), gen_server2:reply(From, {promote, CPid}), ok = gm:confirmed_broadcast(GM, heartbeat), @@ -516,8 +557,7 @@ backing_queue_timeout(State = #state { backing_queue = BQ }) -> run_backing_queue(BQ, fun (M, BQS) -> M:timeout(BQS) end, State). ensure_sync_timer(State = #state { sync_timer_ref = undefined }) -> - {ok, TRef} = timer:apply_after( - ?SYNC_INTERVAL, rabbit_amqqueue, sync_timeout, [self()]), + TRef = erlang:send_after(?SYNC_INTERVAL, self(), sync_timeout), State #state { sync_timer_ref = TRef }; ensure_sync_timer(State) -> State. @@ -525,14 +565,12 @@ ensure_sync_timer(State) -> stop_sync_timer(State = #state { sync_timer_ref = undefined }) -> State; stop_sync_timer(State = #state { sync_timer_ref = TRef }) -> - {ok, cancel} = timer:cancel(TRef), + erlang:cancel_timer(TRef), State #state { sync_timer_ref = undefined }. 
ensure_rate_timer(State = #state { rate_timer_ref = undefined }) -> - {ok, TRef} = timer:apply_after( - ?RAM_DURATION_UPDATE_INTERVAL, - rabbit_amqqueue, update_ram_duration, - [self()]), + TRef = erlang:send_after(?RAM_DURATION_UPDATE_INTERVAL, + self(), update_ram_duration), State #state { rate_timer_ref = TRef }; ensure_rate_timer(State = #state { rate_timer_ref = just_measured }) -> State #state { rate_timer_ref = undefined }; @@ -544,7 +582,7 @@ stop_rate_timer(State = #state { rate_timer_ref = undefined }) -> stop_rate_timer(State = #state { rate_timer_ref = just_measured }) -> State #state { rate_timer_ref = undefined }; stop_rate_timer(State = #state { rate_timer_ref = TRef }) -> - {ok, cancel} = timer:cancel(TRef), + erlang:cancel_timer(TRef), State #state { rate_timer_ref = undefined }. ensure_monitoring(ChPid, State = #state { known_senders = KS }) -> @@ -748,7 +786,7 @@ process_instruction({set_length, Length}, backing_queue_state = BQS }) -> QLen = BQ:len(BQS), ToDrop = QLen - Length, - {ok, case ToDrop > 0 of + {ok, case ToDrop >= 0 of true -> BQS1 = lists:foldl( fun (const, BQSN) -> @@ -756,7 +794,8 @@ process_instruction({set_length, Length}, BQSN1} = BQ:fetch(false, BQSN), BQSN1 end, BQS, lists:duplicate(ToDrop, const)), - State #state { backing_queue_state = BQS1 }; + set_synchronised( + true, State #state { backing_queue_state = BQS1 }); false -> State end}; process_instruction({fetch, AckRequired, MsgId, Remaining}, @@ -769,6 +808,8 @@ process_instruction({fetch, AckRequired, MsgId, Remaining}, AckTag, Remaining}, BQS1} = BQ:fetch(AckRequired, BQS), maybe_store_ack(AckRequired, MsgId, AckTag, State #state { backing_queue_state = BQS1 }); + Other when Other + 1 =:= Remaining -> + set_synchronised(true, State); Other when Other < Remaining -> %% we must be shorter than the master State @@ -821,6 +862,10 @@ process_instruction({sender_death, ChPid}, msg_id_status = MS1, known_senders = dict:erase(ChPid, KS) } end}; +process_instruction({length, 
Length}, + State = #state { backing_queue = BQ, + backing_queue_state = BQS }) -> + {ok, set_synchronised(Length =:= BQ:len(BQS), State)}; process_instruction({delete_and_terminate, Reason}, State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> @@ -848,3 +893,15 @@ maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA, ack_num = Num }) -> State #state { msg_id_ack = dict:store(MsgId, {Num, AckTag}, MA), ack_num = Num + 1 }. + +%% We intentionally leave out the head where a slave becomes +%% unsynchronised: we assert that can never happen. +set_synchronised(true, State = #state { q = #amqqueue { name = QName }, + synchronised = false }) -> + rabbit_event:notify(queue_slave_synchronised, [{pid, self()}, + {name, QName}]), + State #state { synchronised = true }; +set_synchronised(true, State) -> + State; +set_synchronised(false, State = #state { synchronised = false }) -> + State. diff --git a/src/rabbit_mirror_queue_slave_sup.erl b/src/rabbit_mirror_queue_slave_sup.erl index 2ce5941e..fc04ec79 100644 --- a/src/rabbit_mirror_queue_slave_sup.erl +++ b/src/rabbit_mirror_queue_slave_sup.erl @@ -11,23 +11,11 @@ %% The Original Code is RabbitMQ. %% %% The Initial Developer of the Original Code is VMware, Inc. -%% Copyright (c) 2007-2010 VMware, Inc. All rights reserved. +%% Copyright (c) 2010-2011 VMware, Inc. All rights reserved. %% -module(rabbit_mirror_queue_slave_sup). --rabbit_boot_step({mirror_queue_slave_sup, - [{description, "mirror queue slave sup"}, - {mfa, {rabbit_mirror_queue_slave_sup, start, []}}, - {requires, recovery}, - {enables, routing_ready}]}). - --rabbit_boot_step({mirrored_queues, - [{description, "adding mirrors to queues"}, - {mfa, {rabbit_mirror_queue_misc, on_node_up, []}}, - {requires, mirror_queue_slave_sup}, - {enables, routing_ready}]}). - -behaviour(supervisor2). -export([start/0, start_link/0, start_child/2]). 
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index b6b97f6d..ae28722a 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -42,7 +42,7 @@ -export([dirty_read_all/1, dirty_foreach_key/2, dirty_dump_log/1]). -export([read_term_file/1, write_term_file/2, write_file/2, write_file/3]). -export([append_file/2, ensure_parent_dirs_exist/1]). --export([format_stderr/2]). +-export([format_stderr/2, with_local_io/1]). -export([start_applications/1, stop_applications/1]). -export([unfold/2, ceil/1, queue_fold/3]). -export([sort_field_table/1]). @@ -57,6 +57,8 @@ -export([ntoa/1, ntoab/1]). -export([is_process_alive/1]). -export([pget/2, pget/3, pget_or_die/2]). +-export([format_message_queue/2]). +-export([append_rpc_all_nodes/4]). %%---------------------------------------------------------------------------- @@ -164,6 +166,7 @@ -spec(append_file/2 :: (file:filename(), string()) -> ok_or_error()). -spec(ensure_parent_dirs_exist/1 :: (string()) -> 'ok'). -spec(format_stderr/2 :: (string(), [any()]) -> 'ok'). +-spec(with_local_io/1 :: (fun (() -> A)) -> A). -spec(start_applications/1 :: ([atom()]) -> 'ok'). -spec(stop_applications/1 :: ([atom()]) -> 'ok'). -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). @@ -205,6 +208,8 @@ -spec(pget/2 :: (term(), [term()]) -> term()). -spec(pget/3 :: (term(), [term()], term()) -> term()). -spec(pget_or_die/2 :: (term(), [term()]) -> term() | no_return()). +-spec(format_message_queue/2 :: (any(), priority_queue:q()) -> term()). +-spec(append_rpc_all_nodes/4 :: ([node()], atom(), atom(), [any()]) -> [any()]). -endif. @@ -603,6 +608,17 @@ format_stderr(Fmt, Args) -> end, ok. +%% Execute Fun using the IO system of the local node (i.e. the node on +%% which the code is executing). +with_local_io(Fun) -> + GL = group_leader(), + group_leader(whereis(user), self()), + try + Fun() + after + group_leader(GL, self()) + end. 
+ manage_applications(Iterate, Do, Undo, SkipError, ErrorTag, Apps) -> Iterate(fun (App, Acc) -> case Do(App) of @@ -919,3 +935,31 @@ pget_or_die(K, P) -> undefined -> exit({error, key_missing, K}); V -> V end. + +format_message_queue(_Opt, MQ) -> + Len = priority_queue:len(MQ), + {Len, + case Len > 100 of + false -> priority_queue:to_list(MQ); + true -> {summary, + orddict:to_list( + lists:foldl( + fun ({P, V}, Counts) -> + orddict:update_counter( + {P, format_message_queue_entry(V)}, 1, Counts) + end, orddict:new(), priority_queue:to_list(MQ)))} + end}. + +format_message_queue_entry(V) when is_atom(V) -> + V; +format_message_queue_entry(V) when is_tuple(V) -> + list_to_tuple([format_message_queue_entry(E) || E <- tuple_to_list(V)]); +format_message_queue_entry(_V) -> + '_'. + +append_rpc_all_nodes(Nodes, M, F, A) -> + {ResL, _} = rpc:multicall(Nodes, M, F, A), + lists:append([case Res of + {badrpc, _} -> []; + _ -> Res + end || Res <- ResL]). diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 463da98c..c63c67f4 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -23,7 +23,8 @@ empty_ram_only_tables/0, copy_db/1, wait_for_tables/1, create_cluster_nodes_config/1, read_cluster_nodes_config/0, record_running_nodes/0, read_previously_running_nodes/0, - delete_previously_running_nodes/0, running_nodes_filename/0]). + delete_previously_running_nodes/0, running_nodes_filename/0, + is_disc_node/0, on_node_down/1, on_node_up/1]). -export([table_names/0]). @@ -65,6 +66,9 @@ -spec(read_previously_running_nodes/0 :: () -> [node()]). -spec(delete_previously_running_nodes/0 :: () -> 'ok'). -spec(running_nodes_filename/0 :: () -> file:filename()). +-spec(is_disc_node/0 :: () -> boolean()). +-spec(on_node_up/1 :: (node()) -> 'ok'). +-spec(on_node_down/1 :: (node()) -> 'ok'). -endif. 
@@ -83,7 +87,9 @@ status() -> no -> case all_clustered_nodes() of [] -> []; Nodes -> [{unknown, Nodes}] - end + end; + Reason when Reason =:= starting; Reason =:= stopping -> + exit({rabbit_busy, try_again_later}) end}, {running_nodes, running_clustered_nodes()}]. @@ -115,14 +121,58 @@ force_cluster(ClusterNodes) -> cluster(ClusterNodes, Force) -> ensure_mnesia_not_running(), ensure_mnesia_dir(), - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + + case not Force andalso is_only_disc_node(node(), false) andalso + not should_be_disc_node(ClusterNodes) of + true -> log_both("last running disc node leaving cluster"); + _ -> ok + end, + + %% Wipe mnesia if we're changing type from disc to ram + case {is_disc_node(), should_be_disc_node(ClusterNodes)} of + {true, false} -> rabbit_misc:with_local_io( + fun () -> error_logger:warning_msg( + "changing node type; wiping " + "mnesia...~n~n") + end), + rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), + cannot_delete_schema); + _ -> ok + end, + + %% Pre-emptively leave the cluster + %% + %% We're trying to handle the following two cases: + %% 1. We have a two-node cluster, where both nodes are disc nodes. + %% One node is re-clustered as a ram node. When it tries to + %% re-join the cluster, but before it has time to update its + %% tables definitions, the other node will order it to re-create + %% its disc tables. So, we need to leave the cluster before we + %% can join it again. + %% 2. We have a two-node cluster, where both nodes are disc nodes. + %% One node is forcefully reset (so, the other node thinks its + %% still a part of the cluster). The reset node is re-clustered + %% as a ram node. Same as above, we need to leave the cluster + %% before we can join it. But, since we don't know if we're in a + %% cluster or not, we just pre-emptively leave it before joining. 
+ ProperClusterNodes = ClusterNodes -- [node()], + try + ok = leave_cluster(ProperClusterNodes, ProperClusterNodes) + catch + {error, {no_running_cluster_nodes, _, _}} when Force -> + ok + end, + + %% Join the cluster + start_mnesia(), try ok = init_db(ClusterNodes, Force, fun maybe_upgrade_local_or_record_desired/0), ok = create_cluster_nodes_config(ClusterNodes) after - mnesia:stop() + stop_mnesia() end, + ok. %% return node to its virgin state, where it is not member of any @@ -158,10 +208,13 @@ nodes_of_type(Type) -> %% This function should return the nodes of a certain type (ram, %% disc or disc_only) in the current cluster. The type of nodes %% is determined when the cluster is initially configured. - %% Specifically, we check whether a certain table, which we know - %% will be written to disk on a disc node, is stored on disk or in - %% RAM. - mnesia:table_info(rabbit_durable_exchange, Type). + mnesia:table_info(schema, Type). + +%% The tables aren't supposed to be on disk on a ram node +table_definitions(disc) -> + table_definitions(); +table_definitions(ram) -> + [{Tab, copy_type_to_ram(TabDef)} || {Tab, TabDef} <- table_definitions()]. table_definitions() -> [{rabbit_user, @@ -218,8 +271,6 @@ table_definitions() -> {type, ordered_set}, {match, #topic_trie_binding{trie_binding = trie_binding_match(), _='_'}}]}, - %% Consider the implications to nodes_of_type/1 before altering - %% the next entry. {rabbit_durable_exchange, [{record_name, exchange}, {attributes, record_info(fields, exchange)}, @@ -289,14 +340,24 @@ ensure_mnesia_dir() -> ensure_mnesia_running() -> case mnesia:system_info(is_running) of - yes -> ok; - no -> throw({error, mnesia_not_running}) + yes -> + ok; + starting -> + wait_for(mnesia_running), + ensure_mnesia_running(); + Reason when Reason =:= no; Reason =:= stopping -> + throw({error, mnesia_not_running}) end. 
ensure_mnesia_not_running() -> case mnesia:system_info(is_running) of - no -> ok; - yes -> throw({error, mnesia_unexpectedly_running}) + no -> + ok; + stopping -> + wait_for(mnesia_not_running), + ensure_mnesia_not_running(); + Reason when Reason =:= yes; Reason =:= starting -> + throw({error, mnesia_unexpectedly_running}) end. ensure_schema_integrity() -> @@ -342,7 +403,11 @@ check_table_content(Tab, TabDef) -> end. check_tables(Fun) -> - case [Error || {Tab, TabDef} <- table_definitions(), + case [Error || {Tab, TabDef} <- table_definitions( + case is_disc_node() of + true -> disc; + false -> ram + end), case Fun(Tab, TabDef) of ok -> Error = none, false; {error, Error} -> true @@ -443,30 +508,47 @@ init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) -> end; true -> ok end, - case {Nodes, mnesia:system_info(use_dir)} of - {[], false} -> + WantDiscNode = should_be_disc_node(ClusterNodes), + WasDiscNode = is_disc_node(), + %% We create a new db (on disk, or in ram) in the first + %% two cases and attempt to upgrade the in the other two + case {Nodes, WasDiscNode, WantDiscNode} of + {[], _, false} -> + %% New ram node; start from scratch + ok = create_schema(ram); + {[], false, true} -> %% Nothing there at all, start from scratch - ok = create_schema(); - {[], true} -> + ok = create_schema(disc); + {[], true, true} -> %% We're the first node up case rabbit_upgrade:maybe_upgrade_local() of ok -> ensure_schema_integrity(); version_not_available -> ok = schema_ok_or_move() - end, - ok; - {[AnotherNode|_], _} -> + end; + {[AnotherNode|_], _, _} -> %% Subsequent node in cluster, catch up ensure_version_ok( rpc:call(AnotherNode, rabbit_version, recorded, [])), - IsDiskNode = ClusterNodes == [] orelse - lists:member(node(), ClusterNodes), + {CopyType, CopyTypeAlt} = + case WantDiscNode of + true -> {disc, disc_copies}; + false -> {ram, ram_copies} + end, ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, disc_copies), - ok = 
create_local_table_copies(case IsDiskNode of - true -> disc; - false -> ram - end), + ok = create_local_table_copy(schema, CopyTypeAlt), + ok = create_local_table_copies(CopyType), + ok = SecondaryPostMnesiaFun(), + %% We've taken down mnesia, so ram nodes will need + %% to re-sync + case is_disc_node() of + false -> start_mnesia(), + mnesia:change_config(extra_db_nodes, + ProperClusterNodes), + wait_for_replicated_tables(); + true -> ok + end, + ensure_schema_integrity(), ok end; @@ -497,7 +579,7 @@ schema_ok_or_move() -> "and recreating schema from scratch~n", [Reason]), ok = move_db(), - ok = create_schema() + ok = create_schema(disc) end. ensure_version_ok({ok, DiscVersion}) -> @@ -509,18 +591,27 @@ ensure_version_ok({ok, DiscVersion}) -> ensure_version_ok({error, _}) -> ok = rabbit_version:record_desired(). -create_schema() -> - mnesia:stop(), - rabbit_misc:ensure_ok(mnesia:create_schema([node()]), - cannot_create_schema), - rabbit_misc:ensure_ok(mnesia:start(), - cannot_start_mnesia), - ok = create_tables(), +create_schema(Type) -> + stop_mnesia(), + case Type of + disc -> rabbit_misc:ensure_ok(mnesia:create_schema([node()]), + cannot_create_schema); + ram -> %% remove the disc schema since this is a ram node + rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), + cannot_delete_schema) + end, + start_mnesia(), + ok = create_tables(Type), ensure_schema_integrity(), ok = rabbit_version:record_desired(). +is_disc_node() -> mnesia:system_info(use_dir). + +should_be_disc_node(ClusterNodes) -> + ClusterNodes == [] orelse lists:member(node(), ClusterNodes). + move_db() -> - mnesia:stop(), + stop_mnesia(), MnesiaDir = filename:dirname(dir() ++ "/"), {{Year, Month, Day}, {Hour, Minute, Second}} = erlang:universaltime(), BackupDir = lists:flatten( @@ -538,14 +629,16 @@ move_db() -> MnesiaDir, BackupDir, Reason}}) end, ensure_mnesia_dir(), - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + start_mnesia(), ok. 
copy_db(Destination) -> ok = ensure_mnesia_not_running(), rabbit_misc:recursive_copy(dir(), Destination). -create_tables() -> +create_tables() -> create_tables(disc). + +create_tables(Type) -> lists:foreach(fun ({Tab, TabDef}) -> TabDef1 = proplists:delete(match, TabDef), case mnesia:create_table(Tab, TabDef1) of @@ -555,9 +648,13 @@ create_tables() -> Tab, TabDef1, Reason}}) end end, - table_definitions()), + table_definitions(Type)), ok. +copy_type_to_ram(TabDef) -> + [{disc_copies, []}, {ram_copies, [node()]} + | proplists:delete(ram_copies, proplists:delete(disc_copies, TabDef))]. + table_has_copy_type(TabDef, DiscType) -> lists:member(node(), proplists:get_value(DiscType, TabDef, [])). @@ -587,7 +684,7 @@ create_local_table_copies(Type) -> end, ok = create_local_table_copy(Tab, StorageType) end, - table_definitions()), + table_definitions(Type)), ok. create_local_table_copy(Tab, Type) -> @@ -618,19 +715,23 @@ wait_for_tables(TableNames) -> reset(Force) -> ensure_mnesia_not_running(), + case not Force andalso is_only_disc_node(node(), false) of + true -> log_both("no other disc nodes running"); + false -> ok + end, Node = node(), case Force of true -> ok; false -> ensure_mnesia_dir(), - rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + start_mnesia(), {Nodes, RunningNodes} = try ok = init(), {all_clustered_nodes() -- [Node], running_clustered_nodes() -- [Node]} after - mnesia:stop() + stop_mnesia() end, leave_cluster(Nodes, RunningNodes), rabbit_misc:ensure_ok(mnesia:delete_schema([Node]), @@ -653,6 +754,7 @@ leave_cluster(Nodes, RunningNodes) -> [schema, node()]) of {atomic, ok} -> true; {badrpc, nodedown} -> false; + {aborted, {node_not_running, _}} -> false; {aborted, Reason} -> throw({error, {failed_to_leave_cluster, Nodes, RunningNodes, Reason}}) @@ -663,3 +765,48 @@ leave_cluster(Nodes, RunningNodes) -> false -> throw({error, {no_running_cluster_nodes, Nodes, RunningNodes}}) end. 
+ +wait_for(Condition) -> + error_logger:info_msg("Waiting for ~p...~n", [Condition]), + timer:sleep(1000). + +on_node_up(Node) -> + case is_only_disc_node(Node, true) of + true -> rabbit_misc:with_local_io( + fun () -> rabbit_log:info("cluster contains disc " + "nodes again~n") + end); + false -> ok + end. + +on_node_down(Node) -> + case is_only_disc_node(Node, true) of + true -> rabbit_misc:with_local_io( + fun () -> rabbit_log:info("only running disc node " + "went down~n") + end); + false -> ok + end. + +is_only_disc_node(Node, _MnesiaRunning = true) -> + RunningSet = sets:from_list(running_clustered_nodes()), + DiscSet = sets:from_list(nodes_of_type(disc_copies)), + [Node] =:= sets:to_list(sets:intersection(RunningSet, DiscSet)); +is_only_disc_node(Node, false) -> + start_mnesia(), + Res = is_only_disc_node(Node, true), + stop_mnesia(), + Res. + +log_both(Warning) -> + io:format("Warning: ~s~n", [Warning]), + rabbit_misc:with_local_io( + fun () -> error_logger:warning_msg("~s~n", [Warning]) end). + +start_mnesia() -> + rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), + ensure_mnesia_running(). + +stop_mnesia() -> + stopped = mnesia:stop(), + ensure_mnesia_not_running(). diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 3f4162cd..17d5f64b 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -21,21 +21,22 @@ -export([start_link/4, successfully_recovered_state/1, client_init/4, client_terminate/1, client_delete_and_terminate/1, client_ref/1, close_all_indicated/1, - write/3, read/2, contains/2, remove/2, sync/3]). + write/3, read/2, contains/2, remove/2]). --export([sync/1, set_maximum_since_use/2, - has_readers/2, combine_files/3, delete_file/2]). %% internal +-export([set_maximum_since_use/2, has_readers/2, combine_files/3, + delete_file/2]). %% internal -export([transform_dir/3, force_recovery/2]). 
%% upgrade --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3, prioritise_call/3, prioritise_cast/2]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3, prioritise_call/3, prioritise_cast/2, + prioritise_info/2, format_message_queue/2]). %%---------------------------------------------------------------------------- -include("rabbit_msg_store.hrl"). --define(SYNC_INTERVAL, 5). %% milliseconds +-define(SYNC_INTERVAL, 25). %% milliseconds -define(CLEAN_FILENAME, "clean.dot"). -define(FILE_SUMMARY_FILENAME, "file_summary.ets"). -define(TRANSFORM_TMP, "transform_tmp"). @@ -59,7 +60,6 @@ current_file, %% current file name as number current_file_handle, %% current file handle since the last fsync? file_handle_cache, %% file handle cache - on_sync, %% pending sync requests sync_timer_ref, %% TRef for our interval timer sum_valid_data, %% sum of valid data in all files sum_file_size, %% sum of file sizes @@ -132,7 +132,8 @@ -type(msg_ref_delta_gen(A) :: fun ((A) -> 'finished' | {rabbit_types:msg_id(), non_neg_integer(), A})). --type(maybe_msg_id_fun() :: 'undefined' | fun ((gb_set()) -> any())). +-type(maybe_msg_id_fun() :: + 'undefined' | fun ((gb_set(), 'written' | 'removed') -> any())). -type(maybe_close_fds_fun() :: 'undefined' | fun (() -> 'ok')). -type(deletion_thunk() :: fun (() -> boolean())). @@ -150,10 +151,7 @@ {rabbit_types:ok(msg()) | 'not_found', client_msstate()}). -spec(contains/2 :: (rabbit_types:msg_id(), client_msstate()) -> boolean()). -spec(remove/2 :: ([rabbit_types:msg_id()], client_msstate()) -> 'ok'). --spec(sync/3 :: - ([rabbit_types:msg_id()], fun (() -> any()), client_msstate()) -> 'ok'). --spec(sync/1 :: (server()) -> 'ok'). -spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). -spec(has_readers/2 :: (non_neg_integer(), gc_state()) -> boolean()). 
-spec(combine_files/3 :: (non_neg_integer(), non_neg_integer(), gc_state()) -> @@ -441,10 +439,6 @@ contains(MsgId, CState) -> server_call(CState, {contains, MsgId}). remove([], _CState) -> ok; remove(MsgIds, CState = #client_msstate { client_ref = CRef }) -> server_cast(CState, {remove, CRef, MsgIds}). -sync(MsgIds, K, CState) -> server_cast(CState, {sync, MsgIds, K}). - -sync(Server) -> - gen_server2:cast(Server, sync). set_maximum_since_use(Server, Age) -> gen_server2:cast(Server, {set_maximum_since_use, Age}). @@ -641,7 +635,6 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> current_file = 0, current_file_handle = undefined, file_handle_cache = dict:new(), - on_sync = [], sync_timer_ref = undefined, sum_valid_data = 0, sum_file_size = 0, @@ -682,7 +675,6 @@ prioritise_call(Msg, _From, _State) -> prioritise_cast(Msg, _State) -> case Msg of - sync -> 8; {combine_files, _Source, _Destination, _Reclaimed} -> 8; {delete_file, _File, _Reclaimed} -> 8; {set_maximum_since_use, _Age} -> 8; @@ -690,6 +682,12 @@ prioritise_cast(Msg, _State) -> _ -> 0 end. +prioritise_info(Msg, _State) -> + case Msg of + sync -> 8; + _ -> 0 + end. 
+ handle_call(successfully_recovered_state, _From, State) -> reply(State #msstate.successfully_recovered, State); @@ -758,24 +756,6 @@ handle_cast({remove, CRef, MsgIds}, State) -> noreply(maybe_compact(client_confirm(CRef, gb_sets:from_list(MsgIds), removed, State1))); -handle_cast({sync, MsgIds, K}, - State = #msstate { current_file = CurFile, - current_file_handle = CurHdl, - on_sync = Syncs }) -> - {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl), - case lists:any(fun (MsgId) -> - #msg_location { file = File, offset = Offset } = - index_lookup(MsgId, State), - File =:= CurFile andalso Offset >= SyncOffset - end, MsgIds) of - false -> K(), - noreply(State); - true -> noreply(State #msstate { on_sync = [K | Syncs] }) - end; - -handle_cast(sync, State) -> - noreply(internal_sync(State)); - handle_cast({combine_files, Source, Destination, Reclaimed}, State = #msstate { sum_file_size = SumFileSize, file_handles_ets = FileHandlesEts, @@ -799,6 +779,9 @@ handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), noreply(State). +handle_info(sync, State) -> + noreply(internal_sync(State)); + handle_info(timeout, State) -> noreply(internal_sync(State)); @@ -836,6 +819,8 @@ terminate(_Reason, State = #msstate { index_state = IndexState, code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). + %%---------------------------------------------------------------------------- %% general helper functions %%---------------------------------------------------------------------------- @@ -849,31 +834,28 @@ reply(Reply, State) -> {reply, Reply, State1, Timeout}. 
next_state(State = #msstate { sync_timer_ref = undefined, - on_sync = Syncs, cref_to_msg_ids = CTM }) -> - case {Syncs, dict:size(CTM)} of - {[], 0} -> {State, hibernate}; - _ -> {start_sync_timer(State), 0} + case dict:size(CTM) of + 0 -> {State, hibernate}; + _ -> {start_sync_timer(State), 0} end; -next_state(State = #msstate { on_sync = Syncs, - cref_to_msg_ids = CTM }) -> - case {Syncs, dict:size(CTM)} of - {[], 0} -> {stop_sync_timer(State), hibernate}; - _ -> {State, 0} +next_state(State = #msstate { cref_to_msg_ids = CTM }) -> + case dict:size(CTM) of + 0 -> {stop_sync_timer(State), hibernate}; + _ -> {State, 0} end. start_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> - {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, [self()]), + TRef = erlang:send_after(?SYNC_INTERVAL, self(), sync), State #msstate { sync_timer_ref = TRef }. stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> State; stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) -> - {ok, cancel} = timer:cancel(TRef), + erlang:cancel_timer(TRef), State #msstate { sync_timer_ref = undefined }. internal_sync(State = #msstate { current_file_handle = CurHdl, - on_sync = Syncs, cref_to_msg_ids = CTM }) -> State1 = stop_sync_timer(State), CGs = dict:fold(fun (CRef, MsgIds, NS) -> @@ -882,16 +864,13 @@ internal_sync(State = #msstate { current_file_handle = CurHdl, false -> [{CRef, MsgIds} | NS] end end, [], CTM), - ok = case {Syncs, CGs} of - {[], []} -> ok; - _ -> file_handle_cache:sync(CurHdl) + ok = case CGs of + [] -> ok; + _ -> file_handle_cache:sync(CurHdl) end, - [K() || K <- lists:reverse(Syncs)], - State2 = lists:foldl( - fun ({CRef, MsgIds}, StateN) -> - client_confirm(CRef, MsgIds, written, StateN) - end, State1, CGs), - State2 #msstate { on_sync = [] }. + lists:foldl(fun ({CRef, MsgIds}, StateN) -> + client_confirm(CRef, MsgIds, written, StateN) + end, State1, CGs). 
write_action({true, not_found}, _MsgId, State) -> {ignore, undefined, State}; diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index 451e56e8..b2abcba6 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -21,7 +21,7 @@ node_listeners/1, connections/0, connection_info_keys/0, connection_info/1, connection_info/2, connection_info_all/0, connection_info_all/1, - close_connection/2]). + close_connection/2, force_connection_event_refresh/0]). %%used by TCP-based transports, e.g. STOMP adapter -export([check_tcp_listener_address/2, @@ -30,6 +30,9 @@ -export([tcp_listener_started/3, tcp_listener_stopped/3, start_client/1, start_ssl_client/2]). +%% Internal +-export([connections_local/0]). + -include("rabbit.hrl"). -include_lib("kernel/include/inet.hrl"). @@ -59,6 +62,7 @@ -spec(active_listeners/0 :: () -> [rabbit_types:listener()]). -spec(node_listeners/1 :: (node()) -> [rabbit_types:listener()]). -spec(connections/0 :: () -> [rabbit_types:connection()]). +-spec(connections_local/0 :: () -> [rabbit_types:connection()]). -spec(connection_info_keys/0 :: () -> rabbit_types:info_keys()). -spec(connection_info/1 :: (rabbit_types:connection()) -> rabbit_types:infos()). @@ -69,6 +73,8 @@ -spec(connection_info_all/1 :: (rabbit_types:info_keys()) -> [rabbit_types:infos()]). -spec(close_connection/2 :: (pid(), string()) -> 'ok'). +-spec(force_connection_event_refresh/0 :: () -> 'ok'). + -spec(on_node_down/1 :: (node()) -> 'ok'). -spec(check_tcp_listener_address/2 :: (atom(), listener_config()) -> [{inet:ip_address(), ip_port(), family(), atom()}]). @@ -270,10 +276,13 @@ start_ssl_client(SslOpts, Sock) -> start_client(Sock, ssl_transform_fun(SslOpts)). connections() -> + rabbit_misc:append_rpc_all_nodes(rabbit_mnesia:running_clustered_nodes(), + rabbit_networking, connections_local, []). 
+ +connections_local() -> [rabbit_connection_sup:reader(ConnSup) || - Node <- rabbit_mnesia:running_clustered_nodes(), {_, ConnSup, supervisor, _} - <- supervisor:which_children({rabbit_tcp_client_sup, Node})]. + <- supervisor:which_children(rabbit_tcp_client_sup)]. connection_info_keys() -> rabbit_reader:info_keys(). @@ -289,6 +298,10 @@ close_connection(Pid, Explanation) -> false -> throw({error, {not_a_connection_pid, Pid}}) end. +force_connection_event_refresh() -> + [rabbit_reader:force_event_refresh(C) || C <- connections()], + ok. + %%-------------------------------------------------------------------- tcp_host({0,0,0,0}) -> diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl index 1f30a2fc..cb4f826d 100644 --- a/src/rabbit_node_monitor.erl +++ b/src/rabbit_node_monitor.erl @@ -60,24 +60,19 @@ notify_cluster() -> %%-------------------------------------------------------------------- init([]) -> - ok = net_kernel:monitor_nodes(true), {ok, no_state}. handle_call(_Request, _From, State) -> {noreply, State}. handle_cast({rabbit_running_on, Node}, State) -> - rabbit_log:info("node ~p up~n", [Node]), + rabbit_log:info("rabbit on ~p up~n", [Node]), erlang:monitor(process, {rabbit, Node}), - ok = rabbit_alarm:on_node_up(Node), + ok = handle_live_rabbit(Node), {noreply, State}; handle_cast(_Msg, State) -> {noreply, State}. -handle_info({nodedown, Node}, State) -> - rabbit_log:info("node ~p down~n", [Node]), - ok = handle_dead_rabbit(Node), - {noreply, State}; handle_info({'DOWN', _MRef, process, {rabbit, Node}, _Reason}, State) -> rabbit_log:info("node ~p lost 'rabbit'~n", [Node]), ok = handle_dead_rabbit(Node), @@ -99,4 +94,9 @@ code_change(_OldVsn, State, _Extra) -> handle_dead_rabbit(Node) -> ok = rabbit_networking:on_node_down(Node), ok = rabbit_amqqueue:on_node_down(Node), - ok = rabbit_alarm:on_node_down(Node). + ok = rabbit_alarm:on_node_down(Node), + ok = rabbit_mnesia:on_node_down(Node). 
+ +handle_live_rabbit(Node) -> + ok = rabbit_alarm:on_node_up(Node), + ok = rabbit_mnesia:on_node_up(Node). diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index bf89cdb2..636913b5 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -569,13 +569,13 @@ add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, add_to_journal(RelSeq, Action, Segment = #segment { journal_entries = JEntries, unacked = UnackedCount }) -> - Segment1 = Segment #segment { - journal_entries = add_to_journal(RelSeq, Action, JEntries) }, - case Action of - del -> Segment1; - ack -> Segment1 #segment { unacked = UnackedCount - 1 }; - ?PUB -> Segment1 #segment { unacked = UnackedCount + 1 } - end; + Segment #segment { + journal_entries = add_to_journal(RelSeq, Action, JEntries), + unacked = UnackedCount + case Action of + ?PUB -> +1; + del -> 0; + ack -> -1 + end}; add_to_journal(RelSeq, Action, JEntries) -> Val = case array:get(RelSeq, JEntries) of @@ -1013,7 +1013,7 @@ add_queue_ttl_segment(<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, {[<<?PUB_PREFIX:?PUB_PREFIX_BITS, IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>, MsgId, expiry_to_binary(undefined)], Rest}; add_queue_ttl_segment(<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, - RelSeq:?REL_SEQ_BITS, Rest>>) -> + RelSeq:?REL_SEQ_BITS, Rest/binary>>) -> {<<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, RelSeq:?REL_SEQ_BITS>>, Rest}; add_queue_ttl_segment(_) -> diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index dffabf85..7eec2a2e 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -18,7 +18,8 @@ -include("rabbit_framing.hrl"). -include("rabbit.hrl"). --export([start_link/3, info_keys/0, info/1, info/2, shutdown/2]). +-export([start_link/3, info_keys/0, info/1, info/2, force_event_refresh/1, + shutdown/2]). -export([system_continue/3, system_terminate/4, system_code_change/4]). @@ -28,8 +29,6 @@ -export([process_channel_frame/5]). 
%% used by erlang-client --export([emit_stats/1]). - -define(HANDSHAKE_TIMEOUT, 10). -define(NORMAL_TIMEOUT, 3). -define(CLOSING_TIMEOUT, 1). @@ -70,7 +69,7 @@ -spec(info_keys/0 :: () -> rabbit_types:info_keys()). -spec(info/1 :: (pid()) -> rabbit_types:infos()). -spec(info/2 :: (pid(), rabbit_types:info_keys()) -> rabbit_types:infos()). --spec(emit_stats/1 :: (pid()) -> 'ok'). +-spec(force_event_refresh/1 :: (pid()) -> 'ok'). -spec(shutdown/2 :: (pid(), string()) -> 'ok'). -spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). -spec(server_properties/1 :: (rabbit_types:protocol()) -> @@ -126,8 +125,8 @@ info(Pid, Items) -> {error, Error} -> throw(Error) end. -emit_stats(Pid) -> - gen_server:cast(Pid, emit_stats). +force_event_refresh(Pid) -> + gen_server:cast(Pid, force_event_refresh). conserve_memory(Pid, Conserve) -> Pid ! {conserve_memory, Conserve}, @@ -323,8 +322,12 @@ handle_other({'$gen_call', From, {info, Items}}, Deb, State) -> catch Error -> {error, Error} end), mainloop(Deb, State); -handle_other({'$gen_cast', emit_stats}, Deb, State) -> - mainloop(Deb, internal_emit_stats(State)); +handle_other({'$gen_cast', force_event_refresh}, Deb, State) -> + rabbit_event:notify(connection_created, + [{type, network} | infos(?CREATION_EVENT_KEYS, State)]), + mainloop(Deb, State); +handle_other(emit_stats, Deb, State) -> + mainloop(Deb, emit_stats(State)); handle_other({system, From, Request}, Deb, State = #v1{parent = Parent}) -> sys:handle_system_msg(Request, From, Parent, ?MODULE, Deb, State); handle_other(Other, _Deb, _State) -> @@ -591,10 +594,8 @@ refuse_connection(Sock, Exception) -> ensure_stats_timer(State = #v1{stats_timer = StatsTimer, connection_state = running}) -> - Self = self(), State#v1{stats_timer = rabbit_event:ensure_stats_timer( - StatsTimer, - fun() -> emit_stats(Self) end)}; + StatsTimer, self(), emit_stats)}; ensure_stats_timer(State) -> State. 
@@ -694,7 +695,7 @@ handle_method0(#'connection.open'{virtual_host = VHostPath}, [{type, network} | infos(?CREATION_EVENT_KEYS, State1)]), rabbit_event:if_enabled(StatsTimer, - fun() -> internal_emit_stats(State1) end), + fun() -> emit_stats(State1) end), State1; handle_method0(#'connection.close'{}, State) when ?IS_RUNNING(State) -> lists:foreach(fun rabbit_channel:shutdown/1, all_channels()), @@ -923,6 +924,6 @@ send_exception(State = #v1{connection = #connection{protocol = Protocol}}, State1#v1.sock, 0, CloseMethod, Protocol), State1. -internal_emit_stats(State = #v1{stats_timer = StatsTimer}) -> +emit_stats(State = #v1{stats_timer = StatsTimer}) -> rabbit_event:notify(connection_stats, infos(?STATISTICS_KEYS, State)), State#v1{stats_timer = rabbit_event:reset_stats_timer(StatsTimer)}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 4065cdd1..bbca55b4 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -20,6 +20,8 @@ -export([all_tests/0, test_parsing/0]). +-import(rabbit_misc, [pget/2]). + -include("rabbit.hrl"). -include("rabbit_framing.hrl"). -include_lib("kernel/include/file.hrl"). @@ -86,6 +88,7 @@ run_cluster_dependent_tests(SecondaryNode) -> passed = test_delegates_sync(SecondaryNode), passed = test_queue_cleanup(SecondaryNode), passed = test_declare_on_dead_queue(SecondaryNode), + passed = test_refresh_events(SecondaryNode), %% we now run the tests remotely, so that code coverage on the %% local node picks up more of the delegate @@ -95,7 +98,8 @@ run_cluster_dependent_tests(SecondaryNode) -> fun () -> Rs = [ test_delegates_async(Node), test_delegates_sync(Node), test_queue_cleanup(Node), - test_declare_on_dead_queue(Node) ], + test_declare_on_dead_queue(Node), + test_refresh_events(Node) ], Self ! 
{self(), Rs} end), receive @@ -204,6 +208,42 @@ test_priority_queue() -> {true, false, 3, [{1, baz}, {0, foo}, {0, bar}], [baz, foo, bar]} = test_priority_queue(Q15), + %% 1-element infinity priority Q + Q16 = priority_queue:in(foo, infinity, Q), + {true, false, 1, [{infinity, foo}], [foo]} = test_priority_queue(Q16), + + %% add infinity to 0-priority Q + Q17 = priority_queue:in(foo, infinity, priority_queue:in(bar, Q)), + {true, false, 2, [{infinity, foo}, {0, bar}], [foo, bar]} = + test_priority_queue(Q17), + + %% and the other way around + Q18 = priority_queue:in(bar, priority_queue:in(foo, infinity, Q)), + {true, false, 2, [{infinity, foo}, {0, bar}], [foo, bar]} = + test_priority_queue(Q18), + + %% add infinity to mixed-priority Q + Q19 = priority_queue:in(qux, infinity, Q3), + {true, false, 3, [{infinity, qux}, {2, bar}, {1, foo}], [qux, bar, foo]} = + test_priority_queue(Q19), + + %% merge the above with a negative priority Q + Q20 = priority_queue:join(Q19, Q4), + {true, false, 4, [{infinity, qux}, {2, bar}, {1, foo}, {-1, foo}], + [qux, bar, foo, foo]} = test_priority_queue(Q20), + + %% merge two infinity priority queues + Q21 = priority_queue:join(priority_queue:in(foo, infinity, Q), + priority_queue:in(bar, infinity, Q)), + {true, false, 2, [{infinity, foo}, {infinity, bar}], [foo, bar]} = + test_priority_queue(Q21), + + %% merge two mixed priority with infinity queues + Q22 = priority_queue:join(Q18, Q20), + {true, false, 6, [{infinity, foo}, {infinity, qux}, {2, bar}, {1, foo}, + {0, bar}, {-1, foo}], [foo, qux, bar, foo, bar, foo]} = + test_priority_queue(Q22), + passed. priority_queue_in_all(Q, L) -> @@ -905,7 +945,6 @@ test_option_parser() -> passed. 
test_cluster_management() -> - %% 'cluster' and 'reset' should only work if the app is stopped {error, _} = control_action(cluster, []), {error, _} = control_action(reset, []), @@ -953,13 +992,16 @@ test_cluster_management() -> ok = control_action(reset, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_disc_node(), ok = control_action(force_cluster, ["invalid1@invalid", "invalid2@invalid"]), + ok = assert_ram_node(), %% join a non-existing cluster as a ram node ok = control_action(reset, []), ok = control_action(force_cluster, ["invalid1@invalid", "invalid2@invalid"]), + ok = assert_ram_node(), SecondaryNode = rabbit_misc:makenode("hare"), case net_adm:ping(SecondaryNode) of @@ -978,15 +1020,18 @@ test_cluster_management2(SecondaryNode) -> %% make a disk node ok = control_action(reset, []), ok = control_action(cluster, [NodeS]), + ok = assert_disc_node(), %% make a ram node ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS]), + ok = assert_ram_node(), %% join cluster as a ram node ok = control_action(reset, []), ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_ram_node(), %% change cluster config while remaining in same cluster ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), @@ -998,27 +1043,45 @@ test_cluster_management2(SecondaryNode) -> "invalid2@invalid"]), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_ram_node(), - %% join empty cluster as a ram node + %% join empty cluster as a ram node (converts to disc) ok = control_action(cluster, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_disc_node(), - %% turn ram node into disk node + %% make a new ram node ok = control_action(reset, []), + ok = control_action(force_cluster, [SecondaryNodeS]), + ok = control_action(start_app, []), + ok 
= control_action(stop_app, []), + ok = assert_ram_node(), + + %% turn ram node into disk node ok = control_action(cluster, [SecondaryNodeS, NodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_disc_node(), %% convert a disk node into a ram node + ok = assert_disc_node(), ok = control_action(force_cluster, ["invalid1@invalid", "invalid2@invalid"]), + ok = assert_ram_node(), + + %% make a new disk node + ok = control_action(force_reset, []), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + ok = assert_disc_node(), %% turn a disk node into a ram node ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), + ok = assert_ram_node(), %% NB: this will log an inconsistent_database error, which is harmless %% Turning cover on / off is OK even if we're not in general using cover, @@ -1044,6 +1107,10 @@ test_cluster_management2(SecondaryNode) -> {error, {no_running_cluster_nodes, _, _}} = control_action(reset, []), + %% attempt to change type when no other node is alive + {error, {no_running_cluster_nodes, _, _}} = + control_action(cluster, [SecondaryNodeS]), + %% leave system clustered, with the secondary node as a ram node ok = control_action(force_reset, []), ok = control_action(start_app, []), @@ -1137,15 +1204,16 @@ test_server_status() -> {ok, Ch} = rabbit_channel:start_link( 1, self(), Writer, self(), rabbit_framing_amqp_0_9_1, user(<<"user">>), <<"/">>, [], self(), - fun (_) -> {ok, self()} end), + rabbit_limiter:make_token(self())), [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>], {new, Queue = #amqqueue{}} <- [rabbit_amqqueue:declare( rabbit_misc:r(<<"/">>, queue, Name), false, false, [], none)]], - ok = rabbit_amqqueue:basic_consume(Q, true, Ch, undefined, - <<"ctag">>, true, undefined), + ok = rabbit_amqqueue:basic_consume( + Q, true, Ch, rabbit_limiter:make_token(), + <<"ctag">>, true, undefined), 
%% list queues ok = info_action(list_queues, rabbit_amqqueue:info_keys(), true), @@ -1203,14 +1271,34 @@ test_spawn() -> Writer = spawn(fun () -> test_writer(Me) end), {ok, Ch} = rabbit_channel:start_link( 1, Me, Writer, Me, rabbit_framing_amqp_0_9_1, - user(<<"guest">>), <<"/">>, [], self(), - fun (_) -> {ok, self()} end), + user(<<"guest">>), <<"/">>, [], Me, + rabbit_limiter:make_token(self())), ok = rabbit_channel:do(Ch, #'channel.open'{}), receive #'channel.open_ok'{} -> ok after 1000 -> throw(failed_to_receive_channel_open_ok) end, {Writer, Ch}. +test_spawn(Node) -> + rpc:call(Node, ?MODULE, test_spawn_remote, []). + +%% Spawn an arbitrary long lived process, so we don't end up linking +%% the channel to the short-lived process (RPC, here) spun up by the +%% RPC server. +test_spawn_remote() -> + RPC = self(), + spawn(fun () -> + {Writer, Ch} = test_spawn(), + RPC ! {Writer, Ch}, + link(Ch), + receive + _ -> ok + end + end), + receive Res -> Res + after 1000 -> throw(failed_to_receive_result) + end. + user(Username) -> #user{username = Username, tags = [administrator], @@ -1218,25 +1306,6 @@ user(Username) -> impl = #internal_user{username = Username, tags = [administrator]}}. -test_statistics_event_receiver(Pid) -> - receive - Foo -> Pid ! Foo, test_statistics_event_receiver(Pid) - end. - -test_statistics_receive_event(Ch, Matcher) -> - rabbit_channel:flush(Ch), - rabbit_channel:emit_stats(Ch), - test_statistics_receive_event1(Ch, Matcher). - -test_statistics_receive_event1(Ch, Matcher) -> - receive #event{type = channel_stats, props = Props} -> - case Matcher(Props) of - true -> Props; - _ -> test_statistics_receive_event1(Ch, Matcher) - end - after 1000 -> throw(failed_to_receive_event) - end. - test_confirms() -> {_Writer, Ch} = test_spawn(), DeclareBindDurableQueue = @@ -1297,6 +1366,25 @@ test_confirms() -> passed. +test_statistics_event_receiver(Pid) -> + receive + Foo -> Pid ! Foo, test_statistics_event_receiver(Pid) + end. 
+ +test_statistics_receive_event(Ch, Matcher) -> + rabbit_channel:flush(Ch), + Ch ! emit_stats, + test_statistics_receive_event1(Ch, Matcher). + +test_statistics_receive_event1(Ch, Matcher) -> + receive #event{type = channel_stats, props = Props} -> + case Matcher(Props) of + true -> Props; + _ -> test_statistics_receive_event1(Ch, Matcher) + end + after 1000 -> throw(failed_to_receive_event) + end. + test_statistics() -> application:set_env(rabbit, collect_statistics, fine), @@ -1314,7 +1402,7 @@ test_statistics() -> QPid = Q#amqqueue.pid, X = rabbit_misc:r(<<"/">>, exchange, <<"">>), - rabbit_tests_event_receiver:start(self()), + rabbit_tests_event_receiver:start(self(), [node()], [channel_stats]), %% Check stats empty Event = test_statistics_receive_event(Ch, fun (_) -> true end), @@ -1357,6 +1445,36 @@ test_statistics() -> rabbit_tests_event_receiver:stop(), passed. +test_refresh_events(SecondaryNode) -> + rabbit_tests_event_receiver:start(self(), [node(), SecondaryNode], + [channel_created, queue_created]), + + {_Writer, Ch} = test_spawn(), + expect_events(Ch, channel_created), + rabbit_channel:shutdown(Ch), + + {_Writer2, Ch2} = test_spawn(SecondaryNode), + expect_events(Ch2, channel_created), + rabbit_channel:shutdown(Ch2), + + {new, #amqqueue { pid = QPid } = Q} = + rabbit_amqqueue:declare(test_queue(), false, false, [], none), + expect_events(QPid, queue_created), + rabbit_amqqueue:delete(Q, false, false), + + rabbit_tests_event_receiver:stop(), + passed. + +expect_events(Pid, Type) -> + expect_event(Pid, Type), + rabbit:force_event_refresh(), + expect_event(Pid, Type). + +expect_event(Pid, Type) -> + receive #event{type = Type, props = Props} -> Pid = pget(pid, Props) + after 1000 -> throw({failed_to_receive_event, Type}) + end. + test_delegates_async(SecondaryNode) -> Self = self(), Sender = fun (Pid) -> Pid ! 
{invoked, Self} end, @@ -1462,16 +1580,19 @@ test_queue_cleanup(_SecondaryNode) -> ok after 1000 -> throw(failed_to_receive_queue_declare_ok) end, + rabbit_channel:shutdown(Ch), rabbit:stop(), rabbit:start(), - rabbit_channel:do(Ch, #'queue.declare'{ passive = true, - queue = ?CLEANUP_QUEUE_NAME }), + {_Writer2, Ch2} = test_spawn(), + rabbit_channel:do(Ch2, #'queue.declare'{ passive = true, + queue = ?CLEANUP_QUEUE_NAME }), receive #'channel.close'{reply_code = ?NOT_FOUND} -> ok after 2000 -> throw(failed_to_receive_channel_exit) end, + rabbit_channel:shutdown(Ch2), passed. test_declare_on_dead_queue(SecondaryNode) -> @@ -1582,6 +1703,18 @@ clean_logs(Files, Suffix) -> end || File <- Files], ok. +assert_ram_node() -> + case rabbit_mnesia:is_disc_node() of + true -> exit('not_ram_node'); + false -> ok + end. + +assert_disc_node() -> + case rabbit_mnesia:is_disc_node() of + true -> ok; + false -> exit('not_disc_node') + end. + delete_file(File) -> case file:delete(File) of ok -> ok; @@ -1673,6 +1806,10 @@ test_backing_queue() -> passed = test_queue_recover(), application:set_env(rabbit, queue_index_max_journal_entries, MaxJournal, infinity), + %% We will have restarted the message store, and thus changed + %% the order of the children of rabbit_sup. This will cause + %% problems if there are subsequent failures - see bug 24262. + ok = restart_app(), passed; _ -> passed @@ -1689,25 +1826,49 @@ msg_id_bin(X) -> msg_store_client_init(MsgStore, Ref) -> rabbit_msg_store:client_init(MsgStore, Ref, undefined, undefined). +on_disk_capture() -> + on_disk_capture({gb_sets:new(), gb_sets:new(), undefined}). +on_disk_capture({OnDisk, Awaiting, Pid}) -> + Pid1 = case Pid =/= undefined andalso gb_sets:is_empty(Awaiting) of + true -> Pid ! 
{self(), arrived}, undefined; + false -> Pid + end, + receive + {await, MsgIds, Pid2} -> + true = Pid1 =:= undefined andalso gb_sets:is_empty(Awaiting), + on_disk_capture({OnDisk, gb_sets:subtract(MsgIds, OnDisk), Pid2}); + {on_disk, MsgIds} -> + on_disk_capture({gb_sets:union(OnDisk, MsgIds), + gb_sets:subtract(Awaiting, MsgIds), + Pid1}); + stop -> + done + end. + +on_disk_await(Pid, MsgIds) when is_list(MsgIds) -> + Pid ! {await, gb_sets:from_list(MsgIds), self()}, + receive {Pid, arrived} -> ok end. + +on_disk_stop(Pid) -> + MRef = erlang:monitor(process, Pid), + Pid ! stop, + receive {'DOWN', MRef, process, Pid, _Reason} -> + ok + end. + +msg_store_client_init_capture(MsgStore, Ref) -> + Pid = spawn(fun on_disk_capture/0), + {Pid, rabbit_msg_store:client_init( + MsgStore, Ref, fun (MsgIds, _ActionTaken) -> + Pid ! {on_disk, MsgIds} + end, undefined)}. + msg_store_contains(Atom, MsgIds, MSCState) -> Atom = lists:foldl( fun (MsgId, Atom1) when Atom1 =:= Atom -> rabbit_msg_store:contains(MsgId, MSCState) end, Atom, MsgIds). -msg_store_sync(MsgIds, MSCState) -> - Ref = make_ref(), - Self = self(), - ok = rabbit_msg_store:sync(MsgIds, fun () -> Self ! {sync, Ref} end, - MSCState), - receive - {sync, Ref} -> ok - after - 10000 -> - io:format("Sync from msg_store missing for msg_ids ~p~n", [MsgIds]), - throw(timeout) - end. 
- msg_store_read(MsgIds, MSCState) -> lists:foldl(fun (MsgId, MSCStateM) -> {{ok, MsgId}, MSCStateN} = rabbit_msg_store:read( @@ -1741,22 +1902,18 @@ foreach_with_msg_store_client(MsgStore, Ref, Fun, L) -> test_msg_store() -> restart_msg_store_empty(), - Self = self(), MsgIds = [msg_id_bin(M) || M <- lists:seq(1,100)], {MsgIds1stHalf, MsgIds2ndHalf} = lists:split(50, MsgIds), Ref = rabbit_guid:guid(), - MSCState = msg_store_client_init(?PERSISTENT_MSG_STORE, Ref), + {Cap, MSCState} = msg_store_client_init_capture(?PERSISTENT_MSG_STORE, Ref), %% check we don't contain any of the msgs we're about to publish false = msg_store_contains(false, MsgIds, MSCState), %% publish the first half ok = msg_store_write(MsgIds1stHalf, MSCState), %% sync on the first half - ok = msg_store_sync(MsgIds1stHalf, MSCState), + ok = on_disk_await(Cap, MsgIds1stHalf), %% publish the second half ok = msg_store_write(MsgIds2ndHalf, MSCState), - %% sync on the first half again - the msg_store will be dirty, but - %% we won't need the fsync - ok = msg_store_sync(MsgIds1stHalf, MSCState), %% check they're all in there true = msg_store_contains(true, MsgIds, MSCState), %% publish the latter half twice so we hit the caching and ref count code @@ -1765,25 +1922,8 @@ test_msg_store() -> true = msg_store_contains(true, MsgIds, MSCState), %% sync on the 2nd half, but do lots of individual syncs to try %% and cause coalescing to happen - ok = lists:foldl( - fun (MsgId, ok) -> rabbit_msg_store:sync( - [MsgId], fun () -> Self ! 
{sync, MsgId} end, - MSCState) - end, ok, MsgIds2ndHalf), - lists:foldl( - fun(MsgId, ok) -> - receive - {sync, MsgId} -> ok - after - 10000 -> - io:format("Sync from msg_store missing (msg_id: ~p)~n", - [MsgId]), - throw(timeout) - end - end, ok, MsgIds2ndHalf), - %% it's very likely we're not dirty here, so the 1st half sync - %% should hit a different code path - ok = msg_store_sync(MsgIds1stHalf, MSCState), + ok = on_disk_await(Cap, MsgIds2ndHalf), + ok = on_disk_stop(Cap), %% read them all MSCState1 = msg_store_read(MsgIds, MSCState), %% read them all again - this will hit the cache, not disk @@ -1912,6 +2052,10 @@ with_empty_test_queue(Fun) -> {0, Qi} = init_test_queue(), rabbit_queue_index:delete_and_terminate(Fun(Qi)). +restart_app() -> + rabbit:stop(), + rabbit:start(). + queue_index_publish(SeqIds, Persistent, Qi) -> Ref = rabbit_guid:guid(), MsgStore = case Persistent of @@ -2151,7 +2295,7 @@ wait_for_confirms(Unconfirmed) -> wait_for_confirms( gb_sets:difference(Unconfirmed, gb_sets:from_list(Confirmed))) - after 1000 -> exit(timeout_waiting_for_confirm) + after 5000 -> exit(timeout_waiting_for_confirm) end end. diff --git a/src/rabbit_tests_event_receiver.erl b/src/rabbit_tests_event_receiver.erl index 12c43faf..abcbe0b6 100644 --- a/src/rabbit_tests_event_receiver.erl +++ b/src/rabbit_tests_event_receiver.erl @@ -16,36 +16,43 @@ -module(rabbit_tests_event_receiver). --export([start/1, stop/0]). +-export([start/3, stop/0]). -export([init/1, handle_call/2, handle_event/2, handle_info/2, terminate/2, code_change/3]). -start(Pid) -> - gen_event:add_handler(rabbit_event, ?MODULE, [Pid]). +-include("rabbit.hrl"). + +start(Pid, Nodes, Types) -> + Oks = [ok || _ <- Nodes], + {Oks, _} = rpc:multicall(Nodes, gen_event, add_handler, + [rabbit_event, ?MODULE, [Pid, Types]]). stop() -> gen_event:delete_handler(rabbit_event, ?MODULE, []). %%---------------------------------------------------------------------------- -init([Pid]) -> - {ok, Pid}. 
+init([Pid, Types]) -> + {ok, {Pid, Types}}. -handle_call(_Request, Pid) -> - {ok, not_understood, Pid}. +handle_call(_Request, State) -> + {ok, not_understood, State}. -handle_event(Event, Pid) -> - Pid ! Event, - {ok, Pid}. +handle_event(Event = #event{type = Type}, State = {Pid, Types}) -> + case lists:member(Type, Types) of + true -> Pid ! Event; + false -> ok + end, + {ok, State}. -handle_info(_Info, Pid) -> - {ok, Pid}. +handle_info(_Info, State) -> + {ok, State}. -terminate(_Arg, _Pid) -> +terminate(_Arg, _State) -> ok. -code_change(_OldVsn, Pid, _Extra) -> - {ok, Pid}. +code_change(_OldVsn, State, _Extra) -> + {ok, State}. %%---------------------------------------------------------------------------- diff --git a/src/rabbit_trace.erl b/src/rabbit_trace.erl index 7d36856a..f9632324 100644 --- a/src/rabbit_trace.erl +++ b/src/rabbit_trace.erl @@ -76,7 +76,7 @@ update_config(Fun) -> {ok, VHosts0} = application:get_env(rabbit, ?TRACE_VHOSTS), VHosts = Fun(VHosts0), application:set_env(rabbit, ?TRACE_VHOSTS, VHosts), - rabbit_channel:refresh_config_all(), + rabbit_channel:refresh_config_local(), ok. %%---------------------------------------------------------------------------- diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index a2abb1e5..9739f6b7 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -144,7 +144,7 @@ upgrade_mode(AllNodes) -> case nodes_running(AllNodes) of [] -> AfterUs = rabbit_mnesia:read_previously_running_nodes(), - case {is_disc_node(), AfterUs} of + case {is_disc_node_legacy(), AfterUs} of {true, []} -> primary; {true, _} -> @@ -182,12 +182,6 @@ upgrade_mode(AllNodes) -> end end. -is_disc_node() -> - %% This is pretty ugly but we can't start Mnesia and ask it (will hang), - %% we can't look at the config file (may not include us even if we're a - %% disc node). - filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")). 
- die(Msg, Args) -> %% We don't throw or exit here since that gets thrown %% straight out into do_boot, generating an erl_crash.dump @@ -218,7 +212,7 @@ force_tables() -> secondary_upgrade(AllNodes) -> %% must do this before we wipe out schema - IsDiscNode = is_disc_node(), + IsDiscNode = is_disc_node_legacy(), rabbit_misc:ensure_ok(mnesia:delete_schema([node()]), cannot_delete_schema), %% Note that we cluster with all nodes, rather than all disc nodes @@ -282,6 +276,14 @@ lock_filename() -> lock_filename(dir()). lock_filename(Dir) -> filename:join(Dir, ?LOCK_FILENAME). backup_dir() -> dir() ++ "-upgrade-backup". +is_disc_node_legacy() -> + %% This is pretty ugly but we can't start Mnesia and ask it (will + %% hang), we can't look at the config file (may not include us + %% even if we're a disc node). We also can't use + %% rabbit_mnesia:is_disc_node/0 because that will give false + %% postivies on Rabbit up to 2.5.1. + filelib:is_regular(filename:join(dir(), "rabbit_durable_exchange.DCD")). + %% NB: we cannot use rabbit_log here since it may not have been %% started yet info(Msg, Args) -> error_logger:info_msg(Msg, Args). diff --git a/src/supervisor2.erl b/src/supervisor2.erl index ec1ee9cd..405949ef 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -76,7 +76,6 @@ %% Internal exports -export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). -export([handle_cast/2]). --export([delayed_restart/2]). -define(DICT, dict). @@ -157,9 +156,6 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> end; check_childspecs(X) -> {error, {badarg, X}}. -delayed_restart(Supervisor, RestartDetails) -> - gen_server:cast(Supervisor, {delayed_restart, RestartDetails}). - %%% --------------------------------------------------- %%% %%% Initialize the supervisor. @@ -355,12 +351,19 @@ handle_call(which_children, _From, State) -> State#state.children), {reply, Resp, State}. 
+%%% Hopefully cause a function-clause as there is no API function +%%% that utilizes cast. +handle_cast(null, State) -> + error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", + []), + + {noreply, State}. -handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) +handle_info({delayed_restart, {RestartType, Reason, Child}}, State) when ?is_simple(State) -> {ok, NState} = do_restart(RestartType, Reason, Child, State), {noreply, NState}; -handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) -> +handle_info({delayed_restart, {RestartType, Reason, Child}}, State) -> case get_child(Child#child.name, State) of {value, Child1} -> {ok, NState} = do_restart(RestartType, Reason, Child1, State), @@ -369,14 +372,6 @@ handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) -> {noreply, State} end; -%%% Hopefully cause a function-clause as there is no API function -%%% that utilizes cast. -handle_cast(null, State) -> - error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", - []), - - {noreply, State}. - %% %% Take care of terminated children. %% @@ -539,9 +534,9 @@ do_restart({RestartType, Delay}, Reason, Child, State) -> {ok, NState} -> {ok, NState}; {terminate, NState} -> - {ok, _TRef} = timer:apply_after( - trunc(Delay*1000), ?MODULE, delayed_restart, - [self(), {{RestartType, Delay}, Reason, Child}]), + _TRef = erlang:send_after(trunc(Delay*1000), self(), + {delayed_restart, + {{RestartType, Delay}, Reason, Child}}), {ok, state_del_child(Child, NState)} end; do_restart(permanent, Reason, Child, State) -> |