diff options
91 files changed, 8644 insertions, 1597 deletions
@@ -10,7 +10,8 @@ syntax: regexp ^cover/ ^dist/ ^include/rabbit_framing\.hrl$ -^src/rabbit_framing\.erl$ +^include/rabbit_framing_spec\.hrl$ +^src/rabbit_framing_amqp.*\.erl$ ^src/.*\_usage.erl$ ^rabbit\.plt$ ^basic.plt$ @@ -12,10 +12,10 @@ EBIN_DIR=ebin INCLUDE_DIR=include DOCS_DIR=docs INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) $(INCLUDE_DIR)/rabbit_framing.hrl -SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing.erl $(USAGES_ERL) +SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing_amqp_0_9_1.erl $(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl $(USAGES_ERL) BEAM_TARGETS=$(patsubst $(SOURCE_DIR)/%.erl, $(EBIN_DIR)/%.beam, $(SOURCES)) TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS) -WEB_URL=http://stage.rabbitmq.com/ +WEB_URL=http://www.rabbitmq.com/ MANPAGES=$(patsubst %.xml, %.gz, $(wildcard $(DOCS_DIR)/*.[0-9].xml)) WEB_MANPAGES=$(patsubst %.xml, %.man.xml, $(wildcard $(DOCS_DIR)/*.[0-9].xml) $(DOCS_DIR)/rabbitmq-service.xml) USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml $(DOCS_DIR)/rabbitmq-multi.1.xml @@ -40,11 +40,11 @@ BASIC_PLT=basic.plt RABBIT_PLT=rabbit.plt ifndef USE_SPECS -# our type specs rely on features / bug fixes in dialyzer that are -# only available in R13B01 upwards (R13B01 is eshell 5.7.2) +# our type specs rely on features and bug fixes in dialyzer that are +# only available in R14A upwards (R13B04 is erts 5.7.5) # # NB: the test assumes that version number will only contain single digits -USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.1" ]; then echo "true"; else echo "false"; fi) +USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.5" ]; then echo "true"; else echo "false"; fi) endif #other args: +native +"{hipe,[o3,verbose]}" -Ddebug=true +debug_info +no_strict_record_tests @@ -56,7 +56,8 @@ TARGET_SRC_DIR=dist/$(TARBALL_NAME) SIBLING_CODEGEN_DIR=../rabbitmq-codegen/ AMQP_CODEGEN_DIR=$(shell [ -d $(SIBLING_CODEGEN_DIR) ] && echo $(SIBLING_CODEGEN_DIR) || echo codegen) -AMQP_SPEC_JSON_PATH=$(AMQP_CODEGEN_DIR)/amqp-0.8.json +AMQP_SPEC_JSON_FILES_0_9_1=$(AMQP_CODEGEN_DIR)/amqp-rabbitmq-0.9.1.json +AMQP_SPEC_JSON_FILES_0_8=$(AMQP_CODEGEN_DIR)/amqp-rabbitmq-0.8.json ERL_CALL=erl_call -sname $(RABBITMQ_NODENAME) -e @@ -70,6 +71,24 @@ define usage_dep $(call usage_xml_to_erl, $(1)): $(1) $(DOCS_DIR)/usage.xsl endef +ifneq "$(SBIN_DIR)" "" +ifneq "$(TARGET_DIR)" "" +SCRIPTS_REL_PATH=$(shell ./calculate-relative $(TARGET_DIR)/sbin $(SBIN_DIR)) +endif +endif + +# Versions prior to this are not supported +NEED_MAKE := 3.80 +ifneq "$(NEED_MAKE)" "$(firstword $(sort $(NEED_MAKE) $(MAKE_VERSION)))" +$(error Versions of make prior to $(NEED_MAKE) are not supported) +endif + +# .DEFAULT_GOAL introduced in 3.81 +DEFAULT_GOAL_MAKE := 3.81 +ifneq "$(DEFAULT_GOAL_MAKE)" "$(firstword $(sort $(DEFAULT_GOAL_MAKE) $(MAKE_VERSION)))" +.DEFAULT_GOAL=all +endif + all: $(TARGETS) $(DEPS_FILE): $(SOURCES) $(INCLUDES) @@ -81,11 +100,14 @@ $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app $(EBIN_DIR)/%.beam: erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< -$(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_PATH) - $(PYTHON) codegen.py header $(AMQP_SPEC_JSON_PATH) $@ +$(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_9_1) $(AMQP_SPEC_JSON_FILES_0_8) + $(PYTHON) codegen.py --ignore-conflicts header $(AMQP_SPEC_JSON_FILES_0_9_1) $(AMQP_SPEC_JSON_FILES_0_8) $@ + +$(SOURCE_DIR)/rabbit_framing_amqp_0_9_1.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_9_1) + $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES_0_9_1) $@ -$(SOURCE_DIR)/rabbit_framing.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_PATH) - $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_PATH) $@ +$(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_8) + $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES_0_8) $@ dialyze: $(BEAM_TARGETS) $(BASIC_PLT) $(ERL_EBIN) -eval \ @@ -110,7 +132,7 @@ $(BASIC_PLT): $(BEAM_TARGETS) clean: rm -f $(EBIN_DIR)/*.beam rm -f $(EBIN_DIR)/rabbit.app $(EBIN_DIR)/rabbit.boot $(EBIN_DIR)/rabbit.script $(EBIN_DIR)/rabbit.rel - rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(SOURCE_DIR)/rabbit_framing.erl codegen.pyc + rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(SOURCE_DIR)/rabbit_framing_amqp_*.erl codegen.pyc rm -f $(DOCS_DIR)/*.[0-9].gz $(DOCS_DIR)/*.man.xml $(DOCS_DIR)/*.erl $(USAGES_ERL) rm -f $(RABBIT_PLT) rm -f $(DEPS_FILE) @@ -184,7 +206,7 @@ srcdist: distclean >> $(TARGET_SRC_DIR)/INSTALL cp README.in $(TARGET_SRC_DIR)/README elinks -dump -no-references -no-numbering $(WEB_URL)build-server.html \ - >> $(TARGET_SRC_DIR)/BUILD + >> $(TARGET_SRC_DIR)/README sed -i.save 's/%%VSN%%/$(VERSION)/' $(TARGET_SRC_DIR)/ebin/rabbit_app.in && rm -f $(TARGET_SRC_DIR)/ebin/rabbit_app.in.save cp -r $(AMQP_CODEGEN_DIR)/* $(TARGET_SRC_DIR)/codegen/ @@ -205,9 +227,10 @@ distclean: clean # xmlto can not read from standard input, so we mess with a tmp file. %.gz: %.xml $(DOCS_DIR)/examples-to-end.xsl - xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \ - xmlto man -o $(DOCS_DIR) --stringparam man.indent.verbatims=0 $<.tmp && \ - gzip -f $(DOCS_DIR)/`basename $< .xml` + xmlto --version | grep -E '^xmlto version 0\.0\.([0-9]|1[1-8])$$' >/dev/null || opt='--stringparam man.indent.verbatims=0' ; \ + xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \ + xmlto man -o $(DOCS_DIR) $$opt $<.tmp && \ + gzip -f $(DOCS_DIR)/`basename $< .xml` rm -f $<.tmp # Use tmp files rather than a pipeline so that we get meaningful errors @@ -234,13 +257,7 @@ $(SOURCE_DIR)/%_usage.erl: docs_all: $(MANPAGES) $(WEB_MANPAGES) -install: SCRIPTS_REL_PATH=$(shell ./calculate-relative $(TARGET_DIR)/sbin $(SBIN_DIR)) install: all docs_all install_dirs - @[ -n "$(TARGET_DIR)" ] || (echo "Please set TARGET_DIR."; false) - @[ -n "$(SBIN_DIR)" ] || (echo "Please set SBIN_DIR."; false) - @[ -n "$(MAN_DIR)" ] || (echo "Please set MAN_DIR."; false) - - mkdir -p $(TARGET_DIR) cp -r ebin include LICENSE LICENSE-MPL-RabbitMQ INSTALL $(TARGET_DIR) chmod 0755 scripts/* @@ -256,10 +273,16 @@ install: all docs_all install_dirs done install_dirs: - mkdir -p $(SBIN_DIR) + @ OK=true && \ + { [ -n "$(TARGET_DIR)" ] || { echo "Please set TARGET_DIR."; OK=false; }; } && \ + { [ -n "$(SBIN_DIR)" ] || { echo "Please set SBIN_DIR."; OK=false; }; } && \ + { [ -n "$(MAN_DIR)" ] || { echo "Please set MAN_DIR."; OK=false; }; } && $$OK + mkdir -p $(TARGET_DIR)/sbin + mkdir -p $(SBIN_DIR) + mkdir -p $(MAN_DIR) -$(foreach XML, $(USAGES_XML), $(eval $(call usage_dep, $(XML)))) +$(foreach XML,$(USAGES_XML),$(eval $(call usage_dep, $(XML)))) # Note that all targets which depend on clean must have clean in their # name. Also any target that doesn't depend on clean should not have @@ -93,6 +93,27 @@ class PackedMethodBitField: def full(self): return self.count() == 8 +def multiLineFormat(things, prologue, separator, lineSeparator, epilogue, thingsPerLine = 4): + r = [prologue] + i = 0 + for t in things: + if i != 0: + if i % thingsPerLine == 0: + r += [lineSeparator] + else: + r += [separator] + r += [t] + i += 1 + r += [epilogue] + return "".join(r) + +def prettyType(typeName, subTypes, typesPerLine = 4): + """Pretty print a type signature made up of many alternative subtypes""" + sTs = multiLineFormat(subTypes, + "( ", " | ", "\n | ", " )", + thingsPerLine = typesPerLine) + return "-type(%s ::\n %s)." % (typeName, sTs) + def printFileHeader(): print """%% Autogenerated code. Do not edit. %% @@ -294,11 +315,16 @@ def genErl(spec): methods = spec.allMethods() printFileHeader() - print """-module(rabbit_framing). --include("rabbit_framing.hrl"). - + module = "rabbit_framing_amqp_%d_%d" % (spec.major, spec.minor) + if spec.revision != 0: + module = "%s_%d" % (module, spec.revision) + if module == "rabbit_framing_amqp_8_0": + module = "rabbit_framing_amqp_0_8" + print "-module(%s)." % module + print """-include("rabbit_framing.hrl"). + +-export([version/0]). -export([lookup_method_name/1]). - -export([method_id/1]). -export([method_has_content/1]). -export([is_method_synchronous/1]). @@ -311,10 +337,92 @@ def genErl(spec): -export([lookup_amqp_exception/1]). -export([amqp_exception/1]). +""" + print "%% Various types" + print "-ifdef(use_specs)." + + print """-export_type([amqp_table/0, amqp_property_type/0, amqp_method_record/0, + amqp_method_name/0, amqp_method/0, amqp_class_id/0, + amqp_value/0, amqp_array/0, amqp_exception/0, amqp_property_record/0]). + +-type(amqp_field_type() :: + 'longstr' | 'signedint' | 'decimal' | 'timestamp' | + 'table' | 'byte' | 'double' | 'float' | 'long' | + 'short' | 'bool' | 'binary' | 'void'). +-type(amqp_property_type() :: + 'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' | + 'longlongint' | 'timestamp' | 'bit' | 'table'). + +-type(amqp_table() :: [{binary(), amqp_field_type(), amqp_value()}]). +-type(amqp_array() :: [{amqp_field_type(), amqp_value()}]). +-type(amqp_value() :: binary() | % longstr + integer() | % signedint + {non_neg_integer(), non_neg_integer()} | % decimal + amqp_table() | + amqp_array() | + byte() | % byte + float() | % double + integer() | % long + integer() | % short + boolean() | % bool + binary() | % binary + 'undefined' | % void + non_neg_integer() % timestamp + ). +""" + + print prettyType("amqp_method_name()", + [m.erlangName() for m in methods]) + print prettyType("amqp_method()", + ["{%s, %s}" % (m.klass.index, m.index) for m in methods], + 6) + print prettyType("amqp_method_record()", + ["#%s{}" % (m.erlangName()) for m in methods]) + fieldNames = set() + for m in methods: + fieldNames.update(m.arguments) + fieldNames = [erlangize(f.name) for f in fieldNames] + print prettyType("amqp_method_field_name()", + fieldNames) + print prettyType("amqp_property_record()", + ["#'P_%s'{}" % erlangize(c.name) for c in spec.allClasses()]) + print prettyType("amqp_exception()", + ["'%s'" % erlangConstantName(c).lower() for (c, v, cls) in spec.constants]) + print prettyType("amqp_exception_code()", + ["%i" % v for (c, v, cls) in spec.constants]) + classIds = set() + for m in spec.allMethods(): + classIds.add(m.klass.index) + print prettyType("amqp_class_id()", + ["%i" % ci for ci in classIds]) + print "-endif. % use_specs" + + print """ +%% Method signatures +-ifdef(use_specs). +-spec(version/0 :: () -> {non_neg_integer(), non_neg_integer(), non_neg_integer()}). +-spec(lookup_method_name/1 :: (amqp_method()) -> amqp_method_name()). +-spec(method_id/1 :: (amqp_method_name()) -> amqp_method()). +-spec(method_has_content/1 :: (amqp_method_name()) -> boolean()). +-spec(is_method_synchronous/1 :: (amqp_method_record()) -> boolean()). +-spec(method_record/1 :: (amqp_method_name()) -> amqp_method_record()). +-spec(method_fieldnames/1 :: (amqp_method_name()) -> [amqp_method_field_name()]). +-spec(decode_method_fields/2 :: (amqp_method_name(), binary()) -> amqp_method_record()). +-spec(decode_properties/2 :: (non_neg_integer(), binary()) -> amqp_property_record()). +-spec(encode_method_fields/1 :: (amqp_method_record()) -> binary()). +-spec(encode_properties/1 :: (amqp_method_record()) -> binary()). +-spec(lookup_amqp_exception/1 :: (amqp_exception()) -> {boolean(), amqp_exception_code(), binary()}). +-spec(amqp_exception/1 :: (amqp_exception_code()) -> amqp_exception()). +-endif. % use_specs + bitvalue(true) -> 1; bitvalue(false) -> 0; bitvalue(undefined) -> 0. """ + version = "{%d, %d, %d}" % (spec.major, spec.minor, spec.revision) + if version == '{8, 0, 0}': version = '{0, 8, 0}' + print "version() -> %s." % (version) + for m in methods: genLookupMethodName(m) print "lookup_method_name({_ClassId, _MethodId} = Id) -> exit({unknown_method_id, Id})." @@ -373,8 +481,6 @@ def genHrl(spec): methods = spec.allMethods() printFileHeader() - print "-define(PROTOCOL_VERSION_MAJOR, %d)." % (spec.major) - print "-define(PROTOCOL_VERSION_MINOR, %d)." % (spec.minor) print "-define(PROTOCOL_PORT, %d)." % (spec.port) for (c,v,cls) in spec.constants: @@ -388,6 +494,7 @@ def genHrl(spec): for c in spec.allClasses(): print "-record('P_%s', {%s})." % (erlangize(c.name), fieldNameList(c.fields)) + def generateErl(specPath): genErl(AmqpSpec(specPath)) @@ -395,5 +502,6 @@ def generateHrl(specPath): genHrl(AmqpSpec(specPath)) if __name__ == "__main__": - do_main(generateHrl, generateErl) + do_main_dict({"header": generateHrl, + "body": generateErl}) diff --git a/docs/html-to-website-xml.xsl b/docs/html-to-website-xml.xsl index f2117e26..662dbea0 100644 --- a/docs/html-to-website-xml.xsl +++ b/docs/html-to-website-xml.xsl @@ -58,13 +58,13 @@ <!-- Specific instructions to revert the DocBook HTML to be more like our ad-hoc XML schema --> <xsl:template match="div[@class='refsect1'] | div[@class='refnamediv'] | div[@class='refsynopsisdiv']"> - <doc:section name="{@title}"> + <doc:section name="{h2}"> <xsl:apply-templates select="node()"/> </doc:section> </xsl:template> <xsl:template match="div[@class='refsect2']"> - <doc:subsection name="{@title}"> + <doc:subsection name="{h3}"> <xsl:apply-templates select="node()"/> </doc:subsection> </xsl:template> diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index 5e2668c1..a7d064f1 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -270,8 +270,8 @@ <title>Cluster management</title> <variablelist> - <varlistentry> - <term><cmdsynopsis><command>cluster</command> <arg choice="req"><replaceable>clusternode</replaceable></arg></cmdsynopsis></term> + <varlistentry id="cluster"> + <term><cmdsynopsis><command>cluster</command><arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> <listitem> <variablelist> <varlistentry> @@ -281,7 +281,8 @@ </variablelist> <para> Instruct the node to become member of a cluster with the - specified nodes. + specified nodes. To cluster with currently offline nodes, + use <link linkend="force_cluster"><command>force_cluster</command></link>. </para> <para> Cluster nodes can be of two types: disk or ram. Disk nodes @@ -334,6 +335,29 @@ </para> </listitem> </varlistentry> + <varlistentry id="force_cluster"> + <term><cmdsynopsis><command>force_cluster</command><arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> + <listitem> + <variablelist> + <varlistentry> + <term>clusternode</term> + <listitem><para>Subset of the nodes of the cluster to which this node should be connected.</para></listitem> + </varlistentry> + </variablelist> + <para> + Instruct the node to become member of a cluster with the + specified nodes. This will succeed even if the specified nodes + are offline. For a more detailed description, see + <link linkend="cluster"><command>cluster</command>.</link> + </para> + <para> + Note that this variant of the cluster command just + ignores the current status of the specified nodes. + Clustering may still fail for a variety of other + reasons. + </para> + </listitem> + </varlistentry> </variablelist> </refsect2> @@ -603,10 +627,12 @@ <para role="example-prefix">For example:</para> <screen role="example">rabbitmqctl list_permissions -p /myvhost</screen> <para role="example"> - This command instructs the RabbitMQ broker to list all the - users which have been granted access to the virtual host - called <command>/myvhost</command>, and the permissions they - have for operations on resources in that virtual host. + This command instructs the RabbitMQ broker to list all + the users which have been granted access to the virtual + host called <command>/myvhost</command>, and the + permissions they have for operations on resources in + that virtual host. Note that an empty string means no + permissions granted. </para> </listitem> </varlistentry> @@ -862,6 +888,10 @@ <listitem><para>Number of channels using the connection.</para></listitem> </varlistentry> <varlistentry> + <term>protocol</term> + <listitem><para>Version of the AMQP protocol in use (currently one of <command>{0,9,1}</command> or <command>{0,8,0}</command>). Note that if a client requests an AMQP 0-9 connection, we treat it as AMQP 0-9-1.</para></listitem> + </varlistentry> + <varlistentry> <term>user</term> <listitem><para>Username associated with the connection.</para></listitem> </varlistentry> diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index bdf407eb..2cd28abb 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -11,16 +11,19 @@ rabbit_sup, rabbit_tcp_client_sup]}, {applications, [kernel, stdlib, sasl, mnesia, os_mon]}, -%% we also depend on ssl but it shouldn't be in here as we don't -%% actually want to start it +%% we also depend on crypto, public_key and ssl but they shouldn't be +%% in here as we don't actually want to start it {mod, {rabbit, []}}, {env, [{tcp_listeners, [{"0.0.0.0", 5672}]}, {ssl_listeners, []}, {ssl_options, []}, {vm_memory_high_watermark, 0.4}, - {backing_queue_module, rabbit_invariable_queue}, + {msg_store_index_module, rabbit_msg_store_ets_index}, + {backing_queue_module, rabbit_variable_queue}, {persister_max_wrap_entries, 500}, {persister_hibernate_after, 10000}, + {msg_store_file_size_limit, 16777216}, + {queue_index_max_journal_entries, 262144}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, {default_vhost, <<"/">>}, diff --git a/include/rabbit.hrl b/include/rabbit.hrl index 145f6104..6364d60f 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -36,7 +36,8 @@ -record(vhost, {virtual_host, dummy}). --record(connection, {user, timeout_sec, frame_max, vhost, client_properties}). +-record(connection, {protocol, user, timeout_sec, frame_max, vhost, + client_properties}). -record(content, {class_id, @@ -44,6 +45,7 @@ properties_bin, %% either 'none', or an encoded properties binary %% Note: at most one of properties and properties_bin can be %% 'none' at once. + protocol, %% The protocol under which properties_bin was encoded payload_fragments_rev %% list of binaries, in reverse order (!) }). @@ -51,7 +53,8 @@ -record(exchange, {name, type, durable, auto_delete, arguments}). --record(amqqueue, {name, durable, auto_delete, arguments, pid}). +-record(amqqueue, {name, durable, auto_delete, exclusive_owner = none, + arguments, pid}). %% mnesia doesn't like unary records, so we add a dummy 'value' field -record(route, {binding, value = const}). @@ -67,115 +70,13 @@ -record(ssl_socket, {tcp, ssl}). -record(delivery, {mandatory, immediate, txn, sender, message}). - -record(amqp_error, {name, explanation, method = none}). %%---------------------------------------------------------------------------- --ifdef(use_specs). - --include("rabbit_framing_spec.hrl"). - --type(maybe(T) :: T | 'none'). --type(erlang_node() :: atom()). --type(ssl_socket() :: #ssl_socket{}). --type(socket() :: port() | ssl_socket()). --type(thunk(T) :: fun(() -> T)). --type(info_key() :: atom()). --type(info() :: {info_key(), any()}). --type(regexp() :: binary()). --type(file_path() :: string()). - -%% this is really an abstract type, but dialyzer does not support them --type(guid() :: binary()). --type(txn() :: guid()). --type(pkey() :: guid()). --type(r(Kind) :: - #resource{virtual_host :: vhost(), - kind :: Kind, - name :: resource_name()}). --type(queue_name() :: r('queue')). --type(exchange_name() :: r('exchange')). --type(user() :: - #user{username :: username(), - password :: password()}). --type(permission() :: - #permission{configure :: regexp(), - write :: regexp(), - read :: regexp()}). --type(amqqueue() :: - #amqqueue{name :: queue_name(), - durable :: boolean(), - auto_delete :: boolean(), - arguments :: amqp_table(), - pid :: maybe(pid())}). --type(exchange() :: - #exchange{name :: exchange_name(), - type :: exchange_type(), - durable :: boolean(), - auto_delete :: boolean(), - arguments :: amqp_table()}). --type(binding() :: - #binding{exchange_name :: exchange_name(), - queue_name :: queue_name(), - key :: binding_key()}). -%% TODO: make this more precise by tying specific class_ids to -%% specific properties --type(undecoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: 'none', - properties_bin :: binary(), - payload_fragments_rev :: [binary()]} | - #content{class_id :: amqp_class_id(), - properties :: amqp_properties(), - properties_bin :: 'none', - payload_fragments_rev :: [binary()]}). --type(unencoded_content() :: undecoded_content()). --type(decoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: amqp_properties(), - properties_bin :: maybe(binary()), - payload_fragments_rev :: [binary()]}). --type(encoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: maybe(amqp_properties()), - properties_bin :: binary(), - payload_fragments_rev :: [binary()]}). --type(content() :: undecoded_content() | decoded_content()). --type(basic_message() :: - #basic_message{exchange_name :: exchange_name(), - routing_key :: routing_key(), - content :: content(), - guid :: guid(), - is_persistent :: boolean()}). --type(message() :: basic_message()). --type(delivery() :: - #delivery{mandatory :: boolean(), - immediate :: boolean(), - txn :: maybe(txn()), - sender :: pid(), - message :: message()}). -%% this really should be an abstract type --type(msg_id() :: non_neg_integer()). --type(qmsg() :: {queue_name(), pid(), msg_id(), boolean(), message()}). --type(listener() :: - #listener{node :: erlang_node(), - protocol :: atom(), - host :: string() | atom(), - port :: non_neg_integer()}). --type(not_found() :: {'error', 'not_found'}). --type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). --type(amqp_error() :: - #amqp_error{name :: atom(), - explanation :: string(), - method :: atom()}). - --endif. - -%%---------------------------------------------------------------------------- - -define(COPYRIGHT_MESSAGE, "Copyright (C) 2007-2010 LShift Ltd., Cohesive Financial Technologies LLC., and Rabbit Technologies Ltd."). -define(INFORMATION_MESSAGE, "Licensed under the MPL. See http://www.rabbitmq.com/"). +-define(PROTOCOL_VERSION, "AMQP 0-9-1 / 0-9 / 0-8"). -define(ERTS_MINIMUM, "5.6.3"). -define(MAX_WAIT, 16#ffffffff). diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 1b536dfa..005994f0 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -31,33 +31,34 @@ -type(fetch_result() :: %% Message, IsDelivered, AckTag, Remaining_Len - ('empty'|{basic_message(), boolean(), ack(), non_neg_integer()})). + ('empty'|{rabbit_types:basic_message(), boolean(), ack(), non_neg_integer()})). -type(is_durable() :: boolean()). -type(attempt_recovery() :: boolean()). -type(purged_msg_count() :: non_neg_integer()). -type(ack_required() :: boolean()). --spec(start/1 :: ([queue_name()]) -> 'ok'). --spec(init/3 :: (queue_name(), is_durable(), attempt_recovery()) -> state()). +-spec(start/1 :: ([rabbit_amqqueue:name()]) -> 'ok'). +-spec(stop/0 :: () -> 'ok'). +-spec(init/3 :: (rabbit_amqqueue:name(), is_durable(), attempt_recovery()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}). --spec(publish/2 :: (basic_message(), state()) -> state()). +-spec(publish/2 :: (rabbit_types:basic_message(), state()) -> state()). -spec(publish_delivered/3 :: - (ack_required(), basic_message(), state()) -> {ack(), state()}). + (ack_required(), rabbit_types:basic_message(), state()) -> {ack(), state()}). -spec(fetch/2 :: (ack_required(), state()) -> {fetch_result(), state()}). -spec(ack/2 :: ([ack()], state()) -> state()). --spec(tx_publish/3 :: (txn(), basic_message(), state()) -> state()). --spec(tx_ack/3 :: (txn(), [ack()], state()) -> state()). --spec(tx_rollback/2 :: (txn(), state()) -> {[ack()], state()}). --spec(tx_commit/3 :: (txn(), fun (() -> any()), state()) -> {[ack()], state()}). +-spec(tx_publish/3 :: (rabbit_types:txn(), rabbit_types:basic_message(), state()) -> state()). +-spec(tx_ack/3 :: (rabbit_types:txn(), [ack()], state()) -> state()). +-spec(tx_rollback/2 :: (rabbit_types:txn(), state()) -> {[ack()], state()}). +-spec(tx_commit/3 :: (rabbit_types:txn(), fun (() -> any()), state()) -> {[ack()], state()}). -spec(requeue/2 :: ([ack()], state()) -> state()). -spec(len/1 :: (state()) -> non_neg_integer()). -spec(is_empty/1 :: (state()) -> boolean()). -spec(set_ram_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). -spec(ram_duration/1 :: (state()) -> {number(), state()}). --spec(needs_sync/1 :: (state()) -> boolean()). --spec(sync/1 :: (state()) -> state()). +-spec(needs_idle_timeout/1 :: (state()) -> boolean()). +-spec(idle_timeout/1 :: (state()) -> state()). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/include/rabbit_exchange_type_spec.hrl b/include/rabbit_exchange_type_spec.hrl index 9864f1eb..f05bcb84 100644 --- a/include/rabbit_exchange_type_spec.hrl +++ b/include/rabbit_exchange_type_spec.hrl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -31,12 +31,19 @@ -ifdef(use_specs). -spec(description/0 :: () -> [{atom(), any()}]). --spec(publish/2 :: (exchange(), delivery()) -> {routing_result(), [pid()]}). --spec(validate/1 :: (exchange()) -> 'ok'). --spec(create/1 :: (exchange()) -> 'ok'). --spec(recover/2 :: (exchange(), list(binding())) -> 'ok'). --spec(delete/2 :: (exchange(), list(binding())) -> 'ok'). --spec(add_binding/2 :: (exchange(), binding()) -> 'ok'). --spec(remove_bindings/2 :: (exchange(), list(binding())) -> 'ok'). +-spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) + -> {rabbit_router:routing_result(), [pid()]}). +-spec(validate/1 :: (rabbit_types:exchange()) -> 'ok'). +-spec(create/1 :: (rabbit_types:exchange()) -> 'ok'). +-spec(recover/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(delete/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(add_binding/2 :: (rabbit_types:exchange(), + rabbit_types:binding()) -> 'ok'). +-spec(remove_bindings/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(assert_args_equivalence/2 :: (rabbit_types:exchange(), + rabbit_framing:amqp_table()) -> 'ok'). -endif. diff --git a/include/rabbit_framing_spec.hrl b/include/rabbit_msg_store.hrl index 1a979899..d96fa758 100644 --- a/include/rabbit_framing_spec.hrl +++ b/include/rabbit_msg_store.hrl @@ -29,32 +29,13 @@ %% Contributor(s): ______________________________________. %% -%% TODO: much of this should be generated +-include("rabbit.hrl"). --type(amqp_field_type() :: - 'longstr' | 'signedint' | 'decimal' | 'timestamp' | - 'table' | 'byte' | 'double' | 'float' | 'long' | - 'short' | 'bool' | 'binary' | 'void'). --type(amqp_property_type() :: - 'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' | - 'longlongint' | 'timestamp' | 'bit' | 'table'). -%% we could make this more precise but ultimately are limited by -%% dialyzer's lack of support for recursive types --type(amqp_table() :: [{binary(), amqp_field_type(), any()}]). -%% TODO: make this more precise --type(amqp_class_id() :: non_neg_integer()). -%% TODO: make this more precise --type(amqp_properties() :: tuple()). -%% TODO: make this more precise --type(amqp_method() :: tuple()). -%% TODO: make this more precise --type(amqp_method_name() :: atom()). --type(channel_number() :: non_neg_integer()). --type(resource_name() :: binary()). --type(routing_key() :: binary()). --type(username() :: binary()). --type(password() :: binary()). --type(vhost() :: binary()). --type(ctag() :: binary()). --type(exchange_type() :: atom()). --type(binding_key() :: binary()). +-ifdef(use_specs). + +-type(msg() :: any()). + +-endif. + +-record(msg_location, + {guid, ref_count, file, offset, total_size}). diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl new file mode 100644 index 00000000..fba0b7cd --- /dev/null +++ b/include/rabbit_msg_store_index.hrl @@ -0,0 +1,59 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-include("rabbit_msg_store.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(dir() :: any()). +-type(index_state() :: any()). +-type(keyvalue() :: any()). +-type(fieldpos() :: non_neg_integer()). +-type(fieldvalue() :: any()). + +-spec(new/1 :: (dir()) -> index_state()). +-spec(recover/1 :: (dir()) -> rabbit_types:ok_or_error2(index_state(), any())). +-spec(lookup/2 :: + (rabbit_guid:guid(), index_state()) -> ('not_found' | keyvalue())). +-spec(insert/2 :: (keyvalue(), index_state()) -> 'ok'). +-spec(update/2 :: (keyvalue(), index_state()) -> 'ok'). +-spec(update_fields/3 :: (rabbit_guid:guid(), ({fieldpos(), fieldvalue()} | + [{fieldpos(), fieldvalue()}]), + index_state()) -> 'ok'). +-spec(delete/2 :: (rabbit_guid:guid(), index_state()) -> 'ok'). +-spec(delete_by_file/2 :: (fieldvalue(), index_state()) -> 'ok'). +-spec(terminate/1 :: (index_state()) -> any()). + +-endif. + +%%---------------------------------------------------------------------------- diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec index 4a7240e0..bf03c6ac 100644 --- a/packaging/RPMS/Fedora/rabbitmq-server.spec +++ b/packaging/RPMS/Fedora/rabbitmq-server.spec @@ -105,6 +105,12 @@ if [ $1 = 0 ]; then # Leave rabbitmq user and group fi +# Clean out plugin activation state, both on uninstall and upgrade +rm -rf %{_rabbit_erllibdir}/priv +for ext in rel script boot ; do + rm -f %{_rabbit_erllibdir}/ebin/rabbit.$ext +done + %files -f ../%{name}.files %defattr(-,root,root,-) %attr(0750, rabbitmq, rabbitmq) %dir %{_localstatedir}/lib/rabbitmq @@ -120,6 +126,12 @@ fi rm -rf %{buildroot} %changelog +* Wed Jul 14 2010 Emile Joubert <emile@rabbitmq.com> 1.8.1-1 +- New Upstream Release + +* Tue Jun 15 2010 Matthew Sackman <matthew@rabbitmq.com> 1.8.0-1 +- New Upstream Release + * Mon Feb 15 2010 Matthew Sackman <matthew@lshift.net> 1.7.2-1 - New Upstream Release diff --git a/packaging/common/rabbitmq-server.ocf b/packaging/common/rabbitmq-server.ocf index 97c58ea2..b969535a 100755 --- a/packaging/common/rabbitmq-server.ocf +++ b/packaging/common/rabbitmq-server.ocf @@ -35,21 +35,21 @@ ## ## OCF instance parameters -## OCF_RESKEY_multi -## OCF_RESKEY_ctl -## OCF_RESKEY_nodename -## OCF_RESKEY_ip -## OCF_RESKEY_port -## OCF_RESKEY_cluster_config_file -## OCF_RESKEY_config_file -## OCF_RESKEY_log_base -## OCF_RESKEY_mnesia_base -## OCF_RESKEY_server_start_args +## OCF_RESKEY_multi +## OCF_RESKEY_ctl +## OCF_RESKEY_nodename +## OCF_RESKEY_ip +## OCF_RESKEY_port +## OCF_RESKEY_config_file +## OCF_RESKEY_log_base +## OCF_RESKEY_mnesia_base +## OCF_RESKEY_server_start_args ####################################################################### # Initialization: -. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat} +. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs ####################################################################### @@ -63,7 +63,7 @@ OCF_RESKEY_log_base_default="/var/log/rabbitmq" : ${OCF_RESKEY_log_base=${OCF_RESKEY_log_base_default}} meta_data() { - cat <<END + cat <<END <?xml version="1.0"?> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> <resource-agent name="rabbitmq-server"> @@ -113,15 +113,7 @@ The IP address for rabbitmq-server to listen on The IP Port for rabbitmq-server to listen on </longdesc> <shortdesc lang="en">IP Port</shortdesc> -<content type="string" default="" /> -</parameter> - -<parameter name="cluster_config_file" unique="0" required="0"> -<longdesc lang="en"> -Location of the cluster config file -</longdesc> -<shortdesc lang="en">Cluster config file path</shortdesc> -<content type="string" default="" /> +<content type="integer" default="" /> </parameter> <parameter name="config_file" unique="0" required="0"> @@ -161,7 +153,8 @@ Additional arguments provided to the server on startup <actions> <action name="start" timeout="600" /> <action name="stop" timeout="120" /> -<action name="monitor" timeout="20" interval="10" depth="0" start-delay="0" /> +<action name="status" timeout="20" interval="10" /> +<action name="monitor" timeout="20" interval="10" /> <action name="validate-all" timeout="30" /> <action name="meta-data" timeout="5" /> </actions> @@ -170,8 +163,8 @@ END } rabbit_usage() { - cat <<END -usage: $0 {start|stop|monitor|validate-all|meta-data} + cat <<END +usage: $0 {start|stop|status|monitor|validate-all|meta-data} Expects to have a fully populated OCF RA-compliant environment set. END @@ -182,7 +175,6 @@ RABBITMQ_CTL=$OCF_RESKEY_ctl RABBITMQ_NODENAME=$OCF_RESKEY_nodename RABBITMQ_NODE_IP_ADDRESS=$OCF_RESKEY_ip RABBITMQ_NODE_PORT=$OCF_RESKEY_port -RABBITMQ_CLUSTER_CONFIG_FILE=$OCF_RESKEY_cluster_config_file RABBITMQ_CONFIG_FILE=$OCF_RESKEY_config_file RABBITMQ_LOG_BASE=$OCF_RESKEY_log_base RABBITMQ_MNESIA_BASE=$OCF_RESKEY_mnesia_base @@ -193,7 +185,6 @@ RABBITMQ_SERVER_START_ARGS=$OCF_RESKEY_server_start_args export_vars() { [ ! -z $RABBITMQ_NODE_IP_ADDRESS ] && export RABBITMQ_NODE_IP_ADDRESS [ ! -z $RABBITMQ_NODE_PORT ] && export RABBITMQ_NODE_PORT - [ ! -z $RABBITMQ_CLUSTER_CONFIG_FILE ] && export RABBITMQ_CLUSTER_CONFIG_FILE [ ! -z $RABBITMQ_CONFIG_FILE ] && export RABBITMQ_CONFIG_FILE [ ! -z $RABBITMQ_LOG_BASE ] && export RABBITMQ_LOG_BASE [ ! -z $RABBITMQ_MNESIA_BASE ] && export RABBITMQ_MNESIA_BASE @@ -202,35 +193,30 @@ export_vars() { rabbit_validate_partial() { if [ ! -x $RABBITMQ_MULTI ]; then - ocf_log err "rabbitmq-server multi $RABBITMQ_MULTI does not exist or is not executable"; - return $OCF_ERR_ARGS; + ocf_log err "rabbitmq-server multi $RABBITMQ_MULTI does not exist or is not executable"; + exit $OCF_ERR_INSTALLED; fi if [ ! -x $RABBITMQ_CTL ]; then - ocf_log err "rabbitmq-server ctl $RABBITMQ_CTL does not exist or is not executable"; - return $OCF_ERR_ARGS; + ocf_log err "rabbitmq-server ctl $RABBITMQ_CTL does not exist or is not executable"; + exit $OCF_ERR_INSTALLED; fi } rabbit_validate_full() { - if [ ! -z $RABBITMQ_CLUSTER_CONFIG_FILE ] && [ ! -e $RABBITMQ_CLUSTER_CONFIG_FILE ]; then - ocf_log err "rabbitmq-server cluster_config_file $RABBITMQ_CLUSTER_CONFIG_FILE does not exist or is not a file"; - return $OCF_ERR_ARGS; - fi - if [ ! -z $RABBITMQ_CONFIG_FILE ] && [ ! -e $RABBITMQ_CONFIG_FILE ]; then - ocf_log err "rabbitmq-server config_file $RABBITMQ_CONFIG_FILE does not exist or is not a file"; - return $OCF_ERR_ARGS; + ocf_log err "rabbitmq-server config_file $RABBITMQ_CONFIG_FILE does not exist or is not a file"; + exit $OCF_ERR_INSTALLED; fi if [ ! -z $RABBITMQ_LOG_BASE ] && [ ! -d $RABBITMQ_LOG_BASE ]; then - ocf_log err "rabbitmq-server log_base $RABBITMQ_LOG_BASE does not exist or is not a directory"; - return $OCF_ERR_ARGS; + ocf_log err "rabbitmq-server log_base $RABBITMQ_LOG_BASE does not exist or is not a directory"; + exit $OCF_ERR_INSTALLED; fi if [ ! -z $RABBITMQ_MNESIA_BASE ] && [ ! -d $RABBITMQ_MNESIA_BASE ]; then - ocf_log err "rabbitmq-server mnesia_base $RABBITMQ_MNESIA_BASE does not exist or is not a directory"; - return $OCF_ERR_ARGS; + ocf_log err "rabbitmq-server mnesia_base $RABBITMQ_MNESIA_BASE does not exist or is not a directory"; + exit $OCF_ERR_INSTALLED; fi rabbit_validate_partial @@ -243,25 +229,26 @@ rabbit_status() { $RABBITMQ_CTL $NODENAME_ARG status > /dev/null 2> /dev/null rc=$? case "$rc" in - 0) - return $OCF_SUCCESS - ;; - 2) - return $OCF_NOT_RUNNING - ;; - *) - ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG status: $rc" - return $OCF_ERR_GENERIC + 0) + ocf_log debug "RabbitMQ server is running normally" + return $OCF_SUCCESS + ;; + 2) + ocf_log debug "RabbitMQ server is not running" + return $OCF_NOT_RUNNING + ;; + *) + ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG status: $rc" + exit $OCF_ERR_GENERIC esac } rabbit_start() { local rc - rabbit_validate_full - rc=$? - if [ "$rc" != $OCF_SUCCESS ]; then - return $rc + if rabbit_status; then + ocf_log info "Resource already running." + return $OCF_SUCCESS fi export_vars @@ -270,24 +257,23 @@ rabbit_start() { rc=$? if [ "$rc" != 0 ]; then - ocf_log err "rabbitmq-server start command failed: $RABBITMQ_MULTI start_all 1, $rc" - return $rc + ocf_log err "rabbitmq-server start command failed: $RABBITMQ_MULTI start_all 1, $rc" + return $rc fi # Spin waiting for the server to come up. # Let the CRM/LRM time us out if required start_wait=1 while [ $start_wait = 1 ]; do - rabbit_status - rc=$? - if [ "$rc" = $OCF_SUCCESS ]; then - start_wait=0 - - elif [ "$rc" != $OCF_NOT_RUNNING ]; then - ocf_log info "rabbitmq-server start failed: $rc" - return $OCF_ERR_GENERIC - fi - sleep 2 + rabbit_status + rc=$? + if [ "$rc" = $OCF_SUCCESS ]; then + start_wait=0 + elif [ "$rc" != $OCF_NOT_RUNNING ]; then + ocf_log info "rabbitmq-server start failed: $rc" + exit $OCF_ERR_GENERIC + fi + sleep 1 done return $OCF_SUCCESS @@ -295,28 +281,34 @@ rabbit_start() { rabbit_stop() { local rc + + if ! rabbit_status; then + ocf_log info "Resource not running." + return $OCF_SUCCESS + fi + $RABBITMQ_MULTI stop_all & rc=$? if [ "$rc" != 0 ]; then - ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_MULTI stop_all, $rc" - return $rc + ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_MULTI stop_all, $rc" + return $rc fi # Spin waiting for the server to shut down. # Let the CRM/LRM time us out if required stop_wait=1 while [ $stop_wait = 1 ]; do - rabbit_status - rc=$? - if [ "$rc" = $OCF_NOT_RUNNING ]; then - stop_wait=0 + rabbit_status + rc=$? + if [ "$rc" = $OCF_NOT_RUNNING ]; then + stop_wait=0 break - elif [ "$rc" != $OCF_SUCCESS ]; then - ocf_log info "rabbitmq-server stop failed: $rc" - return $OCF_ERR_GENERIC - fi - sleep 2 + elif [ "$rc" != $OCF_SUCCESS ]; then + ocf_log info "rabbitmq-server stop failed: $rc" + exit $OCF_ERR_GENERIC + fi + sleep 1 done return $OCF_SUCCESS @@ -329,34 +321,38 @@ rabbit_monitor() { case $__OCF_ACTION in meta-data) - meta_data - exit $OCF_SUCCESS - ;; + meta_data + exit $OCF_SUCCESS + ;; usage|help) - rabbit_usage - exit $OCF_SUCCESS - ;; + rabbit_usage + exit $OCF_SUCCESS + ;; esac -rabbit_validate_partial || exit +if ocf_is_probe; then + rabbit_validate_partial +else + rabbit_validate_full +fi case $__OCF_ACTION in start) - rabbit_start + rabbit_start ;; stop) - rabbit_stop + rabbit_stop ;; - monitor) - rabbit_monitor + status|monitor) + rabbit_monitor ;; validate-all) exit $OCF_SUCCESS - ;; + ;; *) - rabbit_usage - exit $OCF_ERR_UNIMPLEMENTED - ;; + rabbit_usage + exit $OCF_ERR_UNIMPLEMENTED + ;; esac -exit $?
\ No newline at end of file +exit $? diff --git a/packaging/debs/Debian/debian/changelog b/packaging/debs/Debian/debian/changelog index 63b50749..0dccf938 100644 --- a/packaging/debs/Debian/debian/changelog +++ b/packaging/debs/Debian/debian/changelog @@ -1,3 +1,15 @@ +rabbitmq-server (1.8.1-1) lucid; urgency=low + + * New Upstream Release + + -- Emile Joubert <emile@rabbitmq.com> Wed, 14 Jul 2010 15:05:24 +0100 + +rabbitmq-server (1.8.0-1) intrepid; urgency=low + + * New Upstream Release + + -- Matthew Sackman <matthew@rabbitmq.com> Tue, 15 Jun 2010 12:48:48 +0100 + rabbitmq-server (1.7.2-1) intrepid; urgency=low * New Upstream Release diff --git a/packaging/debs/Debian/debian/postrm.in b/packaging/debs/Debian/debian/postrm.in index bfcf1f53..5290de9b 100644 --- a/packaging/debs/Debian/debian/postrm.in +++ b/packaging/debs/Debian/debian/postrm.in @@ -18,6 +18,13 @@ set -e # for details, see http://www.debian.org/doc/debian-policy/ or # the debian-policy package +remove_plugin_traces() { + # Remove traces of plugins + rm -rf @RABBIT_LIB@/priv @RABBIT_LIB@/plugins + for ext in rel script boot ; do + rm -f @RABBIT_LIB@/ebin/rabbit.$ext + done +} case "$1" in purge) @@ -34,11 +41,7 @@ case "$1" in if [ -d /etc/rabbitmq ]; then rm -r /etc/rabbitmq fi - # Remove traces of plugins - rm -rf @RABBIT_LIB@/priv @RABBIT_LIB@/plugins - for ext in rel script boot ; do - rm -f @RABBIT_LIB@/ebin/rabbit.$ext - done + remove_plugin_traces if getent passwd rabbitmq >/dev/null; then # Stop epmd if run by the rabbitmq user pkill -u rabbitmq epmd || : @@ -50,7 +53,11 @@ case "$1" in fi ;; - remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) + remove|upgrade) + remove_plugin_traces + ;; + + failed-upgrade|abort-install|abort-upgrade|disappear) ;; *) diff --git a/packaging/debs/Debian/debian/rules b/packaging/debs/Debian/debian/rules index 1238b142..6b6df33b 100644 --- a/packaging/debs/Debian/debian/rules +++ b/packaging/debs/Debian/debian/rules @@ -13,7 +13,7 @@ DOCDIR=$(DEB_DESTDIR)usr/share/doc/rabbitmq-server/ install/rabbitmq-server:: mkdir -p $(DOCDIR) - rm $(RABBIT_LIB)LICENSE* + rm $(RABBIT_LIB)LICENSE* $(RABBIT_LIB)INSTALL* for script in rabbitmqctl rabbitmq-server rabbitmq-multi; do \ install -p -D -m 0755 debian/rabbitmq-script-wrapper $(DEB_DESTDIR)usr/sbin/$$script; \ done diff --git a/packaging/macports/Makefile b/packaging/macports/Makefile index 0ef7dd5e..3a22eef0 100644 --- a/packaging/macports/Makefile +++ b/packaging/macports/Makefile @@ -31,15 +31,22 @@ $(DEST)/Portfile: Portfile.in -f checksums.sed <$^ >$@ rm checksums.sed +# The purpose of the intricate substitution below is to set up similar +# environment vars to the ones that su will on Linux. On OS X, we +# have to use the -m option to su in order to be able to set the shell +# (which for the rabbitmq user would otherwise be /dev/null). But the +# -m option means that *all* environment vars get preserved. Erlang +# needs vars such as HOME to be set. So we have to set them +# explicitly. macports: dirs $(DEST)/Portfile for f in rabbitmq-asroot-script-wrapper rabbitmq-script-wrapper ; do \ cp $(COMMON_DIR)/$$f $(DEST)/files ; \ done - sed -i -e 's|@SU_RABBITMQ_SH_C@|SHELL=/bin/sh su -m rabbitmq -c|' \ + sed -i -e 's|@SU_RABBITMQ_SH_C@|SHELL=/bin/sh HOME=/var/lib/rabbitmq USER=rabbitmq LOGNAME=rabbitmq PATH="$$(eval `PATH=MACPORTS_PREFIX/bin /usr/libexec/path_helper -s`; echo $$PATH)" su -m rabbitmq -c|' \ $(DEST)/files/rabbitmq-script-wrapper cp patch-org.macports.rabbitmq-server.plist.diff $(DEST)/files if [ -n "$(MACPORTS_USERHOST)" ] ; then \ - tar cf - -C $(MACPORTS_DIR) . | ssh $(SSH_OPTS) lshift@macrabbit ' \ + tar cf - -C $(MACPORTS_DIR) . | ssh $(SSH_OPTS) $(MACPORTS_USERHOST) ' \ d="/tmp/mkportindex.$$$$" ; \ mkdir $$d \ && cd $$d \ @@ -52,4 +59,4 @@ macports: dirs $(DEST)/Portfile fi clean: - rm -rf $(DEST) checksums.sed + rm -rf $(MACPORTS_DIR) checksums.sed diff --git a/packaging/macports/Portfile.in b/packaging/macports/Portfile.in index 62cdad8e..f30460d3 100644 --- a/packaging/macports/Portfile.in +++ b/packaging/macports/Portfile.in @@ -4,9 +4,8 @@ PortSystem 1.0 name rabbitmq-server version @VERSION@ -revision 1 categories net -maintainers rabbitmq.com:tonyg +maintainers paperplanes.de:meyer rabbitmq.com:tonyg openmaintainer platforms darwin description The RabbitMQ AMQP Server long_description \ @@ -23,8 +22,8 @@ checksums \ sha1 @sha1@ \ rmd160 @rmd160@ -depends_build port:erlang port:xmlto port:libxslt -depends_run port:erlang +depends_lib port:erlang +depends_build port:xmlto port:libxslt platform darwin 7 { depends_build-append port:py25-simplejson @@ -76,26 +75,18 @@ post-destroot { reinplace -E "s:(/etc/rabbitmq/rabbitmq.conf):${prefix}\\1:g" \ ${realsbin}/rabbitmq-env - reinplace -E "s:(CLUSTER_CONFIG_FILE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(LOG_BASE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(MNESIA_BASE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(PIDS_FILE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl + foreach var {CONFIG_FILE CLUSTER_CONFIG_FILE LOG_BASE MNESIA_BASE PIDS_FILE} { + reinplace -E "s:^($var)=/:\\1=${prefix}/:" \ + ${realsbin}/rabbitmq-multi \ + ${realsbin}/rabbitmq-server \ + ${realsbin}/rabbitmqctl + } xinstall -m 555 ${filespath}/rabbitmq-script-wrapper \ ${wrappersbin}/rabbitmq-multi + reinplace -E "s:MACPORTS_PREFIX/bin:${prefix}/bin:" \ + ${wrappersbin}/rabbitmq-multi reinplace -E "s:/usr/lib/rabbitmq/bin/:${prefix}/lib/rabbitmq/bin/:" \ ${wrappersbin}/rabbitmq-multi reinplace -E "s:/var/lib/rabbitmq:${prefix}/var/lib/rabbitmq:" \ diff --git a/packaging/macports/make-port-diff.sh b/packaging/macports/make-port-diff.sh new file mode 100755 index 00000000..3eb1b9f5 --- /dev/null +++ b/packaging/macports/make-port-diff.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# This script grabs the latest rabbitmq-server bits from the main +# macports subversion repo, and from the rabbitmq.com macports repo, +# and produces a diff from the former to the latter for submission +# through the macports trac. + +set -e + +dir=/tmp/$(basename $0).$$ +mkdir -p $dir/macports $dir/rabbitmq + +# Get the files from the macports subversion repo +cd $dir/macports +svn checkout http://svn.macports.org/repository/macports/trunk/dports/net/rabbitmq-server/ 2>&1 >/dev/null + +# Clear out the svn $id tag +sed -i -e 's|^# \$.*$|# $Id$|' rabbitmq-server/Portfile + +# Get the files from the rabbitmq.com macports repo +cd ../rabbitmq +curl -s http://www.rabbitmq.com/releases/macports/net/rabbitmq-server.tgz | tar xzf - + +cd .. +diff -Naur --exclude=.svn macports rabbitmq +cd / +rm -rf $dir diff --git a/scripts/rabbitmq-multi b/scripts/rabbitmq-multi index 8341d35c..59050692 100755 --- a/scripts/rabbitmq-multi +++ b/scripts/rabbitmq-multi @@ -29,7 +29,8 @@ ## ## Contributor(s): ______________________________________. ## -NODENAME=rabbit +[ "x" = "x$HOSTNAME" ] && HOSTNAME=`env hostname -s` +NODENAME=rabbit@${HOSTNAME%%.*} SCRIPT_HOME=$(dirname $0) PIDS_FILE=/var/lib/rabbitmq/pids MULTI_ERL_ARGS= diff --git a/scripts/rabbitmq-multi.bat b/scripts/rabbitmq-multi.bat index a4b7f2e9..a4f8c8b4 100644 --- a/scripts/rabbitmq-multi.bat +++ b/scripts/rabbitmq-multi.bat @@ -42,8 +42,12 @@ if "!RABBITMQ_BASE!"=="" ( set RABBITMQ_BASE=!APPDATA!\RabbitMQ
)
+if "!COMPUTERNAME!"=="" (
+ set COMPUTERNAME=localhost
+)
+
if "!RABBITMQ_NODENAME!"=="" (
- set RABBITMQ_NODENAME=rabbit
+ set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
)
if "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index aaa131ee..e2028728 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -30,7 +30,8 @@ ## Contributor(s): ______________________________________. ## -NODENAME=rabbit +[ "x" = "x$HOSTNAME" ] && HOSTNAME=`env hostname -s` +NODENAME=rabbit@${HOSTNAME%%.*} SERVER_ERL_ARGS="+K true +A30 +P 1048576 \ -kernel inet_default_listen_options [{nodelay,true}] \ -kernel inet_default_connect_options [{nodelay,true}]" @@ -81,12 +82,6 @@ fi [ -f "${RABBITMQ_LOGS}" ] && cat "${RABBITMQ_LOGS}" >> "${RABBITMQ_LOGS}${RABBITMQ_BACKUP_EXTENSION}" [ -f "${RABBITMQ_SASL_LOGS}" ] && cat "${RABBITMQ_SASL_LOGS}" >> "${RABBITMQ_SASL_LOGS}${RABBITMQ_BACKUP_EXTENSION}" -if [ -f "$RABBITMQ_CLUSTER_CONFIG_FILE" ]; then - RABBITMQ_CLUSTER_CONFIG_OPTION="-rabbit cluster_config \"$RABBITMQ_CLUSTER_CONFIG_FILE\"" -else - RABBITMQ_CLUSTER_CONFIG_OPTION="" -fi - RABBITMQ_START_RABBIT= [ "x" = "x$RABBITMQ_ALLOW_INPUT" ] && RABBITMQ_START_RABBIT='-noinput' @@ -138,6 +133,5 @@ exec erl \ -os_mon start_disksup false \ -os_mon start_memsup false \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ - ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ ${RABBITMQ_SERVER_START_ARGS} \ "$@" diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat index b3046594..86fbc463 100644 --- a/scripts/rabbitmq-server.bat +++ b/scripts/rabbitmq-server.bat @@ -42,8 +42,12 @@ if "!RABBITMQ_BASE!"=="" ( set RABBITMQ_BASE=!APPDATA!\RabbitMQ
)
+if "!COMPUTERNAME!"=="" (
+ set COMPUTERNAME=localhost
+)
+
if "!RABBITMQ_NODENAME!"=="" (
- set RABBITMQ_NODENAME=rabbit
+ set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
)
if "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
@@ -99,14 +103,6 @@ if exist "!SASL_LOGS!" ( rem End of log management
-if "!RABBITMQ_CLUSTER_CONFIG_FILE!"=="" (
- set RABBITMQ_CLUSTER_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq_cluster.config
-)
-set CLUSTER_CONFIG=
-if not exist "!RABBITMQ_CLUSTER_CONFIG_FILE!" GOTO L1
-set CLUSTER_CONFIG=-rabbit cluster_config \""!RABBITMQ_CLUSTER_CONFIG_FILE:\=/!"\"
-:L1
-
if "!RABBITMQ_MNESIA_DIR!"=="" (
set RABBITMQ_MNESIA_DIR=!RABBITMQ_MNESIA_BASE!/!RABBITMQ_NODENAME!-mnesia
)
diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat index a25e236a..2217a451 100644 --- a/scripts/rabbitmq-service.bat +++ b/scripts/rabbitmq-service.bat @@ -48,8 +48,12 @@ if "!RABBITMQ_BASE!"=="" ( set RABBITMQ_BASE=!APPDATA!\!RABBITMQ_SERVICENAME!
)
+if "!COMPUTERNAME!"=="" (
+ set COMPUTERNAME=localhost
+)
+
if "!RABBITMQ_NODENAME!"=="" (
- set RABBITMQ_NODENAME=rabbit
+ set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
)
if "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
@@ -132,14 +136,6 @@ if exist "!SASL_LOGS!" ( rem End of log management
-if "!RABBITMQ_CLUSTER_CONFIG_FILE!"=="" (
- set RABBITMQ_CLUSTER_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq_cluster.config
-)
-set CLUSTER_CONFIG=
-if not exist "!RABBITMQ_CLUSTER_CONFIG_FILE!" GOTO L1
-set CLUSTER_CONFIG=-rabbit cluster_config \""!RABBITMQ_CLUSTER_CONFIG_FILE:\=/!"\"
-:L1
-
if "!RABBITMQ_MNESIA_DIR!"=="" (
set RABBITMQ_MNESIA_DIR=!RABBITMQ_MNESIA_BASE!/!RABBITMQ_NODENAME!-mnesia
)
diff --git a/scripts/rabbitmqctl b/scripts/rabbitmqctl index cfb775eb..92e5312b 100755 --- a/scripts/rabbitmqctl +++ b/scripts/rabbitmqctl @@ -30,7 +30,8 @@ ## Contributor(s): ______________________________________. ## -NODENAME=rabbit +[ "x" = "x$HOSTNAME" ] && HOSTNAME=`env hostname -s` +NODENAME=rabbit@${HOSTNAME%%.*} . `dirname $0`/rabbitmq-env diff --git a/scripts/rabbitmqctl.bat b/scripts/rabbitmqctl.bat index 55572451..563b9e58 100644 --- a/scripts/rabbitmqctl.bat +++ b/scripts/rabbitmqctl.bat @@ -38,8 +38,12 @@ set TDP0=%~dp0 set STAR=%*
setlocal enabledelayedexpansion
+if "!COMPUTERNAME!"=="" (
+ set COMPUTERNAME=localhost
+)
+
if "!RABBITMQ_NODENAME!"=="" (
- set RABBITMQ_NODENAME=rabbit
+ set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
)
if not exist "!ERLANG_HOME!\bin\erl.exe" (
diff --git a/src/bpqueue.erl b/src/bpqueue.erl new file mode 100644 index 00000000..49874aa6 --- /dev/null +++ b/src/bpqueue.erl @@ -0,0 +1,286 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(bpqueue). + +%% Block-prefixed queue. From the perspective of the queue interface +%% the datastructure acts like a regular queue where each value is +%% paired with the prefix. +%% +%% This is implemented as a queue of queues, which is more space and +%% time efficient, whilst supporting the normal queue interface. Each +%% inner queue has a prefix, which does not need to be unique, and it +%% is guaranteed that no two consecutive blocks have the same +%% prefix. len/1 returns the flattened length of the queue and is +%% O(1). + +-export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2, + foldl/3, foldr/3, from_list/1, to_list/1, map_fold_filter_l/4, + map_fold_filter_r/4]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([bpqueue/0]). + +-type(bpqueue() :: {non_neg_integer(), queue()}). +-type(prefix() :: any()). +-type(value() :: any()). +-type(result() :: ({'empty', bpqueue()} | + {{'value', prefix(), value()}, bpqueue()})). + +-spec(new/0 :: () -> bpqueue()). +-spec(is_empty/1 :: (bpqueue()) -> boolean()). +-spec(len/1 :: (bpqueue()) -> non_neg_integer()). +-spec(in/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(in_r/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(out/1 :: (bpqueue()) -> result()). +-spec(out_r/1 :: (bpqueue()) -> result()). +-spec(join/2 :: (bpqueue(), bpqueue()) -> bpqueue()). +-spec(foldl/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(foldr/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()). +-spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]). +-spec(map_fold_filter_l/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). +-spec(map_fold_filter_r/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). + +-endif. + +%%---------------------------------------------------------------------------- + +new() -> {0, queue:new()}. + +is_empty({0, _Q}) -> true; +is_empty(_BPQ) -> false. + +len({N, _Q}) -> N. + +in(Prefix, Value, {0, Q}) -> + {1, queue:in({Prefix, queue:from_list([Value])}, Q)}; +in(Prefix, Value, BPQ) -> + in1({fun queue:in/2, fun queue:out_r/1}, Prefix, Value, BPQ). + +in_r(Prefix, Value, BPQ = {0, _Q}) -> + in(Prefix, Value, BPQ); +in_r(Prefix, Value, BPQ) -> + in1({fun queue:in_r/2, fun queue:out/1}, Prefix, Value, BPQ). + +in1({In, Out}, Prefix, Value, {N, Q}) -> + {N+1, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, In(Value, InnerQ)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, queue:in(Value, queue:new())}, Q) + end}. + +in_q(Prefix, Queue, BPQ = {0, Q}) -> + case queue:len(Queue) of + 0 -> BPQ; + N -> {N, queue:in({Prefix, Queue}, Q)} + end; +in_q(Prefix, Queue, BPQ) -> + in_q1({fun queue:in/2, fun queue:out_r/1, + fun queue:join/2}, + Prefix, Queue, BPQ). + +in_q_r(Prefix, Queue, BPQ = {0, _Q}) -> + in_q(Prefix, Queue, BPQ); +in_q_r(Prefix, Queue, BPQ) -> + in_q1({fun queue:in_r/2, fun queue:out/1, + fun (T, H) -> queue:join(H, T) end}, + Prefix, Queue, BPQ). + +in_q1({In, Out, Join}, Prefix, Queue, BPQ = {N, Q}) -> + case queue:len(Queue) of + 0 -> BPQ; + M -> {N + M, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, Join(InnerQ, Queue)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, Queue}, Q) + end} + end. + +out({0, _Q} = BPQ) -> {empty, BPQ}; +out(BPQ) -> out1({fun queue:in_r/2, fun queue:out/1}, BPQ). + +out_r({0, _Q} = BPQ) -> {empty, BPQ}; +out_r(BPQ) -> out1({fun queue:in/2, fun queue:out_r/1}, BPQ). + +out1({In, Out}, {N, Q}) -> + {{value, {Prefix, InnerQ}}, Q1} = Out(Q), + {{value, Value}, InnerQ1} = Out(InnerQ), + Q2 = case queue:is_empty(InnerQ1) of + true -> Q1; + false -> In({Prefix, InnerQ1}, Q1) + end, + {{value, Prefix, Value}, {N-1, Q2}}. + +join({0, _Q}, BPQ) -> + BPQ; +join(BPQ, {0, _Q}) -> + BPQ; +join({NHead, QHead}, {NTail, QTail}) -> + {{value, {Prefix, InnerQHead}}, QHead1} = queue:out_r(QHead), + {NHead + NTail, + case queue:out(QTail) of + {{value, {Prefix, InnerQTail}}, QTail1} -> + queue:join( + queue:in({Prefix, queue:join(InnerQHead, InnerQTail)}, QHead1), + QTail1); + {{value, {_Prefix, _InnerQTail}}, _QTail1} -> + queue:join(QHead, QTail) + end}. + +foldl(_Fun, Init, {0, _Q}) -> Init; +foldl( Fun, Init, {_N, Q}) -> fold1(fun queue:out/1, Fun, Init, Q). + +foldr(_Fun, Init, {0, _Q}) -> Init; +foldr( Fun, Init, {_N, Q}) -> fold1(fun queue:out_r/1, Fun, Init, Q). + +fold1(Out, Fun, Init, Q) -> + case Out(Q) of + {empty, _Q} -> + Init; + {{value, {Prefix, InnerQ}}, Q1} -> + fold1(Out, Fun, fold1(Out, Fun, Prefix, Init, InnerQ), Q1) + end. + +fold1(Out, Fun, Prefix, Init, InnerQ) -> + case Out(InnerQ) of + {empty, _Q} -> + Init; + {{value, Value}, InnerQ1} -> + fold1(Out, Fun, Prefix, Fun(Prefix, Value, Init), InnerQ1) + end. + +from_list(List) -> + {FinalPrefix, FinalInnerQ, ListOfPQs1, Len} = + lists:foldl( + fun ({_Prefix, []}, Acc) -> + Acc; + ({Prefix, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix, queue:join(InnerQ, queue:from_list(InnerList)), + ListOfPQs, LenAcc + length(InnerList)}; + ({Prefix1, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix1, queue:from_list(InnerList), + [{Prefix, InnerQ} | ListOfPQs], LenAcc + length(InnerList)} + end, {undefined, queue:new(), [], 0}, List), + ListOfPQs2 = [{FinalPrefix, FinalInnerQ} | ListOfPQs1], + [{undefined, InnerQ1} | Rest] = All = lists:reverse(ListOfPQs2), + {Len, queue:from_list(case queue:is_empty(InnerQ1) of + true -> Rest; + false -> All + end)}. + +to_list({0, _Q}) -> []; +to_list({_N, Q}) -> [{Prefix, queue:to_list(InnerQ)} || + {Prefix, InnerQ} <- queue:to_list(Q)]. + +%% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init} +%% where FilterFun(Prefix) -> boolean() +%% Fun(Value, Init) -> {Prefix, Value, Init} | stop +%% +%% The filter fun allows you to skip very quickly over blocks that +%% you're not interested in. Such blocks appear in the resulting bpq +%% without modification. The Fun is then used both to map the value, +%% which also allows you to change the prefix (and thus block) of the +%% value, and also to modify the Init/Acc (just like a fold). If the +%% Fun returns 'stop' then it is not applied to any further items. +map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter1({fun queue:out/1, fun queue:in/2, + fun in_q/3, fun join/2}, + N, PFilter, Fun, Init, Q, new()). + +map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_r(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter1({fun queue:out_r/1, fun queue:in_r/2, + fun in_q_r/3, fun (T, H) -> join(H, T) end}, + N, PFilter, Fun, Init, Q, new()). + +map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, + Init, Q, QNew) -> + case Out(Q) of + {empty, _Q} -> + {QNew, Init}; + {{value, {Prefix, InnerQ}}, Q1} -> + case PFilter(Prefix) of + true -> + {Init1, QNew1, Cont} = + map_fold_filter2(Funs, Fun, Prefix, Prefix, + Init, InnerQ, QNew, queue:new()), + case Cont of + false -> {Join(QNew1, {Len - len(QNew1), Q1}), Init1}; + true -> map_fold_filter1(Funs, Len, PFilter, Fun, + Init1, Q1, QNew1) + end; + false -> + map_fold_filter1(Funs, Len, PFilter, Fun, + Init, Q1, InQ(Prefix, InnerQ, QNew)) + end + end. + +map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, + Init, InnerQ, QNew, InnerQNew) -> + case Out(InnerQ) of + {empty, _Q} -> + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), true}; + {{value, Value}, InnerQ1} -> + case Fun(Value, Init) of + stop -> + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), false}; + {Prefix1, Value1, Init1} -> + {Prefix2, QNew1, InnerQNew1} = + case Prefix1 =:= Prefix of + true -> {Prefix, QNew, In(Value1, InnerQNew)}; + false -> {Prefix1, InQ(Prefix, InnerQNew, QNew), + In(Value1, queue:new())} + end, + map_fold_filter2(Funs, Fun, OrigPrefix, Prefix2, + Init1, InnerQ1, QNew1, InnerQNew1) + end + end. diff --git a/src/delegate.erl b/src/delegate.erl index 12eb814f..3f57953b 100644 --- a/src/delegate.erl +++ b/src/delegate.erl @@ -44,9 +44,10 @@ -ifdef(use_specs). --spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()}). --spec(invoke_no_result/2 :: (pid() | [pid()], fun((pid()) -> any())) -> 'ok'). --spec(invoke/2 :: (pid() | [pid()], fun((pid()) -> A)) -> A). +-spec(start_link/1 :: (non_neg_integer()) -> rabbit_types:ok(pid())). +-spec(invoke_no_result/2 :: + (pid() | [pid()], fun ((pid()) -> any())) -> 'ok'). +-spec(invoke/2 :: (pid() | [pid()], fun ((pid()) -> A)) -> A). -spec(process_count/0 :: () -> non_neg_integer()). @@ -63,7 +64,7 @@ start_link(Hash) -> gen_server2:start_link({local, server(Hash)}, ?MODULE, [], []). invoke(Pid, Fun) when is_pid(Pid) -> - [Res] = invoke_per_node([{node(Pid), [Pid]}], Fun), + [Res] = invoke_per_node(split_delegate_per_node([Pid]), Fun), case Res of {ok, Result, _} -> Result; @@ -73,7 +74,7 @@ invoke(Pid, Fun) when is_pid(Pid) -> invoke(Pids, Fun) when is_list(Pids) -> lists:foldl( - fun({Status, Result, Pid}, {Good, Bad}) -> + fun ({Status, Result, Pid}, {Good, Bad}) -> case Status of ok -> {[{Pid, Result}|Good], Bad}; error -> {Good, [{Pid, Result}|Bad]} @@ -83,7 +84,7 @@ invoke(Pids, Fun) when is_list(Pids) -> invoke_per_node(split_delegate_per_node(Pids), Fun)). invoke_no_result(Pid, Fun) when is_pid(Pid) -> - invoke_no_result_per_node([{node(Pid), [Pid]}], Fun), + invoke_no_result_per_node(split_delegate_per_node([Pid]), Fun), ok; invoke_no_result(Pids, Fun) when is_list(Pids) -> @@ -99,42 +100,47 @@ internal_cast(Node, Thunk) when is_atom(Node) -> gen_server2:cast({remote_server(Node), Node}, {thunk, Thunk}). split_delegate_per_node(Pids) -> - orddict:to_list( - lists:foldl( - fun (Pid, D) -> - orddict:update(node(Pid), - fun (Pids1) -> [Pid | Pids1] end, - [Pid], D) - end, - orddict:new(), Pids)). + LocalNode = node(), + {Local, Remote} = + lists:foldl( + fun (Pid, {L, D}) -> + Node = node(Pid), + case Node of + LocalNode -> {[Pid|L], D}; + _ -> {L, orddict:append(Node, Pid, D)} + end + end, + {[], orddict:new()}, Pids), + {Local, orddict:to_list(Remote)}. -invoke_per_node([{Node, Pids}], Fun) when Node == node() -> - safe_invoke(Pids, Fun); invoke_per_node(NodePids, Fun) -> lists:append(delegate_per_node(NodePids, Fun, fun internal_call/2)). -invoke_no_result_per_node([{Node, Pids}], Fun) when Node == node() -> - %% This is not actually async! However, in practice Fun will - %% always be something that does a gen_server:cast or similar, so - %% I don't think it's a problem unless someone misuses this - %% function. Making this *actually* async would be painful as we - %% can't spawn at this point or we break effect ordering. - safe_invoke(Pids, Fun); invoke_no_result_per_node(NodePids, Fun) -> delegate_per_node(NodePids, Fun, fun internal_cast/2), ok. -delegate_per_node(NodePids, Fun, DelegateFun) -> +delegate_per_node({LocalPids, NodePids}, Fun, DelegateFun) -> + %% In the case where DelegateFun is internal_cast, the safe_invoke + %% is not actually async! However, in practice Fun will always be + %% something that does a gen_server:cast or similar, so I don't + %% think it's a problem unless someone misuses this + %% function. Making this *actually* async would be painful as we + %% can't spawn at this point or we break effect ordering. + [safe_invoke(LocalPids, Fun)| + delegate_per_remote_node(NodePids, Fun, DelegateFun)]. + +delegate_per_remote_node(NodePids, Fun, DelegateFun) -> Self = self(), %% Note that this is unsafe if the Fun requires reentrancy to the %% local_server. I.e. if self() == local_server(Node) then we'll %% block forever. [gen_server2:cast( local_server(Node), - {thunk, fun() -> + {thunk, fun () -> Self ! {result, DelegateFun( - Node, fun() -> safe_invoke(Pids, Fun) end)} + Node, fun () -> safe_invoke(Pids, Fun) end)} end}) || {Node, Pids} <- NodePids], [receive {result, Result} -> Result end || _ <- NodePids]. diff --git a/src/delegate_sup.erl b/src/delegate_sup.erl index 1c1d62a9..39ef3f85 100644 --- a/src/delegate_sup.erl +++ b/src/delegate_sup.erl @@ -43,7 +43,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), any()) | 'ignore'). -endif. diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 0f648dcd..e209ee6b 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -182,18 +182,18 @@ -ifdef(use_specs). -type(ref() :: any()). --type(error() :: {'error', any()}). --type(ok_or_error() :: ('ok' | error())). --type(val_or_error(T) :: ({'ok', T} | error())). +-type(ok_or_error() :: rabbit_types:ok_or_error(any())). +-type(val_or_error(T) :: rabbit_types:ok_or_error2(T, any())). -type(position() :: ('bof' | 'eof' | non_neg_integer() | - {('bof' |'eof'), non_neg_integer()} | {'cur', integer()})). + {('bof' |'eof'), non_neg_integer()} | + {'cur', integer()})). -type(offset() :: non_neg_integer()). -spec(register_callback/3 :: (atom(), atom(), [any()]) -> 'ok'). -spec(open/3 :: - (string(), [any()], - [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) -> - val_or_error(ref())). + (string(), [any()], + [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) + -> val_or_error(ref())). -spec(close/1 :: (ref()) -> ok_or_error()). -spec(read/2 :: (ref(), non_neg_integer()) -> val_or_error([char()] | binary()) | 'eof'). diff --git a/src/gatherer.erl b/src/gatherer.erl new file mode 100644 index 00000000..31dda16e --- /dev/null +++ b/src/gatherer.erl @@ -0,0 +1,145 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(gatherer). + +-behaviour(gen_server2). + +-export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(stop/1 :: (pid()) -> 'ok'). +-spec(fork/1 :: (pid()) -> 'ok'). +-spec(finish/1 :: (pid()) -> 'ok'). +-spec(in/2 :: (pid(), any()) -> 'ok'). +-spec(out/1 :: (pid()) -> {'value', any()} | 'empty'). + +-endif. + +%%---------------------------------------------------------------------------- + +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + +%%---------------------------------------------------------------------------- + +-record(gstate, { forks, values, blocked }). + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server2:start_link(?MODULE, [], [{timeout, infinity}]). + +stop(Pid) -> + gen_server2:call(Pid, stop, infinity). + +fork(Pid) -> + gen_server2:call(Pid, fork, infinity). + +finish(Pid) -> + gen_server2:cast(Pid, finish). + +in(Pid, Value) -> + gen_server2:cast(Pid, {in, Value}). + +out(Pid) -> + gen_server2:call(Pid, out, infinity). + +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, #gstate { forks = 0, values = queue:new(), blocked = queue:new() }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; + +handle_call(fork, _From, State = #gstate { forks = Forks }) -> + {reply, ok, State #gstate { forks = Forks + 1 }, hibernate}; + +handle_call(out, From, State = #gstate { forks = Forks, + values = Values, + blocked = Blocked }) -> + case queue:out(Values) of + {empty, _} -> + case Forks of + 0 -> {reply, empty, State, hibernate}; + _ -> {noreply, + State #gstate { blocked = queue:in(From, Blocked) }, + hibernate} + end; + {{value, _Value} = V, NewValues} -> + {reply, V, State #gstate { values = NewValues }, hibernate} + end; + +handle_call(Msg, _From, State) -> + {stop, {unexpected_call, Msg}, State}. + +handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> + NewForks = Forks - 1, + NewBlocked = case NewForks of + 0 -> [gen_server2:reply(From, empty) || + From <- queue:to_list(Blocked)], + queue:new(); + _ -> Blocked + end, + {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, + hibernate}; + +handle_cast({in, Value}, State = #gstate { values = Values, + blocked = Blocked }) -> + {noreply, case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in(Value, Values) }; + {{value, From}, NewBlocked} -> + gen_server2:reply(From, {value, Value}), + State #gstate { blocked = NewBlocked } + end, hibernate}; + +handle_cast(Msg, State) -> + {stop, {unexpected_cast, Msg}, State}. + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +terminate(_Reason, State) -> + State. diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 5b899cdb..49ae63c1 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -186,7 +186,7 @@ -ifdef(use_specs). -spec(handle_common_termination/6 :: - (any(), any(), any(), atom(), any(), any()) -> no_return()). + (any(), any(), any(), atom(), any(), any()) -> no_return()). -spec(hibernate/7 :: (pid(), any(), any(), atom(), any(), queue(), any()) -> no_return()). @@ -639,7 +639,7 @@ do_multi_call(Nodes, Name, Req, Timeout) -> Caller = self(), Receiver = spawn( - fun() -> + fun () -> %% Middleman process. Should be unsensitive to regular %% exit signals. The sychronization is needed in case %% the receiver would exit before the caller started diff --git a/src/pg_local.erl b/src/pg_local.erl index 1501331d..f5ded123 100644 --- a/src/pg_local.erl +++ b/src/pg_local.erl @@ -36,8 +36,8 @@ -export([join/2, leave/2, get_members/1]). -export([sync/0]). %% intended for testing only; not part of official API --export([start/0,start_link/0,init/1,handle_call/3,handle_cast/2,handle_info/2, - terminate/2]). +-export([start/0, start_link/0, init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2]). %%---------------------------------------------------------------------------- @@ -45,8 +45,8 @@ -type(name() :: term()). --spec(start_link/0 :: () -> {'ok', pid()} | {'error', term()}). --spec(start/0 :: () -> {'ok', pid()} | {'error', term()}). +-spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), term())). +-spec(start/0 :: () -> rabbit_types:ok_or_error2(pid(), term())). -spec(join/2 :: (name(), pid()) -> 'ok'). -spec(leave/2 :: (name(), pid()) -> 'ok'). -spec(get_members/1 :: (name()) -> [pid()]). diff --git a/src/rabbit.erl b/src/rabbit.erl index 67f8df94..ada2c38e 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -33,7 +33,8 @@ -behaviour(application). --export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, rotate_logs/1]). +-export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, + rotate_logs/1]). -export([start/2, stop/1]). @@ -183,18 +184,19 @@ -ifdef(use_specs). --type(log_location() :: 'tty' | 'undefined' | string()). -type(file_suffix() :: binary()). +%% this really should be an abstract type +-type(log_location() :: 'tty' | 'undefined' | file:filename()). -spec(prepare/0 :: () -> 'ok'). -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_halt/0 :: () -> 'ok'). --spec(rotate_logs/1 :: (file_suffix()) -> 'ok' | {'error', any()}). --spec(status/0 :: () -> - [{running_applications, [{atom(), string(), string()}]} | - {nodes, [erlang_node()]} | - {running_nodes, [erlang_node()]}]). +-spec(rotate_logs/1 :: (file_suffix()) -> rabbit_types:ok_or_error(any())). +-spec(status/0 :: + () -> [{running_applications, [{atom(), string(), string()}]} | + {nodes, [{rabbit_mnesia:node_type(), [node()]}]} | + {running_nodes, [node()]}]). -spec(log_location/1 :: ('sasl' | 'kernel') -> log_location()). -endif. @@ -299,6 +301,18 @@ run_boot_step({StepName, Attributes}) -> ok end. +module_attributes(Module) -> + case catch Module:module_info(attributes) of + {'EXIT', {undef, [{Module, module_info, _} | _]}} -> + io:format("WARNING: module ~p not found, so not scanned for boot steps.~n", + [Module]), + []; + {'EXIT', Reason} -> + exit(Reason); + V -> + V + end. + boot_steps() -> AllApps = [App || {App, _, _} <- application:loaded_applications()], Modules = lists:usort( @@ -310,7 +324,7 @@ boot_steps() -> lists:flatmap(fun (Module) -> [{StepName, Attributes} || {rabbit_boot_step, [{StepName, Attributes}]} - <- Module:module_info(attributes)] + <- module_attributes(Module)] end, Modules), sort_boot_steps(UnsortedSteps). @@ -412,9 +426,9 @@ print_banner() -> "| ~s +---+ |~n" "| |~n" "+-------------------+~n" - "AMQP ~p-~p~n~s~n~s~n~n", + "~s~n~s~n~s~n~n", [Product, string:right([$v|Version], ProductLen), - ?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR, + ?PROTOCOL_VERSION, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]), Settings = [{"node", node()}, {"app descriptor", app_location()}, diff --git a/src/rabbit_access_control.erl b/src/rabbit_access_control.erl index a445f441..3aaf5928 100644 --- a/src/rabbit_access_control.erl +++ b/src/rabbit_access_control.erl @@ -45,28 +45,38 @@ -ifdef(use_specs). +-export_type([username/0, password/0]). + -type(permission_atom() :: 'configure' | 'read' | 'write'). +-type(username() :: binary()). +-type(password() :: binary()). +-type(regexp() :: binary()). --spec(check_login/2 :: (binary(), binary()) -> user()). --spec(user_pass_login/2 :: (username(), password()) -> user()). --spec(check_vhost_access/2 :: (user(), vhost()) -> 'ok'). +-spec(check_login/2 :: (binary(), binary()) -> rabbit_types:user()). +-spec(user_pass_login/2 :: (username(), password()) -> rabbit_types:user()). +-spec(check_vhost_access/2 :: + (rabbit_types:user(), rabbit_types:vhost()) -> 'ok'). -spec(check_resource_access/3 :: - (username(), r(atom()), permission_atom()) -> 'ok'). + (username(), rabbit_types:r(atom()), permission_atom()) -> 'ok'). -spec(add_user/2 :: (username(), password()) -> 'ok'). -spec(delete_user/1 :: (username()) -> 'ok'). -spec(change_password/2 :: (username(), password()) -> 'ok'). -spec(list_users/0 :: () -> [username()]). --spec(lookup_user/1 :: (username()) -> {'ok', user()} | not_found()). --spec(add_vhost/1 :: (vhost()) -> 'ok'). --spec(delete_vhost/1 :: (vhost()) -> 'ok'). --spec(list_vhosts/0 :: () -> [vhost()]). --spec(set_permissions/5 :: - (username(), vhost(), regexp(), regexp(), regexp()) -> 'ok'). --spec(clear_permissions/2 :: (username(), vhost()) -> 'ok'). +-spec(lookup_user/1 :: + (username()) -> rabbit_types:ok(rabbit_types:user()) + | rabbit_types:error('not_found')). +-spec(add_vhost/1 :: (rabbit_types:vhost()) -> 'ok'). +-spec(delete_vhost/1 :: (rabbit_types:vhost()) -> 'ok'). +-spec(list_vhosts/0 :: () -> [rabbit_types:vhost()]). +-spec(set_permissions/5 ::(username(), rabbit_types:vhost(), regexp(), + regexp(), regexp()) -> 'ok'). +-spec(clear_permissions/2 :: (username(), rabbit_types:vhost()) -> 'ok'). -spec(list_vhost_permissions/1 :: - (vhost()) -> [{username(), regexp(), regexp(), regexp()}]). + (rabbit_types:vhost()) + -> [{username(), regexp(), regexp(), regexp()}]). -spec(list_user_permissions/1 :: - (username()) -> [{vhost(), regexp(), regexp(), regexp()}]). + (username()) + -> [{rabbit_types:vhost(), regexp(), regexp(), regexp()}]). -endif. @@ -162,11 +172,14 @@ check_resource_access(Username, [] -> false; [#user_permission{permission = P}] -> - case regexp:match( - binary_to_list(Name), - binary_to_list(element(permission_index(Permission), P))) of - {match, _, _} -> true; - nomatch -> false + PermRegexp = case element(permission_index(Permission), P) of + %% <<"^$">> breaks Emacs' erlang mode + <<"">> -> <<$^, $$>>; + RE -> RE + end, + case re:run(Name, PermRegexp, [{capture, none}]) of + match -> true; + nomatch -> false end end, if Res -> ok; @@ -291,7 +304,7 @@ list_vhosts() -> validate_regexp(RegexpBin) -> Regexp = binary_to_list(RegexpBin), - case regexp:parse(Regexp) of + case re:compile(Regexp) of {ok, _} -> ok; {error, Reason} -> throw({error, {invalid_regexp, Regexp, Reason}}) end. diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl index 7e96d9a3..53c713e6 100644 --- a/src/rabbit_alarm.erl +++ b/src/rabbit_alarm.erl @@ -47,7 +47,7 @@ -type(mfa_tuple() :: {atom(), atom(), list()}). -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). --spec(register/2 :: (pid(), mfa_tuple()) -> 'ok'). +-spec(register/2 :: (pid(), mfa_tuple()) -> boolean()). -endif. @@ -67,9 +67,9 @@ stop() -> ok = alarm_handler:delete_alarm_handler(?MODULE). register(Pid, HighMemMFA) -> - ok = gen_event:call(alarm_handler, ?MODULE, - {register, Pid, HighMemMFA}, - infinity). + gen_event:call(alarm_handler, ?MODULE, + {register, Pid, HighMemMFA}, + infinity). %%---------------------------------------------------------------------------- @@ -84,7 +84,8 @@ handle_call({register, Pid, {M, F, A} = HighMemMFA}, false -> ok end, NewAlertees = dict:store(Pid, HighMemMFA, Alertess), - {ok, ok, State#alarms{alertees = NewAlertees}}; + {ok, State#alarms.vm_memory_high_watermark, + State#alarms{alertees = NewAlertees}}; handle_call(_Request, State) -> {ok, not_understood, State}. diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 7b88c45d..870c119a 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -31,18 +31,18 @@ -module(rabbit_amqqueue). --export([start/0, declare/4, delete/3, purge/1]). +-export([start/0, stop/0, declare/5, delete/3, purge/1]). -export([internal_declare/2, internal_delete/1, maybe_run_queue_via_backing_queue/2, update_ram_duration/1, set_ram_duration_target/2, - set_maximum_since_use/2]). + set_maximum_since_use/2, maybe_expire/1]). -export([pseudo_queue/2]). --export([lookup/1, with/2, with_or_die/2, - stat/1, stat_all/0, deliver/2, requeue/3, ack/4]). +-export([lookup/1, with/2, with_or_die/2, assert_equivalence/5, + check_exclusive_access/2, with_exclusive_access_or_die/3, + stat/1, deliver/2, requeue/3, ack/4, reject/4]). -export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). -export([consumers/1, consumers_all/1]). --export([claim_queue/2]). --export([basic_get/3, basic_consume/8, basic_cancel/4]). +-export([basic_get/3, basic_consume/7, basic_cancel/4]). -export([notify_sent/2, unblock/2, flush_all/2]). -export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]). -export([on_node_down/1]). @@ -55,68 +55,103 @@ -include("rabbit.hrl"). -include_lib("stdlib/include/qlc.hrl"). +-define(EXPIRES_TYPE, long). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(qstats() :: {'ok', queue_name(), non_neg_integer(), non_neg_integer()}). --type(qlen() :: {'ok', non_neg_integer()}). --type(qfun(A) :: fun ((amqqueue()) -> A)). +-export_type([name/0, qmsg/0]). + +-type(name() :: rabbit_types:r('queue')). + +-type(qlen() :: rabbit_types:ok(non_neg_integer())). +-type(qfun(A) :: fun ((rabbit_types:amqqueue()) -> A)). +-type(qmsg() :: {name(), pid(), msg_id(), boolean(), rabbit_types:message()}). +-type(msg_id() :: non_neg_integer()). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). -spec(start/0 :: () -> 'ok'). --spec(declare/4 :: (queue_name(), boolean(), boolean(), amqp_table()) -> - amqqueue()). --spec(lookup/1 :: (queue_name()) -> {'ok', amqqueue()} | not_found()). --spec(with/2 :: (queue_name(), qfun(A)) -> A | not_found()). --spec(with_or_die/2 :: (queue_name(), qfun(A)) -> A). --spec(list/1 :: (vhost()) -> [amqqueue()]). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (amqqueue()) -> [info()]). --spec(info/2 :: (amqqueue(), [info_key()]) -> [info()]). --spec(info_all/1 :: (vhost()) -> [[info()]]). --spec(info_all/2 :: (vhost(), [info_key()]) -> [[info()]]). --spec(consumers/1 :: (amqqueue()) -> [{pid(), ctag(), boolean()}]). +-spec(stop/0 :: () -> 'ok'). +-spec(declare/5 :: + (name(), boolean(), boolean(), + rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) + -> {'new' | 'existing', rabbit_types:amqqueue()}). +-spec(lookup/1 :: + (name()) -> rabbit_types:ok(rabbit_types:amqqueue()) | + rabbit_types:error('not_found')). +-spec(with/2 :: (name(), qfun(A)) -> A | rabbit_types:error('not_found')). +-spec(with_or_die/2 :: (name(), qfun(A)) -> A). +-spec(assert_equivalence/5 :: + (rabbit_types:amqqueue(), boolean(), boolean(), + rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) + -> 'ok' | no_return()). +-spec(check_exclusive_access/2 :: (rabbit_types:amqqueue(), pid()) -> 'ok'). +-spec(with_exclusive_access_or_die/3 :: (name(), pid(), qfun(A)) -> A). +-spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:amqqueue()]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (rabbit_types:amqqueue()) -> [rabbit_types:info()]). +-spec(info/2 :: + (rabbit_types:amqqueue(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). +-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]). +-spec(info_all/2 :: (rabbit_types:vhost(), [rabbit_types:info_key()]) + -> [[rabbit_types:info()]]). +-spec(consumers/1 :: + (rabbit_types:amqqueue()) + -> [{pid(), rabbit_types:ctag(), boolean()}]). -spec(consumers_all/1 :: - (vhost()) -> [{queue_name(), pid(), ctag(), boolean()}]). --spec(stat/1 :: (amqqueue()) -> qstats()). --spec(stat_all/0 :: () -> [qstats()]). + (rabbit_types:vhost()) + -> [{name(), pid(), rabbit_types:ctag(), boolean()}]). +-spec(stat/1 :: + (rabbit_types:amqqueue()) + -> {'ok', non_neg_integer(), non_neg_integer()}). -spec(delete/3 :: - (amqqueue(), 'false', 'false') -> qlen(); - (amqqueue(), 'true' , 'false') -> qlen() | {'error', 'in_use'}; - (amqqueue(), 'false', 'true' ) -> qlen() | {'error', 'not_empty'}; - (amqqueue(), 'true' , 'true' ) -> qlen() | - {'error', 'in_use'} | - {'error', 'not_empty'}). --spec(purge/1 :: (amqqueue()) -> qlen()). --spec(deliver/2 :: (pid(), delivery()) -> boolean()). + (rabbit_types:amqqueue(), 'false', 'false') + -> qlen(); + (rabbit_types:amqqueue(), 'true' , 'false') + -> qlen() | rabbit_types:error('in_use'); + (rabbit_types:amqqueue(), 'false', 'true' ) + -> qlen() | rabbit_types:error('not_empty'); + (rabbit_types:amqqueue(), 'true' , 'true' ) + -> qlen() | + rabbit_types:error('in_use') | + rabbit_types:error('not_empty')). +-spec(purge/1 :: (rabbit_types:amqqueue()) -> qlen()). +-spec(deliver/2 :: (pid(), rabbit_types:delivery()) -> boolean()). -spec(requeue/3 :: (pid(), [msg_id()], pid()) -> 'ok'). --spec(ack/4 :: (pid(), maybe(txn()), [msg_id()], pid()) -> 'ok'). --spec(commit_all/3 :: ([pid()], txn(), pid()) -> ok_or_errors()). --spec(rollback_all/3 :: ([pid()], txn(), pid()) -> 'ok'). +-spec(ack/4 :: + (pid(), rabbit_types:maybe(rabbit_types:txn()), [msg_id()], pid()) + -> 'ok'). +-spec(reject/4 :: (pid(), [msg_id()], boolean(), pid()) -> 'ok'). +-spec(commit_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> ok_or_errors()). +-spec(rollback_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> 'ok'). -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()). -spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()). --spec(claim_queue/2 :: (amqqueue(), pid()) -> 'ok' | 'locked'). --spec(basic_get/3 :: (amqqueue(), pid(), boolean()) -> +-spec(basic_get/3 :: (rabbit_types:amqqueue(), pid(), boolean()) -> {'ok', non_neg_integer(), qmsg()} | 'empty'). --spec(basic_consume/8 :: - (amqqueue(), boolean(), pid(), pid(), pid() | 'undefined', ctag(), - boolean(), any()) -> - 'ok' | {'error', 'queue_owned_by_another_connection' | - 'exclusive_consume_unavailable'}). --spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). +-spec(basic_consume/7 :: + (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined', + rabbit_types:ctag(), boolean(), any()) + -> rabbit_types:ok_or_error('exclusive_consume_unavailable')). +-spec(basic_cancel/4 :: + (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). --spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue() | 'not_found'). --spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). --spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). +-spec(internal_declare/2 :: + (rabbit_types:amqqueue(), boolean()) + -> rabbit_types:amqqueue() | 'not_found'). +-spec(internal_delete/1 :: (name()) -> rabbit_types:ok_or_error('not_found')). +-spec(maybe_run_queue_via_backing_queue/2 :: + (pid(), (fun ((A) -> A))) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). --spec(set_ram_duration_target/2 :: (pid(), number()) -> 'ok'). +-spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). --spec(on_node_down/1 :: (erlang_node()) -> 'ok'). --spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). +-spec(maybe_expire/1 :: (pid()) -> 'ok'). +-spec(on_node_down/1 :: (node()) -> 'ok'). +-spec(pseudo_queue/2 :: (binary(), pid()) -> rabbit_types:amqqueue()). -endif. @@ -124,7 +159,7 @@ start() -> DurableQueues = find_durable_queues(), - {ok, BQ} = application:get_env(backing_queue_module), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), ok = BQ:start([QName || #amqqueue{name = QName} <- DurableQueues]), {ok,_} = supervisor:start_child( rabbit_sup, @@ -134,6 +169,12 @@ start() -> _RealDurableQueues = recover_durable_queues(DurableQueues), ok. +stop() -> + ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), + ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), + ok = BQ:stop(). + find_durable_queues() -> Node = node(), %% TODO: use dirty ops instead @@ -148,11 +189,13 @@ recover_durable_queues(DurableQueues) -> Qs = [start_queue_process(Q) || Q <- DurableQueues], [Q || Q <- Qs, gen_server2:call(Q#amqqueue.pid, {init, true}) == Q]. -declare(QueueName, Durable, AutoDelete, Args) -> +declare(QueueName, Durable, AutoDelete, Args, Owner) -> + ok = check_declare_arguments(QueueName, Args), Q = start_queue_process(#amqqueue{name = QueueName, durable = Durable, auto_delete = AutoDelete, arguments = Args, + exclusive_owner = Owner, pid = none}), case gen_server2:call(Q#amqqueue.pid, {init, false}) of not_found -> rabbit_misc:not_found(QueueName); @@ -197,7 +240,8 @@ start_queue_process(Q) -> add_default_binding(#amqqueue{name = QueueName}) -> Exchange = rabbit_misc:r(QueueName, exchange, <<>>), RoutingKey = QueueName#resource.name, - rabbit_exchange:add_binding(Exchange, QueueName, RoutingKey, []), + rabbit_exchange:add_binding(Exchange, QueueName, RoutingKey, [], + fun (_X, _Q) -> ok end), ok. lookup(Name) -> @@ -214,6 +258,59 @@ with(Name, F) -> with_or_die(Name, F) -> with(Name, F, fun () -> rabbit_misc:not_found(Name) end). +assert_equivalence(#amqqueue{durable = Durable, + auto_delete = AutoDelete} = Q, + Durable, AutoDelete, RequiredArgs, Owner) -> + assert_args_equivalence(Q, RequiredArgs), + check_exclusive_access(Q, Owner, strict); +assert_equivalence(#amqqueue{name = QueueName}, + _Durable, _AutoDelete, _RequiredArgs, _Owner) -> + rabbit_misc:protocol_error( + not_allowed, "parameters for ~s not equivalent", + [rabbit_misc:rs(QueueName)]). + +check_exclusive_access(Q, Owner) -> check_exclusive_access(Q, Owner, lax). + +check_exclusive_access(#amqqueue{exclusive_owner = Owner}, Owner, _MatchType) -> + ok; +check_exclusive_access(#amqqueue{exclusive_owner = none}, _ReaderPid, lax) -> + ok; +check_exclusive_access(#amqqueue{name = QueueName}, _ReaderPid, _MatchType) -> + rabbit_misc:protocol_error( + resource_locked, + "cannot obtain exclusive access to locked ~s", + [rabbit_misc:rs(QueueName)]). + +with_exclusive_access_or_die(Name, ReaderPid, F) -> + with_or_die(Name, + fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end). + +assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, + RequiredArgs) -> + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName, + [<<"x-expires">>]). + +check_declare_arguments(QueueName, Args) -> + [case Fun(rabbit_misc:table_lookup(Args, Key)) of + ok -> ok; + {error, Error} -> rabbit_misc:protocol_error( + precondition_failed, + "Invalid arguments in declaration of queue ~s: " + "~w (on argument: ~w)", + [rabbit_misc:rs(QueueName), Error, Key]) + end || {Key, Fun} <- [{<<"x-expires">>, fun check_expires_argument/1}]], + ok. + +check_expires_argument(undefined) -> + ok; +check_expires_argument({?EXPIRES_TYPE, Expires}) + when is_integer(Expires) andalso Expires > 0 -> + ok; +check_expires_argument({?EXPIRES_TYPE, _Expires}) -> + {error, expires_zero_or_less}; +check_expires_argument(_) -> + {error, expires_not_of_type_long}. + list(VHostPath) -> mnesia:dirty_match_object( rabbit_queue, @@ -248,9 +345,6 @@ consumers_all(VHostPath) -> stat(#amqqueue{pid = QPid}) -> delegate_call(QPid, stat, infinity). -stat_all() -> - lists:map(fun stat/1, rabbit_misc:dirty_read_all(rabbit_queue)). - delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> delegate_call(QPid, {delete, IfUnused, IfEmpty}, infinity). @@ -269,11 +363,14 @@ deliver(QPid, #delivery{txn = Txn, sender = ChPid, message = Message}) -> true. requeue(QPid, MsgIds, ChPid) -> - delegate_cast(QPid, {requeue, MsgIds, ChPid}). + delegate_call(QPid, {requeue, MsgIds, ChPid}, infinity). ack(QPid, Txn, MsgIds, ChPid) -> delegate_pcast(QPid, 7, {ack, Txn, MsgIds, ChPid}). +reject(QPid, MsgIds, Requeue, ChPid) -> + delegate_pcast(QPid, 7, {reject, MsgIds, Requeue, ChPid}). + commit_all(QPids, Txn, ChPid) -> safe_delegate_call_ok( fun (QPid) -> exit({queue_disappeared, QPid}) end, @@ -298,15 +395,12 @@ limit_all(QPids, ChPid, LimiterPid) -> gen_server2:cast(QPid, {limit, ChPid, LimiterPid}) end). -claim_queue(#amqqueue{pid = QPid}, ReaderPid) -> - delegate_call(QPid, {claim_queue, ReaderPid}, infinity). - basic_get(#amqqueue{pid = QPid}, ChPid, NoAck) -> delegate_call(QPid, {basic_get, ChPid, NoAck}, infinity). -basic_consume(#amqqueue{pid = QPid}, NoAck, ReaderPid, ChPid, LimiterPid, +basic_consume(#amqqueue{pid = QPid}, NoAck, ChPid, LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg) -> - delegate_call(QPid, {basic_consume, NoAck, ReaderPid, ChPid, + delegate_call(QPid, {basic_consume, NoAck, ChPid, LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg}, infinity). @@ -324,19 +418,21 @@ flush_all(QPids, ChPid) -> delegate:invoke_no_result( QPids, fun (QPid) -> gen_server2:cast(QPid, {flush, ChPid}) end). +internal_delete1(QueueName) -> + ok = mnesia:delete({rabbit_queue, QueueName}), + ok = mnesia:delete({rabbit_durable_queue, QueueName}), + %% we want to execute some things, as + %% decided by rabbit_exchange, after the + %% transaction. + rabbit_exchange:delete_queue_bindings(QueueName). + internal_delete(QueueName) -> case rabbit_misc:execute_mnesia_transaction( fun () -> case mnesia:wread({rabbit_queue, QueueName}) of [] -> {error, not_found}; - [_] -> - ok = mnesia:delete({rabbit_queue, QueueName}), - ok = mnesia:delete({rabbit_durable_queue, QueueName}), - %% we want to execute some things, as - %% decided by rabbit_exchange, after the - %% transaction. - rabbit_exchange:delete_queue_bindings(QueueName) + [_] -> internal_delete1(QueueName) end end) of Err = {error, _} -> Err; @@ -358,6 +454,9 @@ set_ram_duration_target(QPid, Duration) -> set_maximum_since_use(QPid, Age) -> gen_server2:pcast(QPid, 8, {set_maximum_since_use, Age}). +maybe_expire(QPid) -> + gen_server2:pcast(QPid, 8, maybe_expire). + on_node_down(Node) -> [Hook() || Hook <- rabbit_misc:execute_mnesia_transaction( @@ -394,15 +493,13 @@ safe_delegate_call_ok(H, F, Pids) -> end. delegate_call(Pid, Msg, Timeout) -> - delegate:invoke(Pid, fun(P) -> gen_server2:call(P, Msg, Timeout) end). + delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, Timeout) end). delegate_pcall(Pid, Pri, Msg, Timeout) -> - delegate:invoke(Pid, fun(P) -> gen_server2:pcall(P, Pri, Msg, Timeout) end). - -delegate_cast(Pid, Msg) -> - delegate:invoke_no_result(Pid, fun(P) -> gen_server2:cast(P, Msg) end). + delegate:invoke(Pid, + fun (P) -> gen_server2:pcall(P, Pri, Msg, Timeout) end). delegate_pcast(Pid, Pri, Msg) -> delegate:invoke_no_result(Pid, - fun(P) -> gen_server2:pcast(P, Pri, Msg) end). + fun (P) -> gen_server2:pcast(P, Pri, Msg) end). diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index f12e1b70..ac5fb7f9 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -35,7 +35,7 @@ -behaviour(gen_server2). --define(UNSENT_MESSAGE_LIMIT, 100). +-define(UNSENT_MESSAGE_LIMIT, 100). -define(SYNC_INTERVAL, 5). %% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). @@ -50,15 +50,16 @@ % Queue's state -record(q, {q, - owner, exclusive_consumer, has_had_consumers, backing_queue, backing_queue_state, active_consumers, blocked_consumers, + expires, sync_timer_ref, - rate_timer_ref + rate_timer_ref, + expiry_timer_ref }). -record(consumer, {tag, ack_required}). @@ -103,16 +104,17 @@ init(Q) -> process_flag(trap_exit, true), {ok, BQ} = application:get_env(backing_queue_module), - {ok, #q{q = Q#amqqueue{pid = self()}, - owner = none, - exclusive_consumer = none, - has_had_consumers = false, - backing_queue = BQ, + {ok, #q{q = Q#amqqueue{pid = self()}, + exclusive_consumer = none, + has_had_consumers = false, + backing_queue = BQ, backing_queue_state = undefined, - active_consumers = queue:new(), - blocked_consumers = queue:new(), - sync_timer_ref = undefined, - rate_timer_ref = undefined}, hibernate, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + expires = undefined, + sync_timer_ref = undefined, + rate_timer_ref = undefined, + expiry_timer_ref = undefined}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. terminate(shutdown, State = #q{backing_queue = BQ}) -> @@ -134,6 +136,29 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- +init_expires(State = #q{q = #amqqueue{arguments = Arguments}}) -> + case rabbit_misc:table_lookup(Arguments, <<"x-expires">>) of + {long, Expires} -> ensure_expiry_timer(State#q{expires = Expires}); + undefined -> State + end. + +declare(Recover, From, + State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable}, + backing_queue = BQ, backing_queue_state = undefined}) -> + case rabbit_amqqueue:internal_declare(Q, Recover) of + not_found -> {stop, normal, not_found, State}; + Q -> gen_server2:reply(From, {new, Q}), + ok = file_handle_cache:register_callback( + rabbit_amqqueue, set_maximum_since_use, + [self()]), + ok = rabbit_memory_monitor:register( + self(), {rabbit_amqqueue, + set_ram_duration_target, [self()]}), + BQS = BQ:init(QName, IsDurable, Recover), + noreply(init_expires(State#q{backing_queue_state = BQS})); + Q1 -> {stop, normal, {existing, Q1}, State} + end. + terminate_shutdown(Fun, State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = stop_sync_timer(stop_rate_timer(State)), @@ -164,7 +189,7 @@ noreply(NewState) -> next_state(State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = ensure_rate_timer(State), - case BQ:needs_sync(BQS)of + case BQ:needs_idle_timeout(BQS)of true -> {ensure_sync_timer(State1), 0}; false -> {stop_sync_timer(State1), hibernate} end. @@ -173,7 +198,7 @@ ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, rabbit_amqqueue, maybe_run_queue_via_backing_queue, - [self(), fun (BQS) -> BQ:sync(BQS) end]), + [self(), fun (BQS) -> BQ:idle_timeout(BQS) end]), State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> State. @@ -203,6 +228,27 @@ stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), State#q{rate_timer_ref = undefined}. +stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) -> + State; +stop_expiry_timer(State = #q{expiry_timer_ref = TRef}) -> + {ok, cancel} = timer:cancel(TRef), + State#q{expiry_timer_ref = undefined}. + +%% We only wish to expire where there are no consumers *and* when +%% basic.get hasn't been called for the configured period. +ensure_expiry_timer(State = #q{expires = undefined}) -> + State; +ensure_expiry_timer(State = #q{expires = Expires}) -> + case is_unused(State) of + true -> + NewState = stop_expiry_timer(State), + {ok, TRef} = timer:apply_after( + Expires, rabbit_amqqueue, maybe_expire, [self()]), + NewState#q{expiry_timer_ref = TRef}; + false -> + State + end. + assert_invariant(#q{active_consumers = AC, backing_queue = BQ, backing_queue_state = BQS}) -> true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)). @@ -424,7 +470,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> _ -> rollback_transaction(Txn, ChPid, State1) end, - {ok, requeue_and_run(sets:to_list(ChAckTags), State2)} + {ok, requeue_and_run(sets:to_list(ChAckTags), + ensure_expiry_timer(State2))} end end. @@ -433,10 +480,6 @@ cancel_holder(ChPid, ConsumerTag, {ChPid, ConsumerTag}) -> cancel_holder(_ChPid, _ConsumerTag, Holder) -> Holder. -check_queue_owner(none, _) -> ok; -check_queue_owner({ReaderPid, _}, ReaderPid) -> ok; -check_queue_owner({_, _}, _) -> mismatch. - check_exclusive_access({_ChPid, _ConsumerTag}, _ExclusiveConsume, _State) -> in_use; check_exclusive_access(none, false, _State) -> @@ -488,10 +531,10 @@ i(auto_delete, #q{q = #amqqueue{auto_delete = AutoDelete}}) -> AutoDelete; i(arguments, #q{q = #amqqueue{arguments = Arguments}}) -> Arguments; i(pid, _) -> self(); -i(owner_pid, #q{owner = none}) -> +i(owner_pid, #q{q = #amqqueue{exclusive_owner = none}}) -> ''; -i(owner_pid, #q{owner = {ReaderPid, _MonitorRef}}) -> - ReaderPid; +i(owner_pid, #q{q = #amqqueue{exclusive_owner = ExclusiveOwner}}) -> + ExclusiveOwner; i(exclusive_consumer_pid, #q{exclusive_consumer = none}) -> ''; i(exclusive_consumer_pid, #q{exclusive_consumer = {ChPid, _ConsumerTag}}) -> @@ -520,25 +563,24 @@ i(Item, _) -> %--------------------------------------------------------------------------- handle_call({init, Recover}, From, - State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable}, - backing_queue = BQ, backing_queue_state = undefined}) -> - %% TODO: If we're exclusively owned && our owner isn't alive && - %% Recover then we should BQ:init and then {stop, normal, - %% not_found, State}, relying on terminate to delete the queue. - case rabbit_amqqueue:internal_declare(Q, Recover) of - not_found -> - {stop, normal, not_found, State}; - Q -> - gen_server2:reply(From, Q), - ok = file_handle_cache:register_callback( - rabbit_amqqueue, set_maximum_since_use, [self()]), - ok = rabbit_memory_monitor:register( - self(), - {rabbit_amqqueue, set_ram_duration_target, [self()]}), - noreply(State#q{backing_queue_state = - BQ:init(QName, IsDurable, Recover)}); - Q1 -> - {stop, normal, Q1, State} + State = #q{q = #amqqueue{exclusive_owner = none}}) -> + declare(Recover, From, State); + +handle_call({init, Recover}, From, + State = #q{q = #amqqueue{exclusive_owner = Owner}}) -> + case rpc:call(node(Owner), erlang, is_process_alive, [Owner]) of + true -> erlang:monitor(process, Owner), + declare(Recover, From, State); + _ -> #q{q = #amqqueue{name = QName, durable = IsDurable}, + backing_queue = BQ, backing_queue_state = undefined} = State, + case Recover of + true -> ok; + _ -> rabbit_log:warning( + "Queue ~p exclusive owner went away~n", [QName]) + end, + BQS = BQ:init(QName, IsDurable, Recover), + %% Rely on terminate to delete the queue. + {stop, normal, not_found, State#q{backing_queue_state = BQS}} end; handle_call(info, _From, State) -> @@ -600,8 +642,9 @@ handle_call({basic_get, ChPid, NoAck}, _From, State = #q{q = #amqqueue{name = QName}, backing_queue_state = BQS, backing_queue = BQ}) -> AckRequired = not NoAck, + State1 = ensure_expiry_timer(State), case BQ:fetch(AckRequired, BQS) of - {empty, BQS1} -> reply(empty, State#q{backing_queue_state = BQS1}); + {empty, BQS1} -> reply(empty, State1#q{backing_queue_state = BQS1}); {{Message, IsDelivered, AckTag, Remaining}, BQS1} -> case AckRequired of true -> C = #cr{acktags = ChAckTags} = ch_record(ChPid), @@ -610,54 +653,47 @@ handle_call({basic_get, ChPid, NoAck}, _From, false -> ok end, Msg = {QName, self(), AckTag, IsDelivered, Message}, - reply({ok, Remaining, Msg}, State#q{backing_queue_state = BQS1}) + reply({ok, Remaining, Msg}, State1#q{backing_queue_state = BQS1}) end; -handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid, +handle_call({basic_consume, NoAck, ChPid, LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg}, - _From, State = #q{owner = Owner, - exclusive_consumer = ExistingHolder}) -> - case check_queue_owner(Owner, ReaderPid) of - mismatch -> - reply({error, queue_owned_by_another_connection}, State); + _From, State = #q{exclusive_consumer = ExistingHolder}) -> + case check_exclusive_access(ExistingHolder, ExclusiveConsume, + State) of + in_use -> + reply({error, exclusive_consume_unavailable}, State); ok -> - case check_exclusive_access(ExistingHolder, ExclusiveConsume, - State) of - in_use -> - reply({error, exclusive_consume_unavailable}, State); - ok -> - C = #cr{consumer_count = ConsumerCount} = ch_record(ChPid), - Consumer = #consumer{tag = ConsumerTag, - ack_required = not NoAck}, - store_ch_record(C#cr{consumer_count = ConsumerCount +1, - limiter_pid = LimiterPid}), - case ConsumerCount of - 0 -> ok = rabbit_limiter:register(LimiterPid, self()); - _ -> ok - end, - ExclusiveConsumer = case ExclusiveConsume of - true -> {ChPid, ConsumerTag}; - false -> ExistingHolder - end, - State1 = State#q{has_had_consumers = true, - exclusive_consumer = ExclusiveConsumer}, - ok = maybe_send_reply(ChPid, OkMsg), - State2 = - case is_ch_blocked(C) of - true -> State1#q{ - blocked_consumers = - add_consumer( - ChPid, Consumer, - State1#q.blocked_consumers)}; - false -> run_message_queue( - State1#q{ - active_consumers = - add_consumer( - ChPid, Consumer, - State1#q.active_consumers)}) - end, - reply(ok, State2) - end + C = #cr{consumer_count = ConsumerCount} = ch_record(ChPid), + Consumer = #consumer{tag = ConsumerTag, + ack_required = not NoAck}, + store_ch_record(C#cr{consumer_count = ConsumerCount +1, + limiter_pid = LimiterPid}), + ok = case ConsumerCount of + 0 -> rabbit_limiter:register(LimiterPid, self()); + _ -> ok + end, + ExclusiveConsumer = if ExclusiveConsume -> {ChPid, ConsumerTag}; + true -> ExistingHolder + end, + State1 = State#q{has_had_consumers = true, + exclusive_consumer = ExclusiveConsumer}, + ok = maybe_send_reply(ChPid, OkMsg), + State2 = + case is_ch_blocked(C) of + true -> State1#q{ + blocked_consumers = + add_consumer( + ChPid, Consumer, + State1#q.blocked_consumers)}; + false -> run_message_queue( + State1#q{ + active_consumers = + add_consumer( + ChPid, Consumer, + State1#q.active_consumers)}) + end, + reply(ok, State2) end; handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, @@ -684,16 +720,15 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, ChPid, ConsumerTag, State#q.blocked_consumers)}, case should_auto_delete(NewState) of - false -> reply(ok, NewState); + false -> reply(ok, ensure_expiry_timer(NewState)); true -> {stop, normal, ok, NewState} end end; -handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, - backing_queue = BQ, +handle_call(stat, _From, State = #q{backing_queue = BQ, backing_queue_state = BQS, active_consumers = ActiveConsumers}) -> - reply({ok, Name, BQ:len(BQS), queue:len(ActiveConsumers)}, State); + reply({ok, BQ:len(BQS), queue:len(ActiveConsumers)}, State); handle_call({delete, IfUnused, IfEmpty}, _From, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> @@ -713,27 +748,15 @@ handle_call(purge, _From, State = #q{backing_queue = BQ, {Count, BQS1} = BQ:purge(BQS), reply({ok, Count}, State#q{backing_queue_state = BQS1}); -handle_call({claim_queue, ReaderPid}, _From, - State = #q{owner = Owner, exclusive_consumer = Holder}) -> - case Owner of - none -> - case check_exclusive_access(Holder, true, State) of - in_use -> - %% FIXME: Is this really the right answer? What if - %% an active consumer's reader is actually the - %% claiming pid? Should that be allowed? In order - %% to check, we'd need to hold not just the ch - %% pid for each consumer, but also its reader - %% pid... - reply(locked, State); - ok -> - MonitorRef = erlang:monitor(process, ReaderPid), - reply(ok, State#q{owner = {ReaderPid, MonitorRef}}) - end; - {ReaderPid, _MonitorRef} -> - reply(ok, State); - _ -> - reply(locked, State) +handle_call({requeue, AckTags, ChPid}, From, State) -> + gen_server2:reply(From, ok), + case lookup_ch(ChPid) of + not_found -> + noreply(State); + C = #cr{acktags = ChAckTags} -> + ChAckTags1 = subtract_acks(ChAckTags, AckTags), + store_ch_record(C#cr{acktags = ChAckTags1}), + noreply(requeue_and_run(AckTags, State)) end; handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) -> @@ -757,24 +780,27 @@ handle_cast({ack, Txn, AckTags, ChPid}, _ -> {C#cr{txn = Txn}, BQ:tx_ack(Txn, AckTags, BQS)} end, store_ch_record(C1), - noreply(State #q { backing_queue_state = BQS1 }) + noreply(State#q{backing_queue_state = BQS1}) end; -handle_cast({rollback, Txn, ChPid}, State) -> - noreply(rollback_transaction(Txn, ChPid, State)); - -handle_cast({requeue, AckTags, ChPid}, State) -> +handle_cast({reject, AckTags, Requeue, ChPid}, + State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> case lookup_ch(ChPid) of not_found -> - rabbit_log:warning("Ignoring requeue from unknown ch: ~p~n", - [ChPid]), noreply(State); C = #cr{acktags = ChAckTags} -> ChAckTags1 = subtract_acks(ChAckTags, AckTags), store_ch_record(C#cr{acktags = ChAckTags1}), - noreply(requeue_and_run(AckTags, State)) + noreply(case Requeue of + true -> requeue_and_run(AckTags, State); + false -> BQS1 = BQ:ack(AckTags, BQS), + State #q { backing_queue_state = BQS1 } + end) end; +handle_cast({rollback, Txn, ChPid}, State) -> + noreply(rollback_transaction(Txn, ChPid, State)); + handle_cast({unblock, ChPid}, State) -> noreply( possibly_unblock(State, ChPid, @@ -823,21 +849,24 @@ handle_cast({set_ram_duration_target, Duration}, handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), - noreply(State). - -handle_info({'DOWN', MonitorRef, process, DownPid, _Reason}, - State = #q{owner = {DownPid, MonitorRef}}) -> - %% We know here that there are no consumers on this queue that are - %% owned by other pids than the one that just went down, so since - %% exclusive in some sense implies autodelete, we delete the queue - %% here. The other way of implementing the "exclusive implies - %% autodelete" feature is to actually set autodelete when an - %% exclusive declaration is seen, but this has the problem that - %% the python tests rely on the queue not going away after a - %% basic.cancel when the queue was declared exclusive and - %% nonautodelete. - NewState = State#q{owner = none}, - {stop, normal, NewState}; + noreply(State); + +handle_cast(maybe_expire, State) -> + case is_unused(State) of + true -> ?LOGDEBUG("Queue lease expired for ~p~n", [State#q.q]), + {stop, normal, State}; + false -> noreply(ensure_expiry_timer(State)) + end. + +handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, + State = #q{q = #amqqueue{exclusive_owner = DownPid}}) -> + %% Exclusively owned queues must disappear with their owner. In + %% the case of clean shutdown we delete the queue synchronously in + %% the reader - although not required by the spec this seems to + %% match what people expect (see bug 21824). However we need this + %% monitor-and-async- delete in case the connection goes away + %% unexpectedly. + {stop, normal, State}; handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> case handle_ch_down(DownPid, State) of {ok, NewState} -> noreply(NewState); @@ -846,7 +875,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_info(timeout, State = #q{backing_queue = BQ}) -> noreply(maybe_run_queue_via_backing_queue( - fun (BQS) -> BQ:sync(BQS) end, State)); + fun (BQS) -> BQ:idle_timeout(BQS) end, State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 2dba00ad..2230c507 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -42,6 +42,11 @@ behaviour_info(callbacks) -> %% shared resources. {start, 1}, + %% Called to tear down any state/resources. NB: Implementations + %% should not depend on this function being called on shutdown + %% and instead should hook into the rabbit supervision hierarchy. + {stop, 0}, + %% Initialise the backing queue and its state. {init, 3}, @@ -113,14 +118,15 @@ behaviour_info(callbacks) -> %% queue. {ram_duration, 1}, - %% Should 'sync' be called as soon as the queue process can - %% manage (either on an empty mailbox, or when a timer fires)? - {needs_sync, 1}, + %% Should 'idle_timeout' be called as soon as the queue process + %% can manage (either on an empty mailbox, or when a timer + %% fires)? + {needs_idle_timeout, 1}, - %% Called (eventually) after needs_sync returns 'true'. Note this - %% may be called more than once for each 'true' returned from - %% needs_sync. - {sync, 1}, + %% Called (eventually) after needs_idle_timeout returns + %% 'true'. Note this may be called more than once for each 'true' + %% returned from needs_idle_timeout. + {idle_timeout, 1}, %% Called immediately before the queue hibernates. {handle_pre_hibernate, 1}, diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 4ab7a2a0..c76c01ac 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -42,24 +42,41 @@ -ifdef(use_specs). --type(properties_input() :: (amqp_properties() | [{atom(), any()}])). --type(publish_result() :: ({ok, routing_result(), [pid()]} | not_found())). - --spec(publish/1 :: (delivery()) -> publish_result()). --spec(delivery/4 :: (boolean(), boolean(), maybe(txn()), message()) -> - delivery()). --spec(message/4 :: (exchange_name(), routing_key(), properties_input(), - binary()) -> (message() | {'error', any()})). --spec(properties/1 :: (properties_input()) -> amqp_properties()). --spec(publish/4 :: (exchange_name(), routing_key(), properties_input(), - binary()) -> publish_result()). --spec(publish/7 :: (exchange_name(), routing_key(), boolean(), boolean(), - maybe(txn()), properties_input(), binary()) -> - publish_result()). --spec(build_content/2 :: (amqp_properties(), binary()) -> content()). --spec(from_content/1 :: (content()) -> {amqp_properties(), binary()}). +-type(properties_input() :: + (rabbit_framing:amqp_property_record() | [{atom(), any()}])). +-type(publish_result() :: + ({ok, rabbit_router:routing_result(), [pid()]} + | rabbit_types:error('not_found'))). + +-spec(publish/1 :: (rabbit_types:delivery()) -> publish_result()). +-spec(delivery/4 :: + (boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), + rabbit_types:message()) + -> rabbit_types:delivery()). +-spec(message/4 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + properties_input(), binary()) + -> (rabbit_types:message() | rabbit_types:error(any()))). +-spec(properties/1 :: + (properties_input()) -> rabbit_framing:amqp_property_record()). +-spec(publish/4 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + properties_input(), binary()) + -> publish_result()). +-spec(publish/7 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), + properties_input(), binary()) + -> publish_result()). +-spec(build_content/2 :: + (rabbit_framing:amqp_property_record(), binary()) + -> rabbit_types:content()). +-spec(from_content/1 :: + (rabbit_types:content()) + -> {rabbit_framing:amqp_property_record(), binary()}). -spec(is_message_persistent/1 :: - (decoded_content()) -> (boolean() | {'invalid', non_neg_integer()})). + (rabbit_types:decoded_content()) + -> (boolean() | {'invalid', non_neg_integer()})). -endif. @@ -80,18 +97,24 @@ delivery(Mandatory, Immediate, Txn, Message) -> sender = self(), message = Message}. build_content(Properties, BodyBin) -> - {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), #content{class_id = ClassId, properties = Properties, properties_bin = none, + protocol = none, payload_fragments_rev = [BodyBin]}. from_content(Content) -> #content{class_id = ClassId, properties = Props, payload_fragments_rev = FragmentsRev} = - rabbit_binary_parser:ensure_content_decoded(Content), - {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + rabbit_binary_parser:ensure_content_decoded(Content, + rabbit_framing_amqp_0_9_1), + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), {Props, list_to_binary(lists:reverse(FragmentsRev))}. message(ExchangeName, RoutingKeyBin, RawProperties, BodyBin) -> diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl index ed843735..f0ec6180 100644 --- a/src/rabbit_binary_generator.erl +++ b/src/rabbit_binary_generator.erl @@ -41,12 +41,12 @@ % See definition of check_empty_content_body_frame_size/0, an assertion called at startup. -define(EMPTY_CONTENT_BODY_FRAME_SIZE, 8). --export([build_simple_method_frame/2, - build_simple_content_frames/3, +-export([build_simple_method_frame/3, + build_simple_content_frames/4, build_heartbeat_frame/0]). -export([generate_table/1, encode_properties/2]). -export([check_empty_content_body_frame_size/0]). --export([ensure_content_encoded/1, clear_encoded_content/1]). +-export([ensure_content_encoded/2, clear_encoded_content/1]). -import(lists). @@ -56,45 +56,47 @@ -type(frame() :: [binary()]). --spec(build_simple_method_frame/2 :: - (channel_number(), amqp_method()) -> frame()). --spec(build_simple_content_frames/3 :: - (channel_number(), content(), non_neg_integer()) -> [frame()]). +-spec(build_simple_method_frame/3 :: + (rabbit_channel:channel_number(), rabbit_framing:amqp_method_record(), + rabbit_types:protocol()) + -> frame()). +-spec(build_simple_content_frames/4 :: + (rabbit_channel:channel_number(), rabbit_types:content(), + non_neg_integer(), rabbit_types:protocol()) + -> [frame()]). -spec(build_heartbeat_frame/0 :: () -> frame()). --spec(generate_table/1 :: (amqp_table()) -> binary()). --spec(encode_properties/2 :: ([amqp_property_type()], [any()]) -> binary()). +-spec(generate_table/1 :: (rabbit_framing:amqp_table()) -> binary()). +-spec(encode_properties/2 :: + ([rabbit_framing:amqp_property_type()], [any()]) -> binary()). -spec(check_empty_content_body_frame_size/0 :: () -> 'ok'). --spec(ensure_content_encoded/1 :: (content()) -> encoded_content()). --spec(clear_encoded_content/1 :: (content()) -> unencoded_content()). +-spec(ensure_content_encoded/2 :: + (rabbit_types:content(), rabbit_types:protocol()) -> + rabbit_types:encoded_content()). +-spec(clear_encoded_content/1 :: + (rabbit_types:content()) -> rabbit_types:unencoded_content()). -endif. %%---------------------------------------------------------------------------- -build_simple_method_frame(ChannelInt, MethodRecord) -> - MethodFields = rabbit_framing:encode_method_fields(MethodRecord), +build_simple_method_frame(ChannelInt, MethodRecord, Protocol) -> + MethodFields = Protocol:encode_method_fields(MethodRecord), MethodName = rabbit_misc:method_record_type(MethodRecord), - {ClassId, MethodId} = rabbit_framing:method_id(MethodName), + {ClassId, MethodId} = Protocol:method_id(MethodName), create_frame(1, ChannelInt, [<<ClassId:16, MethodId:16>>, MethodFields]). -build_simple_content_frames(ChannelInt, - #content{class_id = ClassId, - properties = ContentProperties, - properties_bin = ContentPropertiesBin, - payload_fragments_rev = PayloadFragmentsRev}, - FrameMax) -> - {BodySize, ContentFrames} = build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt), +build_simple_content_frames(ChannelInt, Content, FrameMax, Protocol) -> + #content{class_id = ClassId, + properties_bin = ContentPropertiesBin, + payload_fragments_rev = PayloadFragmentsRev} = + ensure_content_encoded(Content, Protocol), + {BodySize, ContentFrames} = + build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt), HeaderFrame = create_frame(2, ChannelInt, [<<ClassId:16, 0:16, BodySize:64>>, - maybe_encode_properties(ContentProperties, ContentPropertiesBin)]), + ContentPropertiesBin]), [HeaderFrame | ContentFrames]. -maybe_encode_properties(_ContentProperties, ContentPropertiesBin) - when is_binary(ContentPropertiesBin) -> - ContentPropertiesBin; -maybe_encode_properties(ContentProperties, none) -> - rabbit_framing:encode_properties(ContentProperties). - build_content_frames(FragsRev, FrameMax, ChannelInt) -> BodyPayloadMax = if FrameMax == 0 -> iolist_size(FragsRev); @@ -118,10 +120,11 @@ build_content_frames(SizeAcc, FramesAcc, FragSizeRem, FragAcc, [Frag | Frags], BodyPayloadMax, ChannelInt) -> Size = size(Frag), {NewFragSizeRem, NewFragAcc, NewFrags} = - case Size =< FragSizeRem of - true -> {FragSizeRem - Size, [Frag | FragAcc], Frags}; - false -> <<Head:FragSizeRem/binary, Tail/binary>> = Frag, - {0, [Head | FragAcc], [Tail | Frags]} + if Size == 0 -> {FragSizeRem, FragAcc, Frags}; + Size =< FragSizeRem -> {FragSizeRem - Size, [Frag | FragAcc], Frags}; + true -> <<Head:FragSizeRem/binary, Tail/binary>> = + Frag, + {0, [Head | FragAcc], [Tail | Frags]} end, build_content_frames(SizeAcc, FramesAcc, NewFragSizeRem, NewFragAcc, NewFrags, BodyPayloadMax, ChannelInt). @@ -276,13 +279,16 @@ check_empty_content_body_frame_size() -> ComputedSize, ?EMPTY_CONTENT_BODY_FRAME_SIZE}) end. -ensure_content_encoded(Content = #content{properties_bin = PropsBin}) +ensure_content_encoded(Content = #content{properties_bin = PropsBin, + protocol = Protocol}, Protocol) when PropsBin =/= 'none' -> Content; -ensure_content_encoded(Content = #content{properties = Props}) -> - Content #content{properties_bin = rabbit_framing:encode_properties(Props)}. +ensure_content_encoded(Content = #content{properties = Props}, Protocol) -> + Content#content{properties_bin = Protocol:encode_properties(Props), + protocol = Protocol}. -clear_encoded_content(Content = #content{properties_bin = none}) -> +clear_encoded_content(Content = #content{properties_bin = none, + protocol = none}) -> Content; clear_encoded_content(Content = #content{properties = none}) -> %% Only clear when we can rebuild the properties_bin later in @@ -290,4 +296,4 @@ clear_encoded_content(Content = #content{properties = none}) -> %% one of properties and properties_bin can be 'none' Content; clear_encoded_content(Content = #content{}) -> - Content#content{properties_bin = none}. + Content#content{properties_bin = none, protocol = none}. diff --git a/src/rabbit_binary_parser.erl b/src/rabbit_binary_parser.erl index e022a1fa..1d0a62af 100644 --- a/src/rabbit_binary_parser.erl +++ b/src/rabbit_binary_parser.erl @@ -34,7 +34,7 @@ -include("rabbit.hrl"). -export([parse_table/1, parse_properties/2]). --export([ensure_content_decoded/1, clear_decoded_content/1]). +-export([ensure_content_decoded/2, clear_decoded_content/1]). -import(lists). @@ -42,10 +42,14 @@ -ifdef(use_specs). --spec(parse_table/1 :: (binary()) -> amqp_table()). --spec(parse_properties/2 :: ([amqp_property_type()], binary()) -> [any()]). --spec(ensure_content_decoded/1 :: (content()) -> decoded_content()). --spec(clear_decoded_content/1 :: (content()) -> undecoded_content()). +-spec(parse_table/1 :: (binary()) -> rabbit_framing:amqp_table()). +-spec(parse_properties/2 :: + ([rabbit_framing:amqp_property_type()], binary()) -> [any()]). +-spec(ensure_content_decoded/2 :: + (rabbit_types:content(), rabbit_types:protocol()) + -> rabbit_types:decoded_content()). +-spec(clear_decoded_content/1 :: + (rabbit_types:content()) -> rabbit_types:undecoded_content()). -endif. @@ -159,12 +163,12 @@ parse_property(bit, Rest) -> parse_property(table, <<Len:32/unsigned, Table:Len/binary, Rest/binary>>) -> {parse_table(Table), Rest}. -ensure_content_decoded(Content = #content{properties = Props}) +ensure_content_decoded(Content = #content{properties = Props}, _Protocol) when Props =/= 'none' -> Content; -ensure_content_decoded(Content = #content{properties_bin = PropBin}) +ensure_content_decoded(Content = #content{properties_bin = PropBin}, Protocol) when is_binary(PropBin) -> - Content#content{properties = rabbit_framing:decode_properties( + Content#content{properties = Protocol:decode_properties( Content#content.class_id, PropBin)}. clear_decoded_content(Content = #content{properties = none}) -> diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index a48db9c8..c4ff361d 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -35,20 +35,25 @@ -behaviour(gen_server2). --export([start_link/5, do/2, do/3, shutdown/1]). +-export([start_link/6, do/2, do/3, shutdown/1]). -export([send_command/2, deliver/4, conserve_memory/2, flushed/2]). -export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1]). --export([init/1, terminate/2, code_change/3, - handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1]). +-export([flow_timeout/2]). + +-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, + handle_info/2, handle_pre_hibernate/1]). -record(ch, {state, channel, reader_pid, writer_pid, limiter_pid, transaction_id, tx_participants, next_tag, uncommitted_ack_q, unacked_message_q, username, virtual_host, most_recently_declared_queue, - consumer_mapping, blocking}). + consumer_mapping, blocking, queue_collector_pid, flow}). + +-record(flow, {server, client, pending}). -define(MAX_PERMISSION_CACHE_SIZE, 12). +-define(FLOW_OK_TIMEOUT, 10000). %% 10 seconds -define(INFO_KEYS, [pid, @@ -66,31 +71,39 @@ -ifdef(use_specs). --spec(start_link/5 :: - (channel_number(), pid(), pid(), username(), vhost()) -> pid()). --spec(do/2 :: (pid(), amqp_method()) -> 'ok'). --spec(do/3 :: (pid(), amqp_method(), maybe(content())) -> 'ok'). +-export_type([channel_number/0]). + +-type(ref() :: any()). +-type(channel_number() :: non_neg_integer()). + +-spec(start_link/6 :: + (channel_number(), pid(), pid(), rabbit_access_control:username(), + rabbit_types:vhost(), pid()) -> rabbit_types:ok(pid())). +-spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). +-spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(), + rabbit_types:maybe(rabbit_types:content())) -> 'ok'). -spec(shutdown/1 :: (pid()) -> 'ok'). --spec(send_command/2 :: (pid(), amqp_method()) -> 'ok'). --spec(deliver/4 :: (pid(), ctag(), boolean(), qmsg()) -> 'ok'). +-spec(send_command/2 :: (pid(), rabbit_framing:amqp_method()) -> 'ok'). +-spec(deliver/4 :: + (pid(), rabbit_types:ctag(), boolean(), rabbit_amqqueue:qmsg()) + -> 'ok'). -spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). -spec(flushed/2 :: (pid(), pid()) -> 'ok'). +-spec(flow_timeout/2 :: (pid(), ref()) -> 'ok'). -spec(list/0 :: () -> [pid()]). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (pid()) -> [info()]). --spec(info/2 :: (pid(), [info_key()]) -> [info()]). --spec(info_all/0 :: () -> [[info()]]). --spec(info_all/1 :: ([info_key()]) -> [[info()]]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (pid()) -> [rabbit_types:info()]). +-spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]). +-spec(info_all/0 :: () -> [[rabbit_types:info()]]). +-spec(info_all/1 :: ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]). -endif. %%---------------------------------------------------------------------------- -start_link(Channel, ReaderPid, WriterPid, Username, VHost) -> - {ok, Pid} = gen_server2:start_link( - ?MODULE, [Channel, ReaderPid, WriterPid, - Username, VHost], []), - Pid. +start_link(Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid) -> + gen_server2:start_link(?MODULE, [Channel, ReaderPid, WriterPid, + Username, VHost, CollectorPid], []). do(Pid, Method) -> do(Pid, Method, none). @@ -113,6 +126,9 @@ conserve_memory(Pid, Conserve) -> flushed(Pid, QPid) -> gen_server2:cast(Pid, {flushed, QPid}). +flow_timeout(Pid, Ref) -> + gen_server2:pcast(Pid, 7, {flow_timeout, Ref}). + list() -> pg_local:get_members(rabbit_channels). @@ -135,7 +151,7 @@ info_all(Items) -> %%--------------------------------------------------------------------------- -init([Channel, ReaderPid, WriterPid, Username, VHost]) -> +init([Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid]) -> process_flag(trap_exit, true), link(WriterPid), ok = pg_local:join(rabbit_channels, self()), @@ -153,7 +169,10 @@ init([Channel, ReaderPid, WriterPid, Username, VHost]) -> virtual_host = VHost, most_recently_declared_queue = <<>>, consumer_mapping = dict:new(), - blocking = dict:new()}, + blocking = dict:new(), + queue_collector_pid = CollectorPid, + flow = #flow{server = true, client = true, + pending = none}}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. @@ -180,11 +199,9 @@ handle_cast({method, Method, Content}, State) -> {stop, normal, State#ch{state = terminating}} catch exit:Reason = #amqp_error{} -> - ok = rollback_and_notify(State), MethodName = rabbit_misc:method_record_type(Method), - State#ch.reader_pid ! {channel_exit, State#ch.channel, - Reason#amqp_error{method = MethodName}}, - {stop, normal, State#ch{state = terminating}}; + {stop, normal, terminating(Reason#amqp_error{method = MethodName}, + State)}; exit:normal -> {stop, normal, State}; _:Reason -> @@ -208,11 +225,25 @@ handle_cast({deliver, ConsumerTag, AckRequired, Msg}, ok = internal_deliver(WriterPid, true, ConsumerTag, DeliveryTag, Msg), noreply(State1#ch{next_tag = DeliveryTag + 1}); -handle_cast({conserve_memory, Conserve}, State) -> - ok = clear_permission_cache(), - ok = rabbit_writer:send_command( - State#ch.writer_pid, #'channel.flow'{active = not(Conserve)}), - noreply(State). +handle_cast({conserve_memory, true}, State = #ch{state = starting}) -> + noreply(State); +handle_cast({conserve_memory, false}, State = #ch{state = starting}) -> + ok = rabbit_writer:send_command(State#ch.writer_pid, #'channel.open_ok'{}), + noreply(State#ch{state = running}); +handle_cast({conserve_memory, Conserve}, State = #ch{state = running}) -> + flow_control(not Conserve, State); +handle_cast({conserve_memory, _Conserve}, State) -> + noreply(State); + +handle_cast({flow_timeout, Ref}, + State = #ch{flow = #flow{client = Flow, pending = {Ref, _TRef}}}) -> + {stop, normal, terminating( + rabbit_misc:amqp_error( + precondition_failed, + "timeout waiting for channel.flow_ok{active=~w}", + [not Flow], none), State)}; +handle_cast({flow_timeout, _Ref}, State) -> + {noreply, State}. handle_info({'EXIT', WriterPid, Reason = {writer, send_failed, _Error}}, State = #ch{writer_pid = WriterPid}) -> @@ -253,20 +284,20 @@ return_ok(State, false, Msg) -> {reply, Msg, State}. ok_msg(true, _Msg) -> undefined; ok_msg(false, Msg) -> Msg. -return_queue_declare_ok(State, NoWait, Q) -> - NewState = State#ch{most_recently_declared_queue = - (Q#amqqueue.name)#resource.name}, +terminating(Reason, State = #ch{channel = Channel, reader_pid = Reader}) -> + ok = rollback_and_notify(State), + Reader ! {channel_exit, Channel, Reason}, + State#ch{state = terminating}. + +return_queue_declare_ok(#resource{name = ActualName}, + NoWait, MessageCount, ConsumerCount, State) -> + NewState = State#ch{most_recently_declared_queue = ActualName}, case NoWait of true -> {noreply, NewState}; - false -> - {ok, ActualName, MessageCount, ConsumerCount} = - rabbit_misc:with_exit_handler( - fun () -> {ok, Q#amqqueue.name, 0, 0} end, - fun () -> rabbit_amqqueue:stat(Q) end), - Reply = #'queue.declare_ok'{queue = ActualName#resource.name, - message_count = MessageCount, - consumer_count = ConsumerCount}, - {reply, Reply, NewState} + false -> Reply = #'queue.declare_ok'{queue = ActualName, + message_count = MessageCount, + consumer_count = ConsumerCount}, + {reply, Reply, NewState} end. check_resource_access(Username, Resource, Perm) -> @@ -300,7 +331,7 @@ check_read_permitted(Resource, #ch{ username = Username}) -> expand_queue_name_shortcut(<<>>, #ch{ most_recently_declared_queue = <<>> }) -> rabbit_misc:protocol_error( - not_allowed, "no previously declared queue", []); + not_found, "no previously declared queue", []); expand_queue_name_shortcut(<<>>, #ch{ virtual_host = VHostPath, most_recently_declared_queue = MRDQ }) -> rabbit_misc:r(VHostPath, queue, MRDQ); @@ -310,7 +341,7 @@ expand_queue_name_shortcut(QueueNameBin, #ch{ virtual_host = VHostPath }) -> expand_routing_key_shortcut(<<>>, <<>>, #ch{ most_recently_declared_queue = <<>> }) -> rabbit_misc:protocol_error( - not_allowed, "no previously declared queue", []); + not_found, "no previously declared queue", []); expand_routing_key_shortcut(<<>>, <<>>, #ch{ most_recently_declared_queue = MRDQ }) -> MRDQ; @@ -352,8 +383,10 @@ queue_blocked(QPid, State = #ch{blocking = Blocking}) -> end. handle_method(#'channel.open'{}, _, State = #ch{state = starting}) -> - rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}), - {reply, #'channel.open_ok'{}, State#ch{state = running}}; + case rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}) of + true -> {noreply, State}; + false -> {reply, #'channel.open_ok'{}, State#ch{state = running}} + end; handle_method(#'channel.open'{}, _, _State) -> rabbit_misc:protocol_error( @@ -370,19 +403,24 @@ handle_method(#'channel.close'{}, _, State = #ch{writer_pid = WriterPid}) -> handle_method(#'access.request'{},_, State) -> {reply, #'access.request_ok'{ticket = 1}, State}; -handle_method(#'basic.publish'{exchange = ExchangeNameBin, +handle_method(#'basic.publish'{}, _, #ch{flow = #flow{client = false}}) -> + rabbit_misc:protocol_error( + command_invalid, + "basic.publish received after channel.flow_ok{active=false}", []); +handle_method(#'basic.publish'{exchange = ExchangeNameBin, routing_key = RoutingKey, - mandatory = Mandatory, - immediate = Immediate}, - Content, State = #ch{ virtual_host = VHostPath, - transaction_id = TxnKey, - writer_pid = WriterPid}) -> + mandatory = Mandatory, + immediate = Immediate}, + Content, State = #ch{virtual_host = VHostPath, + transaction_id = TxnKey, + writer_pid = WriterPid}) -> ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), check_write_permitted(ExchangeName, State), Exchange = rabbit_exchange:lookup_or_die(ExchangeName), %% We decode the content's properties here because we're almost %% certain to want to look at delivery-mode and priority. - DecodedContent = rabbit_binary_parser:ensure_content_decoded(Content), + DecodedContent = rabbit_binary_parser:ensure_content_decoded( + Content, rabbit_framing_amqp_0_9_1), IsPersistent = is_message_persistent(DecodedContent), Message = #basic_message{exchange_name = ExchangeName, routing_key = RoutingKey, @@ -394,16 +432,9 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, Exchange, rabbit_basic:delivery(Mandatory, Immediate, TxnKey, Message)), case RoutingRes of - routed -> - ok; - unroutable -> - %% FIXME: 312 should be replaced by the ?NO_ROUTE - %% definition, when we move to >=0-9 - ok = basic_return(Message, WriterPid, 312, <<"unroutable">>); - not_delivered -> - %% FIXME: 313 should be replaced by the ?NO_CONSUMERS - %% definition, when we move to >=0-9 - ok = basic_return(Message, WriterPid, 313, <<"not_delivered">>) + routed -> ok; + unroutable -> ok = basic_return(Message, WriterPid, no_route); + not_delivered -> ok = basic_return(Message, WriterPid, no_consumers) end, {noreply, case TxnKey of none -> State; @@ -413,13 +444,7 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, handle_method(#'basic.ack'{delivery_tag = DeliveryTag, multiple = Multiple}, _, State = #ch{transaction_id = TxnKey, - next_tag = NextDeliveryTag, unacked_message_q = UAMQ}) -> - if DeliveryTag >= NextDeliveryTag -> - rabbit_misc:protocol_error( - command_invalid, "unknown delivery tag ~w", [DeliveryTag]); - true -> ok - end, {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple), Participants = ack(TxnKey, Acked), {noreply, case TxnKey of @@ -436,11 +461,12 @@ handle_method(#'basic.ack'{delivery_tag = DeliveryTag, handle_method(#'basic.get'{queue = QueueNameBin, no_ack = NoAck}, _, State = #ch{ writer_pid = WriterPid, + reader_pid = ReaderPid, next_tag = DeliveryTag }) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_read_permitted(QueueName, State), - case rabbit_amqqueue:with_or_die( - QueueName, + case rabbit_amqqueue:with_exclusive_access_or_die( + QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:basic_get(Q, self(), NoAck) end) of {ok, MessageCount, Msg = {_QName, _QPid, _MsgId, Redelivered, @@ -458,7 +484,7 @@ handle_method(#'basic.get'{queue = QueueNameBin, Content), {noreply, State1#ch{next_tag = DeliveryTag + 1}}; empty -> - {reply, #'basic.get_empty'{cluster_id = <<>>}, State} + {reply, #'basic.get_empty'{}, State} end; handle_method(#'basic.consume'{queue = QueueNameBin, @@ -480,14 +506,14 @@ handle_method(#'basic.consume'{queue = QueueNameBin, Other -> Other end, - %% In order to ensure that the consume_ok gets sent before - %% any messages are sent to the consumer, we get the queue - %% process to send the consume_ok on our behalf. - case rabbit_amqqueue:with_or_die( - QueueName, + %% We get the queue process to send the consume_ok on our + %% behalf. This is for symmetry with basic.cancel - see + %% the comment in that method for why. + case rabbit_amqqueue:with_exclusive_access_or_die( + QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:basic_consume( - Q, NoAck, ReaderPid, self(), LimiterPid, + Q, NoAck, self(), LimiterPid, ActualConsumerTag, ExclusiveConsume, ok_msg(NoWait, #'basic.consume_ok'{ consumer_tag = ActualConsumerTag})) @@ -497,14 +523,6 @@ handle_method(#'basic.consume'{queue = QueueNameBin, dict:store(ActualConsumerTag, QueueName, ConsumerMapping)}}; - {error, queue_owned_by_another_connection} -> - %% The spec is silent on which exception to use - %% here. This seems reasonable? - %% FIXME: check this - - rabbit_misc:protocol_error( - resource_locked, "~s owned by another connection", - [rabbit_misc:rs(QueueName)]); {error, exclusive_consume_unavailable} -> rabbit_misc:protocol_error( access_refused, "~s in exclusive use", @@ -571,9 +589,8 @@ handle_method(#'basic.qos'{prefetch_count = PrefetchCount}, end, {reply, #'basic.qos_ok'{}, State#ch{limiter_pid = LimiterPid2}}; -handle_method(#'basic.recover'{requeue = true}, - _, State = #ch{ transaction_id = none, - unacked_message_q = UAMQ }) -> +handle_method(#'basic.recover_async'{requeue = true}, + _, State = #ch{ unacked_message_q = UAMQ }) -> ok = fold_per_queue( fun (QPid, MsgIds, ok) -> %% The Qpid python test suite incorrectly assumes @@ -583,12 +600,12 @@ handle_method(#'basic.recover'{requeue = true}, rabbit_amqqueue:requeue( QPid, lists:reverse(MsgIds), self()) end, ok, UAMQ), - %% No answer required, apparently! + %% No answer required - basic.recover is the newer, synchronous + %% variant of this method {noreply, State#ch{unacked_message_q = queue:new()}}; -handle_method(#'basic.recover'{requeue = false}, - _, State = #ch{ transaction_id = none, - writer_pid = WriterPid, +handle_method(#'basic.recover_async'{requeue = false}, + _, State = #ch{ writer_pid = WriterPid, unacked_message_q = UAMQ }) -> ok = rabbit_misc:queue_fold( fun ({_DeliveryTag, none, _Msg}, ok) -> @@ -608,12 +625,28 @@ handle_method(#'basic.recover'{requeue = false}, WriterPid, false, ConsumerTag, DeliveryTag, {QName, QPid, MsgId, true, Message}) end, ok, UAMQ), - %% No answer required, apparently! + %% No answer required - basic.recover is the newer, synchronous + %% variant of this method {noreply, State}; -handle_method(#'basic.recover'{}, _, _State) -> - rabbit_misc:protocol_error( - not_allowed, "attempt to recover a transactional channel",[]); +handle_method(#'basic.recover'{requeue = Requeue}, Content, State) -> + {noreply, State2 = #ch{writer_pid = WriterPid}} = + handle_method(#'basic.recover_async'{requeue = Requeue}, + Content, + State), + ok = rabbit_writer:send_command(WriterPid, #'basic.recover_ok'{}), + {noreply, State2}; + +handle_method(#'basic.reject'{delivery_tag = DeliveryTag, + requeue = Requeue}, + _, State = #ch{ unacked_message_q = UAMQ}) -> + {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, false), + ok = fold_per_queue( + fun (QPid, MsgIds, ok) -> + rabbit_amqqueue:reject(QPid, MsgIds, Requeue, self()) + end, ok, Acked), + ok = notify_limiter(State#ch.limiter_pid, Acked), + {noreply, State#ch{unacked_message_q = Remaining}}; handle_method(#'exchange.declare'{exchange = ExchangeNameBin, type = TypeNameBin, @@ -644,18 +677,17 @@ handle_method(#'exchange.declare'{exchange = ExchangeNameBin, AutoDelete, Args) end, - ok = rabbit_exchange:assert_type(X, CheckedType), + ok = rabbit_exchange:assert_equivalence(X, CheckedType, Durable, + AutoDelete, Args), return_ok(State, NoWait, #'exchange.declare_ok'{}); handle_method(#'exchange.declare'{exchange = ExchangeNameBin, - type = TypeNameBin, passive = true, nowait = NoWait}, _, State = #ch{ virtual_host = VHostPath }) -> ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), check_configure_permitted(ExchangeName, State), - X = rabbit_exchange:lookup_or_die(ExchangeName), - ok = rabbit_exchange:assert_type(X, rabbit_exchange:check_type(TypeNameBin)), + _ = rabbit_exchange:lookup_or_die(ExchangeName), return_ok(State, NoWait, #'exchange.declare_ok'{}); handle_method(#'exchange.delete'{exchange = ExchangeNameBin, @@ -674,73 +706,78 @@ handle_method(#'exchange.delete'{exchange = ExchangeNameBin, return_ok(State, NoWait, #'exchange.delete_ok'{}) end; -handle_method(#'queue.declare'{queue = QueueNameBin, - passive = false, - durable = Durable, - exclusive = ExclusiveDeclare, +handle_method(#'queue.declare'{queue = QueueNameBin, + passive = false, + durable = Durable, + exclusive = ExclusiveDeclare, auto_delete = AutoDelete, - nowait = NoWait, - arguments = Args}, - _, State = #ch { virtual_host = VHostPath, - reader_pid = ReaderPid }) -> - %% FIXME: atomic create&claim - Finish = - fun (Q) -> - if ExclusiveDeclare -> - case rabbit_amqqueue:claim_queue(Q, ReaderPid) of - locked -> - %% AMQP 0-8 doesn't say which - %% exception to use, so we mimic QPid - %% here. - rabbit_misc:protocol_error( - resource_locked, - "cannot obtain exclusive access to locked ~s", - [rabbit_misc:rs(Q#amqqueue.name)]); - ok -> ok - end; - true -> - ok - end, - Q - end, - Q = case rabbit_amqqueue:with( - rabbit_misc:r(VHostPath, queue, QueueNameBin), - Finish) of - {error, not_found} -> - ActualNameBin = - case QueueNameBin of + nowait = NoWait, + arguments = Args} = Declare, + _, State = #ch{virtual_host = VHostPath, + reader_pid = ReaderPid, + queue_collector_pid = CollectorPid}) -> + Owner = case ExclusiveDeclare of + true -> ReaderPid; + false -> none + end, + ActualNameBin = case QueueNameBin of <<>> -> rabbit_guid:binstring_guid("amq.gen"); Other -> check_name('queue', Other) end, - QueueName = rabbit_misc:r(VHostPath, queue, ActualNameBin), - check_configure_permitted(QueueName, State), - Finish(rabbit_amqqueue:declare(QueueName, - Durable, AutoDelete, Args)); - Other = #amqqueue{name = QueueName} -> - check_configure_permitted(QueueName, State), - Other - end, - return_queue_declare_ok(State, NoWait, Q); + QueueName = rabbit_misc:r(VHostPath, queue, ActualNameBin), + check_configure_permitted(QueueName, State), + case rabbit_amqqueue:with( + QueueName, + fun (Q) -> ok = rabbit_amqqueue:assert_equivalence( + Q, Durable, AutoDelete, Args, Owner), + rabbit_amqqueue:stat(Q) + end) of + {ok, MessageCount, ConsumerCount} -> + return_queue_declare_ok(QueueName, NoWait, MessageCount, + ConsumerCount, State); + {error, not_found} -> + case rabbit_amqqueue:declare(QueueName, Durable, AutoDelete, + Args, Owner) of + {new, Q = #amqqueue{}} -> + %% We need to notify the reader within the channel + %% process so that we can be sure there are no + %% outstanding exclusive queues being declared as + %% the connection shuts down. + ok = case Owner of + none -> ok; + _ -> rabbit_queue_collector:register(CollectorPid, Q) + end, + return_queue_declare_ok(QueueName, NoWait, 0, 0, State); + {existing, _Q} -> + %% must have been created between the stat and the + %% declare. Loop around again. + handle_method(Declare, none, State) + end + end; -handle_method(#'queue.declare'{queue = QueueNameBin, +handle_method(#'queue.declare'{queue = QueueNameBin, passive = true, - nowait = NoWait}, - _, State = #ch{ virtual_host = VHostPath }) -> + nowait = NoWait}, + _, State = #ch{virtual_host = VHostPath, + reader_pid = ReaderPid}) -> QueueName = rabbit_misc:r(VHostPath, queue, QueueNameBin), check_configure_permitted(QueueName, State), - Q = rabbit_amqqueue:with_or_die(QueueName, fun (Q) -> Q end), - return_queue_declare_ok(State, NoWait, Q); + {{ok, MessageCount, ConsumerCount}, #amqqueue{} = Q} = + rabbit_amqqueue:with_or_die( + QueueName, fun (Q) -> {rabbit_amqqueue:stat(Q), Q} end), + ok = rabbit_amqqueue:check_exclusive_access(Q, ReaderPid), + return_queue_declare_ok(QueueName, NoWait, MessageCount, ConsumerCount, + State); handle_method(#'queue.delete'{queue = QueueNameBin, if_unused = IfUnused, if_empty = IfEmpty, - nowait = NoWait - }, - _, State) -> + nowait = NoWait}, + _, State = #ch{reader_pid = ReaderPid}) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_configure_permitted(QueueName, State), - case rabbit_amqqueue:with_or_die( - QueueName, + case rabbit_amqqueue:with_exclusive_access_or_die( + QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:delete(Q, IfUnused, IfEmpty) end) of {error, in_use} -> rabbit_misc:protocol_error( @@ -750,8 +787,7 @@ handle_method(#'queue.delete'{queue = QueueNameBin, precondition_failed, "~s not empty", [rabbit_misc:rs(QueueName)]); {ok, PurgedMessageCount} -> return_ok(State, NoWait, - #'queue.delete_ok'{ - message_count = PurgedMessageCount}) + #'queue.delete_ok'{message_count = PurgedMessageCount}) end; handle_method(#'queue.bind'{queue = QueueNameBin, @@ -759,7 +795,7 @@ handle_method(#'queue.bind'{queue = QueueNameBin, routing_key = RoutingKey, nowait = NoWait, arguments = Arguments}, _, State) -> - binding_action(fun rabbit_exchange:add_binding/4, ExchangeNameBin, + binding_action(fun rabbit_exchange:add_binding/5, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, #'queue.bind_ok'{}, NoWait, State); @@ -767,17 +803,17 @@ handle_method(#'queue.unbind'{queue = QueueNameBin, exchange = ExchangeNameBin, routing_key = RoutingKey, arguments = Arguments}, _, State) -> - binding_action(fun rabbit_exchange:delete_binding/4, ExchangeNameBin, + binding_action(fun rabbit_exchange:delete_binding/5, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, #'queue.unbind_ok'{}, false, State); handle_method(#'queue.purge'{queue = QueueNameBin, nowait = NoWait}, - _, State) -> + _, State = #ch{reader_pid = ReaderPid}) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_read_permitted(QueueName, State), - {ok, PurgedMessageCount} = rabbit_amqqueue:with_or_die( - QueueName, + {ok, PurgedMessageCount} = rabbit_amqqueue:with_exclusive_access_or_die( + QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:purge(Q) end), return_ok(State, NoWait, #'queue.purge_ok'{message_count = PurgedMessageCount}); @@ -790,14 +826,14 @@ handle_method(#'tx.select'{}, _, State) -> handle_method(#'tx.commit'{}, _, #ch{transaction_id = none}) -> rabbit_misc:protocol_error( - not_allowed, "channel is not transactional", []); + precondition_failed, "channel is not transactional", []); handle_method(#'tx.commit'{}, _, State) -> {reply, #'tx.commit_ok'{}, internal_commit(State)}; handle_method(#'tx.rollback'{}, _, #ch{transaction_id = none}) -> rabbit_misc:protocol_error( - not_allowed, "channel is not transactional", []); + precondition_failed, "channel is not transactional", []); handle_method(#'tx.rollback'{}, _, State) -> {reply, #'tx.rollback_ok'{}, internal_rollback(State)}; @@ -810,7 +846,6 @@ handle_method(#'channel.flow'{active = true}, _, end, {reply, #'channel.flow_ok'{active = true}, State#ch{limiter_pid = LimiterPid1}}; - handle_method(#'channel.flow'{active = false}, _, State = #ch{limiter_pid = LimiterPid, consumer_mapping = Consumers}) -> @@ -828,11 +863,25 @@ handle_method(#'channel.flow'{active = false}, _, blocking = dict:from_list(Queues)}} end; -handle_method(#'channel.flow_ok'{active = _}, _, State) -> - %% TODO: We may want to correlate this to channel.flow messages we - %% have sent, and complain if we get an unsolicited - %% channel.flow_ok, or the client refuses our flow request. - {noreply, State}; +handle_method(#'channel.flow_ok'{active = Active}, _, + State = #ch{flow = #flow{server = Active, client = Flow, + pending = {_Ref, TRef}} = F}) + when Flow =:= not Active -> + {ok, cancel} = timer:cancel(TRef), + {noreply, State#ch{flow = F#flow{client = Active, pending = none}}}; +handle_method(#'channel.flow_ok'{active = Active}, _, + State = #ch{flow = #flow{server = Flow, client = Flow, + pending = {_Ref, TRef}}}) + when Flow =:= not Active -> + {ok, cancel} = timer:cancel(TRef), + {noreply, issue_flow(Flow, State)}; +handle_method(#'channel.flow_ok'{}, _, #ch{flow = #flow{pending = none}}) -> + rabbit_misc:protocol_error( + command_invalid, "unsolicited channel.flow_ok", []); +handle_method(#'channel.flow_ok'{active = Active}, _, _State) -> + rabbit_misc:protocol_error( + command_invalid, + "received channel.flow_ok{active=~w} has incorrect polarity", [Active]); handle_method(_MethodRecord, _Content, _State) -> rabbit_misc:protocol_error( @@ -840,8 +889,26 @@ handle_method(_MethodRecord, _Content, _State) -> %%---------------------------------------------------------------------------- +flow_control(Active, State = #ch{flow = #flow{server = Flow, pending = none}}) + when Flow =:= not Active -> + ok = clear_permission_cache(), + noreply(issue_flow(Active, State)); +flow_control(Active, State = #ch{flow = F}) -> + noreply(State#ch{flow = F#flow{server = Active}}). + +issue_flow(Active, State) -> + ok = rabbit_writer:send_command( + State#ch.writer_pid, #'channel.flow'{active = Active}), + Ref = make_ref(), + {ok, TRef} = timer:apply_after(?FLOW_OK_TIMEOUT, ?MODULE, flow_timeout, + [self(), Ref]), + State#ch{flow = #flow{server = Active, client = not Active, + pending = {Ref, TRef}}}. + binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, - ReturnMethod, NoWait, State = #ch{virtual_host = VHostPath}) -> + ReturnMethod, NoWait, + State = #ch{virtual_host = VHostPath, + reader_pid = ReaderPid}) -> %% FIXME: connection exception (!) on failure?? %% (see rule named "failure" in spec-XML) %% FIXME: don't allow binding to internal exchanges - @@ -852,7 +919,12 @@ binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, State), ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), check_read_permitted(ExchangeName, State), - case Fun(ExchangeName, QueueName, ActualRoutingKey, Arguments) of + case Fun(ExchangeName, QueueName, ActualRoutingKey, Arguments, + fun (_X, Q) -> + try rabbit_amqqueue:check_exclusive_access(Q, ReaderPid) + catch exit:Reason -> {error, Reason} + end + end) of {error, exchange_not_found} -> rabbit_misc:not_found(ExchangeName); {error, queue_not_found} -> @@ -866,17 +938,17 @@ binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, not_found, "no binding ~s between ~s and ~s", [RoutingKey, rabbit_misc:rs(ExchangeName), rabbit_misc:rs(QueueName)]); - {error, durability_settings_incompatible} -> - rabbit_misc:protocol_error( - not_allowed, "durability settings of ~s incompatible with ~s", - [rabbit_misc:rs(QueueName), rabbit_misc:rs(ExchangeName)]); + {error, #amqp_error{} = Error} -> + rabbit_misc:protocol_error(Error); ok -> return_ok(State, NoWait, ReturnMethod) end. basic_return(#basic_message{exchange_name = ExchangeName, routing_key = RoutingKey, content = Content}, - WriterPid, ReplyCode, ReplyText) -> + WriterPid, Reason) -> + {_Close, ReplyCode, ReplyText} = + rabbit_framing_amqp_0_9_1:lookup_amqp_exception(Reason), ok = rabbit_writer:send_command( WriterPid, #'basic.return'{reply_code = ReplyCode, @@ -904,7 +976,8 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) -> QTail, DeliveryTag, Multiple) end; {empty, _} -> - {ToAcc, PrefixAcc} + rabbit_misc:protocol_error( + precondition_failed, "unknown delivery tag ~w", [DeliveryTag]) end. add_tx_participants(MoreP, State = #ch{tx_participants = Participants}) -> @@ -968,7 +1041,7 @@ fold_per_queue(F, Acc0, UAQ) -> Acc0, D). start_limiter(State = #ch{unacked_message_q = UAMQ}) -> - LPid = rabbit_limiter:start_link(self(), queue:len(UAMQ)), + {ok, LPid} = rabbit_limiter:start_link(self(), queue:len(UAMQ)), ok = limit_queues(LPid, State), LPid. diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index d1834b3b..6e6ad06c 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -44,7 +44,7 @@ -spec(start/0 :: () -> no_return()). -spec(stop/0 :: () -> 'ok'). --spec(action/4 :: (atom(), erlang_node(), [string()], +-spec(action/4 :: (atom(), node(), [string()], fun ((string(), [any()]) -> 'ok')) -> 'ok'). -spec(usage/0 :: () -> no_return()). @@ -59,8 +59,8 @@ start() -> parse_args(FullCommand, #params{quiet = false, node = rabbit_misc:makenode(NodeStr)}), Inform = case Quiet of - true -> fun(_Format, _Args1) -> ok end; - false -> fun(Format, Args1) -> + true -> fun (_Format, _Args1) -> ok end; + false -> fun (Format, Args1) -> io:format(Format ++ " ...~n", Args1) end end, @@ -160,6 +160,12 @@ action(cluster, Node, ClusterNodeSs, Inform) -> [Node, ClusterNodes]), rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); +action(force_cluster, Node, ClusterNodeSs, Inform) -> + ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), + Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", + [Node, ClusterNodes]), + rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); + action(status, Node, [], Inform) -> Inform("Status of node ~p", [Node]), case call(Node, {rabbit, status, []}) of diff --git a/src/rabbit_dialyzer.erl b/src/rabbit_dialyzer.erl index f19e8d02..51bd6b1f 100644 --- a/src/rabbit_dialyzer.erl +++ b/src/rabbit_dialyzer.erl @@ -30,17 +30,17 @@ %% -module(rabbit_dialyzer). --include("rabbit.hrl"). --export([create_basic_plt/1, add_to_plt/2, dialyze_files/2, halt_with_code/1]). +-export([create_basic_plt/1, add_to_plt/2, dialyze_files/2, + halt_with_code/1]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(create_basic_plt/1 :: (file_path()) -> 'ok'). --spec(add_to_plt/2 :: (file_path(), string()) -> 'ok'). --spec(dialyze_files/2 :: (file_path(), string()) -> 'ok'). +-spec(create_basic_plt/1 :: (file:filename()) -> 'ok'). +-spec(add_to_plt/2 :: (file:filename(), string()) -> 'ok'). +-spec(dialyze_files/2 :: (file:filename(), string()) -> 'ok'). -spec(halt_with_code/1 :: (atom()) -> no_return()). -endif. @@ -56,7 +56,7 @@ create_basic_plt(BasicPltPath) -> ok. add_to_plt(PltPath, FilesString) -> - {ok, Files} = regexp:split(FilesString, " "), + Files = string:tokens(FilesString, " "), DialyzerWarnings = dialyzer:run([{analysis_type, plt_add}, {init_plt, PltPath}, {output_plt, PltPath}, @@ -65,7 +65,7 @@ add_to_plt(PltPath, FilesString) -> ok. dialyze_files(PltPath, ModifiedFiles) -> - {ok, Files} = regexp:split(ModifiedFiles, " "), + Files = string:tokens(ModifiedFiles, " "), DialyzerWarnings = dialyzer:run([{init_plt, PltPath}, {files, Files}]), case DialyzerWarnings of diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl index e9baf2c4..42861f86 100644 --- a/src/rabbit_error_logger.erl +++ b/src/rabbit_error_logger.erl @@ -39,7 +39,8 @@ -export([boot/0]). --export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2, handle_info/2]). +-export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2, + handle_info/2]). boot() -> {ok, DefaultVHost} = application:get_env(default_vhost), diff --git a/src/rabbit_error_logger_file_h.erl b/src/rabbit_error_logger_file_h.erl index 45b66712..875d680f 100644 --- a/src/rabbit_error_logger_file_h.erl +++ b/src/rabbit_error_logger_file_h.erl @@ -33,7 +33,8 @@ -behaviour(gen_event). --export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]). +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). %% rabbit_error_logger_file_h is a wrapper around the error_logger_file_h %% module because the original's init/1 does not match properly diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl index 8f41392f..49f87a22 100644 --- a/src/rabbit_exchange.erl +++ b/src/rabbit_exchange.erl @@ -33,13 +33,14 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([recover/0, declare/5, lookup/1, lookup_or_die/1, - list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2, - publish/2]). --export([add_binding/4, delete_binding/4, list_bindings/1]). +-export([recover/0, declare/5, lookup/1, lookup_or_die/1, list/1, info_keys/0, + info/1, info/2, info_all/1, info_all/2, publish/2]). +-export([add_binding/5, delete_binding/5, list_bindings/1]). -export([delete/2]). -export([delete_queue_bindings/1, delete_transient_queue_bindings/1]). --export([check_type/1, assert_type/2]). +-export([assert_equivalence/5]). +-export([assert_args_equivalence/2]). +-export([check_type/1]). %% EXTENDED API -export([list_exchange_bindings/1]). @@ -48,61 +49,93 @@ -import(mnesia). -import(sets). -import(lists). --import(regexp). %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(bind_res() :: 'ok' | {'error', - 'queue_not_found' | - 'exchange_not_found' | - 'exchange_and_queue_not_found'}). +-export_type([name/0, type/0, binding_key/0]). + +-type(name() :: rabbit_types:r('exchange')). +-type(type() :: atom()). +-type(binding_key() :: binary()). + +-type(bind_res() :: rabbit_types:ok_or_error('queue_not_found' | + 'exchange_not_found' | + 'exchange_and_queue_not_found')). +-type(inner_fun() :: + fun((rabbit_types:exchange(), queue()) -> + rabbit_types:ok_or_error(rabbit_types:amqp_error()))). + -spec(recover/0 :: () -> 'ok'). --spec(declare/5 :: (exchange_name(), exchange_type(), boolean(), boolean(), - amqp_table()) -> exchange()). +-spec(declare/5 :: + (name(), type(), boolean(), boolean(), rabbit_framing:amqp_table()) + -> rabbit_types:exchange()). -spec(check_type/1 :: (binary()) -> atom()). --spec(assert_type/2 :: (exchange(), atom()) -> 'ok'). --spec(lookup/1 :: (exchange_name()) -> {'ok', exchange()} | not_found()). --spec(lookup_or_die/1 :: (exchange_name()) -> exchange()). --spec(list/1 :: (vhost()) -> [exchange()]). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (exchange()) -> [info()]). --spec(info/2 :: (exchange(), [info_key()]) -> [info()]). --spec(info_all/1 :: (vhost()) -> [[info()]]). --spec(info_all/2 :: (vhost(), [info_key()]) -> [[info()]]). --spec(publish/2 :: (exchange(), delivery()) -> {routing_result(), [pid()]}). --spec(add_binding/4 :: - (exchange_name(), queue_name(), routing_key(), amqp_table()) -> - bind_res() | {'error', 'durability_settings_incompatible'}). --spec(delete_binding/4 :: - (exchange_name(), queue_name(), routing_key(), amqp_table()) -> - bind_res() | {'error', 'binding_not_found'}). --spec(list_bindings/1 :: (vhost()) -> - [{exchange_name(), queue_name(), routing_key(), amqp_table()}]). --spec(delete_queue_bindings/1 :: (queue_name()) -> fun(() -> none())). --spec(delete_transient_queue_bindings/1 :: (queue_name()) -> fun(() -> none())). --spec(delete/2 :: (exchange_name(), boolean()) -> - 'ok' | not_found() | {'error', 'in_use'}). --spec(list_queue_bindings/1 :: (queue_name()) -> - [{exchange_name(), routing_key(), amqp_table()}]). --spec(list_exchange_bindings/1 :: (exchange_name()) -> - [{queue_name(), routing_key(), amqp_table()}]). +-spec(assert_equivalence/5 :: + (rabbit_types:exchange(), atom(), boolean(), boolean(), + rabbit_framing:amqp_table()) + -> 'ok' | no_return()). +-spec(assert_args_equivalence/2 :: + (rabbit_types:exchange(), rabbit_framing:amqp_table()) -> + 'ok' | no_return()). +-spec(lookup/1 :: + (name()) -> rabbit_types:ok(rabbit_types:exchange()) | + rabbit_types:error('not_found')). +-spec(lookup_or_die/1 :: (name()) -> rabbit_types:exchange()). +-spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:exchange()]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (rabbit_types:exchange()) -> [rabbit_types:info()]). +-spec(info/2 :: + (rabbit_types:exchange(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). +-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]). +-spec(info_all/2 ::(rabbit_types:vhost(), [rabbit_types:info_key()]) + -> [[rabbit_types:info()]]). +-spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) + -> {rabbit_router:routing_result(), [pid()]}). +-spec(add_binding/5 :: + (name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table(), inner_fun()) + -> bind_res()). +-spec(delete_binding/5 :: + (name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table(), inner_fun()) + -> bind_res() | rabbit_types:error('binding_not_found')). +-spec(list_bindings/1 :: + (rabbit_types:vhost()) + -> [{name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table()}]). +-spec(delete_queue_bindings/1 :: + (rabbit_amqqueue:name()) -> fun (() -> none())). +-spec(delete_transient_queue_bindings/1 :: + (rabbit_amqqueue:name()) -> fun (() -> none())). +-spec(delete/2 :: + (name(), boolean())-> 'ok' | + rabbit_types:error('not_found') | + rabbit_types:error('in_use')). +-spec(list_queue_bindings/1 :: + (rabbit_amqqueue:name()) + -> [{name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table()}]). +-spec(list_exchange_bindings/1 :: + (name()) -> [{rabbit_amqqueue:name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table()}]). -endif. %%---------------------------------------------------------------------------- --define(INFO_KEYS, [name, type, durable, auto_delete, arguments]. +-define(INFO_KEYS, [name, type, durable, auto_delete, arguments]). recover() -> Exs = rabbit_misc:table_fold( - fun(Exchange, Acc) -> + fun (Exchange, Acc) -> ok = mnesia:write(rabbit_exchange, Exchange, write), [Exchange | Acc] end, [], rabbit_durable_exchange), Bs = rabbit_misc:table_fold( - fun(Route = #route{binding = B}, Acc) -> + fun (Route = #route{binding = B}, Acc) -> {_, ReverseRoute} = route_with_reverse(Route), ok = mnesia:write(rabbit_route, Route, write), @@ -182,13 +215,26 @@ check_type(TypeBin) -> T end. -assert_type(#exchange{ type = ActualType }, RequiredType) - when ActualType == RequiredType -> - ok; -assert_type(#exchange{ name = Name, type = ActualType }, RequiredType) -> +assert_equivalence(X = #exchange{ durable = Durable, + auto_delete = AutoDelete, + type = Type}, + Type, Durable, AutoDelete, RequiredArgs) -> + (type_to_module(Type)):assert_args_equivalence(X, RequiredArgs); +assert_equivalence(#exchange{ name = Name }, _Type, _Durable, _AutoDelete, + _Args) -> rabbit_misc:protocol_error( - not_allowed, "cannot redeclare ~s of type '~s' with type '~s'", - [rabbit_misc:rs(Name), ActualType, RequiredType]). + not_allowed, + "cannot redeclare ~s with different type, durable or autodelete value", + [rabbit_misc:rs(Name)]). + +assert_args_equivalence(#exchange{ name = Name, + arguments = Args }, + RequiredArgs) -> + %% The spec says "Arguments are compared for semantic + %% equivalence". The only arg we care about is + %% "alternate-exchange". + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, Name, + [<<"alternate-exchange">>]). lookup(Name) -> rabbit_misc:dirty_read({rabbit_exchange, Name}). @@ -305,7 +351,7 @@ delete_queue_bindings(QueueName, FwdDeleteFun) -> Module = type_to_module(Type), case IsDeleted of auto_deleted -> Module:delete(X, Bs); - no_delete -> Module:remove_bindings(X, Bs) + not_deleted -> Module:remove_bindings(X, Bs) end end, Cleanup) end. @@ -332,7 +378,6 @@ cleanup_deleted_queue_bindings1(ExchangeName, Bindings) -> [X] = mnesia:read({rabbit_exchange, ExchangeName}), {maybe_auto_delete(X), Bindings}. - delete_forward_routes(Route) -> ok = mnesia:delete_object(rabbit_route, Route, write), ok = mnesia:delete_object(rabbit_durable_route, Route, write). @@ -349,7 +394,7 @@ continue({[], Continuation}) -> continue(mnesia:select(Continuation)). call_with_exchange(Exchange, Fun) -> rabbit_misc:execute_mnesia_transaction( - fun() -> case mnesia:read({rabbit_exchange, Exchange}) of + fun () -> case mnesia:read({rabbit_exchange, Exchange}) of [] -> {error, not_found}; [X] -> Fun(X) end @@ -357,7 +402,7 @@ call_with_exchange(Exchange, Fun) -> call_with_exchange_and_queue(Exchange, Queue, Fun) -> rabbit_misc:execute_mnesia_transaction( - fun() -> case {mnesia:read({rabbit_exchange, Exchange}), + fun () -> case {mnesia:read({rabbit_exchange, Exchange}), mnesia:read({rabbit_queue, Queue})} of {[X], [Q]} -> Fun(X, Q); {[ ], [_]} -> {error, exchange_not_found}; @@ -366,50 +411,66 @@ call_with_exchange_and_queue(Exchange, Queue, Fun) -> end end). -add_binding(ExchangeName, QueueName, RoutingKey, Arguments) -> +add_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) -> case binding_action( ExchangeName, QueueName, RoutingKey, Arguments, fun (X, Q, B) -> - if Q#amqqueue.durable and not(X#exchange.durable) -> - {error, durability_settings_incompatible}; - true -> + %% this argument is used to check queue exclusivity; + %% in general, we want to fail on that in preference to + %% anything else + case InnerFun(X, Q) of + ok -> case mnesia:read({rabbit_route, B}) of [] -> - sync_binding(B, Q#amqqueue.durable, - fun mnesia:write/3), + ok = sync_binding(B, + X#exchange.durable andalso + Q#amqqueue.durable, + fun mnesia:write/3), {new, X, B}; [_R] -> {existing, X, B} - end + end; + {error, _} = E -> + E end end) of {new, Exchange = #exchange{ type = Type }, Binding} -> (type_to_module(Type)):add_binding(Exchange, Binding); {existing, _, _} -> ok; - Err = {error, _} -> + {error, _} = Err -> Err end. -delete_binding(ExchangeName, QueueName, RoutingKey, Arguments) -> +delete_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) -> case binding_action( ExchangeName, QueueName, RoutingKey, Arguments, fun (X, Q, B) -> case mnesia:match_object(rabbit_route, #route{binding = B}, write) of - [] -> {error, binding_not_found}; - _ -> ok = sync_binding(B, Q#amqqueue.durable, - fun mnesia:delete_object/3), - {maybe_auto_delete(X), B} + [] -> + {error, binding_not_found}; + _ -> + case InnerFun(X, Q) of + ok -> + ok = + sync_binding(B, + X#exchange.durable andalso + Q#amqqueue.durable, + fun mnesia:delete_object/3), + {maybe_auto_delete(X), B}; + {error, _} = E -> + E + end end end) of - Err = {error, _} -> + {error, _} = Err -> Err; - {{Action, X = #exchange{ type = Type }}, B} -> + {{IsDeleted, X = #exchange{ type = Type }}, B} -> Module = type_to_module(Type), - case Action of - auto_delete -> Module:delete(X, [B]); - no_delete -> Module:remove_bindings(X, [B]) + case IsDeleted of + auto_deleted -> Module:delete(X, [B]); + not_deleted -> Module:remove_bindings(X, [B]) end end. @@ -493,10 +554,10 @@ delete(ExchangeName, IfUnused) -> end. maybe_auto_delete(Exchange = #exchange{auto_delete = false}) -> - {no_delete, Exchange}; + {not_deleted, Exchange}; maybe_auto_delete(Exchange = #exchange{auto_delete = true}) -> case conditional_delete(Exchange) of - {error, in_use} -> {no_delete, Exchange}; + {error, in_use} -> {not_deleted, Exchange}; {deleted, Exchange, []} -> {auto_deleted, Exchange} end. diff --git a/src/rabbit_exchange_type.erl b/src/rabbit_exchange_type.erl index a8c071e6..85760edc 100644 --- a/src/rabbit_exchange_type.erl +++ b/src/rabbit_exchange_type.erl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -54,7 +54,11 @@ behaviour_info(callbacks) -> {add_binding, 2}, %% called after bindings have been deleted. - {remove_bindings, 2} + {remove_bindings, 2}, + + %% called when comparing exchanges for equivalence - should return ok or + %% exit with #amqp_error{} + {assert_args_equivalence, 2} ]; behaviour_info(_Other) -> diff --git a/src/rabbit_exchange_type_direct.erl b/src/rabbit_exchange_type_direct.erl index 9b71e0e1..4f6eb851 100644 --- a/src/rabbit_exchange_type_direct.erl +++ b/src/rabbit_exchange_type_direct.erl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -36,7 +36,7 @@ -export([description/0, publish/2]). -export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2]). + add_binding/2, remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, @@ -61,3 +61,5 @@ recover(_X, _Bs) -> ok. delete(_X, _Bs) -> ok. add_binding(_X, _B) -> ok. remove_bindings(_X, _Bs) -> ok. +assert_args_equivalence(X, Args) -> + rabbit_exchange:assert_args_equivalence(X, Args). diff --git a/src/rabbit_exchange_type_fanout.erl b/src/rabbit_exchange_type_fanout.erl index 311654ab..94798c78 100644 --- a/src/rabbit_exchange_type_fanout.erl +++ b/src/rabbit_exchange_type_fanout.erl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -35,8 +35,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). --export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, @@ -59,3 +59,5 @@ recover(_X, _Bs) -> ok. delete(_X, _Bs) -> ok. add_binding(_X, _B) -> ok. remove_bindings(_X, _Bs) -> ok. +assert_args_equivalence(X, Args) -> + rabbit_exchange:assert_args_equivalence(X, Args). diff --git a/src/rabbit_exchange_type_headers.erl b/src/rabbit_exchange_type_headers.erl index 285dab1a..44607398 100644 --- a/src/rabbit_exchange_type_headers.erl +++ b/src/rabbit_exchange_type_headers.erl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -36,8 +36,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). --export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, @@ -48,7 +48,8 @@ {enables, kernel_ready}]}). -ifdef(use_specs). --spec(headers_match/2 :: (amqp_table(), amqp_table()) -> boolean()). +-spec(headers_match/2 :: (rabbit_framing:amqp_table(), + rabbit_framing:amqp_table()) -> boolean()). -endif. description() -> @@ -135,3 +136,5 @@ recover(_X, _Bs) -> ok. delete(_X, _Bs) -> ok. add_binding(_X, _B) -> ok. remove_bindings(_X, _Bs) -> ok. +assert_args_equivalence(X, Args) -> + rabbit_exchange:assert_args_equivalence(X, Args). diff --git a/src/rabbit_exchange_type_registry.erl b/src/rabbit_exchange_type_registry.erl index 175d15ad..7906fbee 100644 --- a/src/rabbit_exchange_type_registry.erl +++ b/src/rabbit_exchange_type_registry.erl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -35,8 +35,8 @@ -export([start_link/0]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). -export([register/2, binary_to_type/1, lookup_module/1]). @@ -45,10 +45,13 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | {'error', term()} | {'ok', pid()}). +-spec(start_link/0 :: + () -> 'ignore' | rabbit_types:ok_or_error2(pid(), term())). -spec(register/2 :: (binary(), atom()) -> 'ok'). --spec(binary_to_type/1 :: (binary()) -> atom() | {'error', 'not_found'}). --spec(lookup_module/1 :: (atom()) -> {'ok', atom()} | {'error', 'not_found'}). +-spec(binary_to_type/1 :: + (binary()) -> atom() | rabbit_types:error('not_found')). +-spec(lookup_module/1 :: + (atom()) -> rabbit_types:ok_or_error2(atom(), 'not_found')). -endif. diff --git a/src/rabbit_exchange_type_topic.erl b/src/rabbit_exchange_type_topic.erl index 8a3dceea..89b2441e 100644 --- a/src/rabbit_exchange_type_topic.erl +++ b/src/rabbit_exchange_type_topic.erl @@ -18,11 +18,11 @@ %% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial %% Technologies LLC, and Rabbit Technologies Ltd. %% -%% Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift %% Ltd. Portions created by Cohesive Financial Technologies LLC are -%% Copyright (C) 2007-2009 Cohesive Financial Technologies +%% Copyright (C) 2007-2010 Cohesive Financial Technologies %% LLC. Portions created by Rabbit Technologies Ltd are Copyright -%% (C) 2007-2009 Rabbit Technologies Ltd. +%% (C) 2007-2010 Rabbit Technologies Ltd. %% %% All Rights Reserved. %% @@ -35,8 +35,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). --export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, @@ -49,7 +49,9 @@ -export([topic_matches/2]). -ifdef(use_specs). + -spec(topic_matches/2 :: (binary(), binary()) -> boolean()). + -endif. description() -> @@ -65,8 +67,7 @@ publish(#exchange{name = Name}, Delivery = Delivery). split_topic_key(Key) -> - {ok, KeySplit} = regexp:split(binary_to_list(Key), "\\."), - KeySplit. + re:split(Key, "\\.", [{return, list}]). topic_matches(PatternKey, RoutingKey) -> P = split_topic_key(PatternKey), @@ -99,3 +100,5 @@ recover(_X, _Bs) -> ok. delete(_X, _Bs) -> ok. add_binding(_X, _B) -> ok. remove_bindings(_X, _Bs) -> ok. +assert_args_equivalence(X, Args) -> + rabbit_exchange:assert_args_equivalence(X, Args). diff --git a/src/rabbit_framing_channel.erl b/src/rabbit_framing_channel.erl index b7c6aa96..00b74ad0 100644 --- a/src/rabbit_framing_channel.erl +++ b/src/rabbit_framing_channel.erl @@ -32,21 +32,22 @@ -module(rabbit_framing_channel). -include("rabbit.hrl"). --export([start_link/2, process/2, shutdown/1]). +-export([start_link/3, process/2, shutdown/1]). %% internal --export([mainloop/1]). +-export([mainloop/2]). %%-------------------------------------------------------------------- -start_link(StartFun, StartArgs) -> - spawn_link( - fun () -> - %% we trap exits so that a normal termination of the - %% channel or reader process terminates us too. - process_flag(trap_exit, true), - mainloop(apply(StartFun, StartArgs)) - end). +start_link(StartFun, StartArgs, Protocol) -> + {ok, spawn_link( + fun () -> + %% we trap exits so that a normal termination of + %% the channel or reader process terminates us too. + process_flag(trap_exit, true), + {ok, ChannelPid} = apply(StartFun, StartArgs), + mainloop(ChannelPid, Protocol) + end)}. process(Pid, Frame) -> Pid ! {frame, Frame}, @@ -72,39 +73,42 @@ read_frame(ChannelPid) -> Msg -> exit({unexpected_message, Msg}) end. -mainloop(ChannelPid) -> - {method, MethodName, FieldsBin} = read_frame(ChannelPid), - Method = rabbit_framing:decode_method_fields(MethodName, FieldsBin), - case rabbit_framing:method_has_content(MethodName) of - true -> rabbit_channel:do(ChannelPid, Method, - collect_content(ChannelPid, MethodName)); - false -> rabbit_channel:do(ChannelPid, Method) - end, - ?MODULE:mainloop(ChannelPid). +mainloop(ChannelPid, Protocol) -> + case read_frame(ChannelPid) of + {method, MethodName, FieldsBin} -> + Method = Protocol:decode_method_fields(MethodName, FieldsBin), + case Protocol:method_has_content(MethodName) of + true -> {ClassId, _MethodId} = Protocol:method_id(MethodName), + rabbit_channel:do(ChannelPid, Method, + collect_content(ChannelPid, + ClassId, + Protocol)); + false -> rabbit_channel:do(ChannelPid, Method) + end, + ?MODULE:mainloop(ChannelPid, Protocol); + _ -> + unexpected_frame("expected method frame, " + "got non method frame instead", + []) + end. -collect_content(ChannelPid, MethodName) -> - {ClassId, _MethodId} = rabbit_framing:method_id(MethodName), +collect_content(ChannelPid, ClassId, Protocol) -> case read_frame(ChannelPid) of - {content_header, HeaderClassId, 0, BodySize, PropertiesBin} -> - if HeaderClassId == ClassId -> - Payload = collect_content_payload(ChannelPid, BodySize, []), - #content{class_id = ClassId, - properties = none, - properties_bin = PropertiesBin, - payload_fragments_rev = Payload}; - true -> - rabbit_misc:protocol_error( - command_invalid, - "expected content header for class ~w, " - "got one for class ~w instead", - [ClassId, HeaderClassId]) - end; + {content_header, ClassId, 0, BodySize, PropertiesBin} -> + Payload = collect_content_payload(ChannelPid, BodySize, []), + #content{class_id = ClassId, + properties = none, + properties_bin = PropertiesBin, + protocol = Protocol, + payload_fragments_rev = Payload}; + {content_header, HeaderClassId, 0, _BodySize, _PropertiesBin} -> + unexpected_frame("expected content header for class ~w, " + "got one for class ~w instead", + [ClassId, HeaderClassId]); _ -> - rabbit_misc:protocol_error( - command_invalid, - "expected content header for class ~w, " - "got non content header frame instead", - [ClassId]) + unexpected_frame("expected content header for class ~w, " + "got non content header frame instead", + [ClassId]) end. collect_content_payload(_ChannelPid, 0, Acc) -> @@ -116,8 +120,10 @@ collect_content_payload(ChannelPid, RemainingByteCount, Acc) -> RemainingByteCount - size(FragmentBin), [FragmentBin | Acc]); _ -> - rabbit_misc:protocol_error( - command_invalid, - "expected content body, got non content body frame instead", - []) + unexpected_frame("expected content body, " + "got non content body frame instead", + []) end. + +unexpected_frame(ExplanationFormat, Params) -> + rabbit_misc:protocol_error(unexpected_frame, ExplanationFormat, Params). diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index 1ae8f7da..af1c629f 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -31,15 +31,13 @@ -module(rabbit_guid). --include("rabbit.hrl"). - -behaviour(gen_server). -export([start_link/0]). -export([guid/0, string_guid/1, binstring_guid/1]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). -define(SERVER, ?MODULE). -define(SERIAL_FILENAME, "rabbit_serial"). @@ -50,7 +48,11 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-export_type([guid/0]). + +-type(guid() :: binary()). + +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(guid/0 :: () -> guid()). -spec(string_guid/1 :: (any()) -> string()). -spec(binstring_guid/1 :: (any()) -> binary()). diff --git a/src/rabbit_heartbeat.erl b/src/rabbit_heartbeat.erl index 45565705..1989fb7b 100644 --- a/src/rabbit_heartbeat.erl +++ b/src/rabbit_heartbeat.erl @@ -33,68 +33,72 @@ -export([start_heartbeat/2]). +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_heartbeat/2 :: (rabbit_net:socket(), non_neg_integer()) -> + rabbit_types:maybe({pid(), pid()})). + +-endif. + +%%---------------------------------------------------------------------------- + start_heartbeat(_Sock, 0) -> none; start_heartbeat(Sock, TimeoutSec) -> Parent = self(), - %% we check for incoming data every interval, and time out after - %% two checks with no change. As a result we will time out between - %% 2 and 3 intervals after the last data has been received. - spawn_link(fun () -> heartbeater(Sock, TimeoutSec * 1000, - recv_oct, 1, - fun () -> - Parent ! timeout, - stop - end, - erlang:monitor(process, Parent)) end), %% the 'div 2' is there so that we don't end up waiting for nearly %% 2 * TimeoutSec before sending a heartbeat in the boundary case %% where the last message was sent just after a heartbeat. - spawn_link(fun () -> heartbeater(Sock, TimeoutSec * 1000 div 2, - send_oct, 0, - fun () -> - catch rabbit_net:send(Sock, rabbit_binary_generator:build_heartbeat_frame()), - continue - end, - erlang:monitor(process, Parent)) end), - ok. + Sender = + spawn_link(fun () -> heartbeater({Sock, TimeoutSec * 1000 div 2, + send_oct, 0, + fun () -> + catch rabbit_net:send(Sock, rabbit_binary_generator:build_heartbeat_frame()), + continue + end}, Parent) end), + %% we check for incoming data every interval, and time out after + %% two checks with no change. As a result we will time out between + %% 2 and 3 intervals after the last data has been received. + Receiver = + spawn_link(fun () -> heartbeater({Sock, TimeoutSec * 1000, + recv_oct, 1, + fun () -> + Parent ! timeout, + stop + end}, Parent) end), + {Sender, Receiver}. -%% Y-combinator, posted by Vladimir Sekissov to the Erlang mailing list -%% http://www.erlang.org/ml-archive/erlang-questions/200301/msg00053.html -y(X) -> - F = fun (P) -> X(fun (A) -> (P(P))(A) end) end, - F(F). +heartbeater(Params, Parent) -> + heartbeater(Params, erlang:monitor(process, Parent), {0, 0}). -heartbeater(Sock, TimeoutMillisec, StatName, Threshold, Handler, MonitorRef) -> - Heartbeat = - fun (F) -> - fun ({StatVal, SameCount}) -> - receive - {'DOWN', MonitorRef, process, _Object, _Info} -> ok; - Other -> exit({unexpected_message, Other}) - after TimeoutMillisec -> - case rabbit_net:getstat(Sock, [StatName]) of - {ok, [{StatName, NewStatVal}]} -> - if NewStatVal =/= StatVal -> - F({NewStatVal, 0}); - SameCount < Threshold -> - F({NewStatVal, SameCount + 1}); - true -> - case Handler() of - stop -> ok; - continue -> F({NewStatVal, 0}) - end - end; - {error, einval} -> - %% the socket is dead, most - %% likely because the - %% connection is being shut - %% down -> terminate - ok; - {error, Reason} -> - exit({cannot_get_socket_stats, Reason}) - end - end - end - end, - (y(Heartbeat))({0, 0}). +heartbeater({Sock, TimeoutMillisec, StatName, Threshold, Handler} = Params, + MonitorRef, {StatVal, SameCount}) -> + receive + {'DOWN', MonitorRef, process, _Object, _Info} -> + ok; + Other -> + exit({unexpected_message, Other}) + after TimeoutMillisec -> + case rabbit_net:getstat(Sock, [StatName]) of + {ok, [{StatName, NewStatVal}]} -> + Recurse = fun (V) -> heartbeater(Params, MonitorRef, V) end, + if NewStatVal =/= StatVal -> + Recurse({NewStatVal, 0}); + SameCount < Threshold -> + Recurse({NewStatVal, SameCount + 1}); + true -> + case Handler() of + stop -> ok; + continue -> Recurse({NewStatVal, 0}) + end + end; + {error, einval} -> + %% the socket is dead, most likely because the + %% connection is being shut down -> terminate + ok; + {error, Reason} -> + exit({cannot_get_socket_stats, Reason}) + end + end. diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index b4fd9156..4e0dad84 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -34,25 +34,25 @@ -export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, needs_sync/1, sync/1, - handle_pre_hibernate/1, status/1]). + set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, + idle_timeout/1, handle_pre_hibernate/1, status/1]). --export([start/1]). +-export([start/1, stop/0]). -behaviour(rabbit_backing_queue). -include("rabbit.hrl"). --record(iv_state, { queue, qname, len, pending_ack }). +-record(iv_state, { queue, qname, durable, len, pending_ack }). -record(tx, { pending_messages, pending_acks, is_persistent }). -ifdef(use_specs). --type(ack() :: guid() | 'blank_ack'). +-type(ack() :: rabbit_guid:guid() | 'blank_ack'). -type(state() :: #iv_state { queue :: queue(), - qname :: queue_name(), + qname :: rabbit_amqqueue:name(), len :: non_neg_integer(), - pending_ack :: dict() + pending_ack :: dict:dictionary() }). -include("rabbit_backing_queue_spec.hrl"). @@ -61,23 +61,31 @@ start(DurableQueues) -> ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]). +stop() -> + ok = rabbit_sup:stop_child(rabbit_persister). + init(QName, IsDurable, Recover) -> Q = queue:from_list(case IsDurable andalso Recover of true -> rabbit_persister:queue_content(QName); false -> [] end), - #iv_state { queue = Q, qname = QName, len = queue:len(Q), + #iv_state { queue = Q, + qname = QName, + durable = IsDurable, + len = queue:len(Q), pending_ack = dict:new() }. terminate(State) -> State #iv_state { queue = queue:new(), len = 0, pending_ack = dict:new() }. -delete_and_terminate(State = #iv_state { qname = QName, pending_ack = PA }) -> - ok = persist_acks(none, QName, dict:fetch_keys(PA), PA), +delete_and_terminate(State = #iv_state { qname = QName, durable = IsDurable, + pending_ack = PA }) -> + ok = persist_acks(QName, IsDurable, none, dict:fetch_keys(PA), PA), {_PLen, State1} = purge(State), terminate(State1). -purge(State = #iv_state { len = Len, queue = Q, qname = QName }) -> +purge(State = #iv_state { queue = Q, qname = QName, durable = IsDurable, + len = Len }) -> %% We do not purge messages pending acks. {AckTags, PA} = rabbit_misc:queue_fold( @@ -85,57 +93,63 @@ purge(State = #iv_state { len = Len, queue = Q, qname = QName }) -> Acc; ({Msg = #basic_message { guid = Guid }, IsDelivered}, {AckTagsN, PAN}) -> - ok = persist_delivery(QName, Msg, IsDelivered), + ok = persist_delivery(QName, IsDurable, IsDelivered, Msg), {[Guid | AckTagsN], dict:store(Guid, Msg, PAN)} end, {[], dict:new()}, Q), - ok = persist_acks(none, QName, AckTags, PA), + ok = persist_acks(QName, IsDurable, none, AckTags, PA), {Len, State #iv_state { len = 0, queue = queue:new() }}. -publish(Msg, State = #iv_state { queue = Q, qname = QName, len = Len }) -> - ok = persist_message(none, QName, Msg), +publish(Msg, State = #iv_state { queue = Q, qname = QName, durable = IsDurable, + len = Len }) -> + ok = persist_message(QName, IsDurable, none, Msg), State #iv_state { queue = queue:in({Msg, false}, Q), len = Len + 1 }. publish_delivered(false, _Msg, State) -> {blank_ack, State}; publish_delivered(true, Msg = #basic_message { guid = Guid }, - State = #iv_state { qname = QName, len = 0, - pending_ack = PA }) -> - ok = persist_message(none, QName, Msg), - ok = persist_delivery(QName, Msg, false), + State = #iv_state { qname = QName, durable = IsDurable, + len = 0, pending_ack = PA }) -> + ok = persist_message(QName, IsDurable, none, Msg), + ok = persist_delivery(QName, IsDurable, false, Msg), {Guid, State #iv_state { pending_ack = dict:store(Guid, Msg, PA) }}. fetch(_AckRequired, State = #iv_state { len = 0 }) -> {empty, State}; -fetch(AckRequired, State = #iv_state { queue = Q, qname = QName, len = Len, +fetch(AckRequired, State = #iv_state { len = Len, queue = Q, qname = QName, + durable = IsDurable, pending_ack = PA }) -> {{value, {Msg = #basic_message { guid = Guid }, IsDelivered}}, Q1} = queue:out(Q), Len1 = Len - 1, - ok = persist_delivery(QName, Msg, IsDelivered), + ok = persist_delivery(QName, IsDurable, IsDelivered, Msg), PA1 = dict:store(Guid, Msg, PA), {AckTag, PA2} = case AckRequired of true -> {Guid, PA1}; - false -> ok = persist_acks(none, QName, [Guid], PA1), + false -> ok = persist_acks(QName, IsDurable, none, + [Guid], PA1), {blank_ack, PA} end, {{Msg, IsDelivered, AckTag, Len1}, State #iv_state { queue = Q1, len = Len1, pending_ack = PA2 }}. -ack(AckTags, State = #iv_state { qname = QName, pending_ack = PA }) -> - ok = persist_acks(none, QName, AckTags, PA), +ack(AckTags, State = #iv_state { qname = QName, durable = IsDurable, + pending_ack = PA }) -> + ok = persist_acks(QName, IsDurable, none, AckTags, PA), PA1 = remove_acks(AckTags, PA), State #iv_state { pending_ack = PA1 }. -tx_publish(Txn, Msg, State = #iv_state { qname = QName }) -> +tx_publish(Txn, Msg, State = #iv_state { qname = QName, + durable = IsDurable }) -> Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }), - ok = persist_message(Txn, QName, Msg), + ok = persist_message(QName, IsDurable, Txn, Msg), State. -tx_ack(Txn, AckTags, State = #iv_state { qname = QName, pending_ack = PA }) -> +tx_ack(Txn, AckTags, State = #iv_state { qname = QName, durable = IsDurable, + pending_ack = PA }) -> Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }), - ok = persist_acks(Txn, QName, AckTags, PA), + ok = persist_acks(QName, IsDurable, Txn, AckTags, PA), State. tx_rollback(Txn, State = #iv_state { qname = QName }) -> @@ -186,9 +200,9 @@ set_ram_duration_target(_DurationTarget, State) -> State. ram_duration(State) -> {0, State}. -needs_sync(_State) -> false. +needs_idle_timeout(_State) -> false. -sync(State) -> State. +idle_timeout(State) -> State. handle_pre_hibernate(State) -> State. @@ -228,32 +242,32 @@ do_if_persistent(F, Txn, QName) -> %%---------------------------------------------------------------------------- -persist_message(_Txn, _QName, #basic_message { is_persistent = false }) -> - ok; -persist_message(Txn, QName, Msg) -> +persist_message(QName, true, Txn, Msg = #basic_message { + is_persistent = true }) -> Msg1 = Msg #basic_message { - %% don't persist any recoverable decoded properties, - %% rebuild from properties_bin on restore + %% don't persist any recoverable decoded properties content = rabbit_binary_parser:clear_decoded_content( Msg #basic_message.content)}, persist_work(Txn, QName, - [{publish, Msg1, {QName, Msg1 #basic_message.guid}}]). + [{publish, Msg1, {QName, Msg1 #basic_message.guid}}]); +persist_message(_QName, _IsDurable, _Txn, _Msg) -> + ok. -persist_delivery(_QName, #basic_message { is_persistent = false }, - _IsDelivered) -> - ok; -persist_delivery(_QName, _Message, true) -> - ok; -persist_delivery(QName, #basic_message { guid = Guid }, _IsDelivered) -> - persist_work(none, QName, [{deliver, {QName, Guid}}]). +persist_delivery(QName, true, false, #basic_message { is_persistent = true, + guid = Guid }) -> + persist_work(none, QName, [{deliver, {QName, Guid}}]); +persist_delivery(_QName, _IsDurable, _IsDelivered, _Msg) -> + ok. -persist_acks(Txn, QName, AckTags, PA) -> +persist_acks(QName, true, Txn, AckTags, PA) -> persist_work(Txn, QName, [{ack, {QName, Guid}} || Guid <- AckTags, begin {ok, Msg} = dict:find(Guid, PA), Msg #basic_message.is_persistent - end]). + end]); +persist_acks(_QName, _IsDurable, _Txn, _AckTags, _PA) -> + ok. persist_work(_Txn,_QName, []) -> ok; diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 878af029..813ccc75 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -45,7 +45,7 @@ -type(maybe_pid() :: pid() | 'undefined'). --spec(start_link/2 :: (pid(), non_neg_integer()) -> pid()). +-spec(start_link/2 :: (pid(), non_neg_integer()) -> rabbit_types:ok(pid())). -spec(shutdown/1 :: (maybe_pid()) -> 'ok'). -spec(limit/2 :: (maybe_pid(), non_neg_integer()) -> 'ok' | 'stopped'). -spec(can_send/3 :: (maybe_pid(), pid(), boolean()) -> boolean()). @@ -74,8 +74,7 @@ %%---------------------------------------------------------------------------- start_link(ChPid, UnackedMsgCount) -> - {ok, Pid} = gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []), - Pid. + gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []). shutdown(undefined) -> ok; diff --git a/src/rabbit_load.erl b/src/rabbit_load.erl index 4f467162..e0457b1e 100644 --- a/src/rabbit_load.erl +++ b/src/rabbit_load.erl @@ -40,11 +40,10 @@ -ifdef(use_specs). --type(erlang_node() :: atom()). --type(load() :: {{non_neg_integer(), integer() | 'unknown'}, erlang_node()}). +-type(load() :: {{non_neg_integer(), integer() | 'unknown'}, node()}). -spec(local_load/0 :: () -> load()). -spec(remote_loads/0 :: () -> [load()]). --spec(pick/0 :: () -> erlang_node()). +-spec(pick/0 :: () -> node()). -endif. diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl index cc80e360..85bcbca0 100644 --- a/src/rabbit_log.erl +++ b/src/rabbit_log.erl @@ -50,7 +50,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(debug/1 :: (string()) -> 'ok'). -spec(debug/2 :: (string(), [any()]) -> 'ok'). -spec(info/1 :: (string()) -> 'ok'). diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 91e97ffe..bdf38075 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -86,11 +86,12 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}). +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(update/0 :: () -> 'ok'). -spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok'). -spec(deregister/1 :: (pid()) -> 'ok'). --spec(report_ram_duration/2 :: (pid(), float() | 'infinity') -> number()). +-spec(report_ram_duration/2 :: + (pid(), float() | 'infinity') -> number() | 'infinity'). -spec(stop/0 :: () -> 'ok'). -endif. diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 723b818b..050b499f 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -32,14 +32,16 @@ -module(rabbit_misc). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). + -include_lib("kernel/include/file.hrl"). -export([method_record_type/1, polite_pause/0, polite_pause/1]). -export([die/1, frame_error/2, amqp_error/4, - protocol_error/3, protocol_error/4]). --export([not_found/1]). + protocol_error/3, protocol_error/4, protocol_error/1]). +-export([not_found/1, assert_args_equivalence/4]). -export([get_config/1, get_config/2, set_config/2]). -export([dirty_read/1]). +-export([table_lookup/2]). -export([r/3, r/2, r_arg/4, rs/1]). -export([enable_cover/0, report_cover/0]). -export([enable_cover/1, report_cover/1]). @@ -60,7 +62,8 @@ -export([sort_field_table/1]). -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). --export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). +-export([recursive_delete/1, dict_cons/3, orddict_cons/3, + unlink_and_capture_exit/1]). -import(mnesia). -import(lists). @@ -71,61 +74,91 @@ -ifdef(use_specs). --include_lib("kernel/include/inet.hrl"). +-export_type([resource_name/0]). --type(ok_or_error() :: 'ok' | {'error', any()}). +-type(ok_or_error() :: rabbit_types:ok_or_error(any())). +-type(thunk(T) :: fun(() -> T)). +-type(resource_name() :: binary()). --spec(method_record_type/1 :: (tuple()) -> atom()). +-spec(method_record_type/1 :: (rabbit_framing:amqp_method_record()) + -> rabbit_framing:amqp_method_name()). -spec(polite_pause/0 :: () -> 'done'). -spec(polite_pause/1 :: (non_neg_integer()) -> 'done'). --spec(die/1 :: (atom()) -> no_return()). --spec(frame_error/2 :: (atom(), binary()) -> no_return()). --spec(amqp_error/4 :: (atom(), string(), [any()], atom()) -> amqp_error()). --spec(protocol_error/3 :: (atom(), string(), [any()]) -> no_return()). --spec(protocol_error/4 :: (atom(), string(), [any()], atom()) -> no_return()). --spec(not_found/1 :: (r(atom())) -> no_return()). --spec(get_config/1 :: (atom()) -> {'ok', any()} | not_found()). +-spec(die/1 :: (rabbit_framing:amqp_exception()) -> no_return()). +-spec(frame_error/2 :: (rabbit_framing:amqp_method_name(), binary()) + -> no_return()). +-spec(amqp_error/4 :: + (rabbit_framing:amqp_exception(), string(), [any()], + rabbit_framing:amqp_method_name()) + -> rabbit_types:amqp_error()). +-spec(protocol_error/3 :: (rabbit_framing:amqp_exception(), string(), [any()]) + -> no_return()). +-spec(protocol_error/4 :: + (rabbit_framing:amqp_exception(), string(), [any()], + rabbit_framing:amqp_method_name()) + -> no_return()). +-spec(protocol_error/1 :: (rabbit_types:amqp_error()) -> no_return()). +-spec(not_found/1 :: (rabbit_types:r(atom())) -> no_return()). +-spec(assert_args_equivalence/4 :: (rabbit_framing:amqp_table(), + rabbit_framing:amqp_table(), + rabbit_types:r(any()), [binary()]) -> + 'ok' | no_return()). +-spec(get_config/1 :: + (atom()) -> rabbit_types:ok_or_error2(any(), 'not_found')). -spec(get_config/2 :: (atom(), A) -> A). -spec(set_config/2 :: (atom(), any()) -> 'ok'). --spec(dirty_read/1 :: ({atom(), any()}) -> {'ok', any()} | not_found()). --spec(r/3 :: (vhost() | r(atom()), K, resource_name()) -> - r(K) when is_subtype(K, atom())). --spec(r/2 :: (vhost(), K) -> #resource{virtual_host :: vhost(), - kind :: K, - name :: '_'} - when is_subtype(K, atom())). --spec(r_arg/4 :: (vhost() | r(atom()), K, amqp_table(), binary()) -> - undefined | r(K) when is_subtype(K, atom())). --spec(rs/1 :: (r(atom())) -> string()). +-spec(dirty_read/1 :: + ({atom(), any()}) -> rabbit_types:ok_or_error2(any(), 'not_found')). +-spec(table_lookup/2 :: + (rabbit_framing:amqp_table(), binary()) + -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}). +-spec(r/2 :: (rabbit_types:vhost(), K) + -> rabbit_types:r3(rabbit_types:vhost(), K, '_') + when is_subtype(K, atom())). +-spec(r/3 :: + (rabbit_types:vhost() | rabbit_types:r(atom()), K, resource_name()) + -> rabbit_types:r3(rabbit_types:vhost(), K, resource_name()) + when is_subtype(K, atom())). +-spec(r_arg/4 :: + (rabbit_types:vhost() | rabbit_types:r(atom()), K, + rabbit_framing:amqp_table(), binary()) + -> undefined | rabbit_types:r(K) + when is_subtype(K, atom())). +-spec(rs/1 :: (rabbit_types:r(atom())) -> string()). -spec(enable_cover/0 :: () -> ok_or_error()). -spec(start_cover/1 :: ([{string(), string()} | string()]) -> 'ok'). -spec(report_cover/0 :: () -> 'ok'). --spec(enable_cover/1 :: (file_path()) -> ok_or_error()). --spec(report_cover/1 :: (file_path()) -> 'ok'). +-spec(enable_cover/1 :: (file:filename()) -> ok_or_error()). +-spec(report_cover/1 :: (file:filename()) -> 'ok'). -spec(throw_on_error/2 :: - (atom(), thunk({error, any()} | {ok, A} | A)) -> A). + (atom(), thunk(rabbit_types:error(any()) | {ok, A} | A)) -> A). -spec(with_exit_handler/2 :: (thunk(A), thunk(A)) -> A). -spec(filter_exit_map/2 :: (fun ((A) -> B), [A]) -> [B]). --spec(with_user/2 :: (username(), thunk(A)) -> A). --spec(with_vhost/2 :: (vhost(), thunk(A)) -> A). --spec(with_user_and_vhost/3 :: (username(), vhost(), thunk(A)) -> A). +-spec(with_user/2 :: (rabbit_access_control:username(), thunk(A)) -> A). +-spec(with_vhost/2 :: (rabbit_types:vhost(), thunk(A)) -> A). +-spec(with_user_and_vhost/3 :: + (rabbit_access_control:username(), rabbit_types:vhost(), thunk(A)) + -> A). -spec(execute_mnesia_transaction/1 :: (thunk(A)) -> A). -spec(ensure_ok/2 :: (ok_or_error(), atom()) -> 'ok'). --spec(makenode/1 :: ({string(), string()} | string()) -> erlang_node()). --spec(nodeparts/1 :: (erlang_node() | string()) -> {string(), string()}). +-spec(makenode/1 :: ({string(), string()} | string()) -> node()). +-spec(nodeparts/1 :: (node() | string()) -> {string(), string()}). -spec(cookie_hash/0 :: () -> string()). --spec(tcp_name/3 :: (atom(), ip_address(), ip_port()) -> atom()). +-spec(tcp_name/3 :: + (atom(), inet:ip_address(), rabbit_networking:ip_port()) + -> atom()). -spec(intersperse/2 :: (A, [A]) -> [A]). -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(table_fold/3 :: (fun ((any(), A) -> A), A, atom()) -> A). -spec(dirty_read_all/1 :: (atom()) -> [any()]). --spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom()) -> - 'ok' | 'aborted'). --spec(dirty_dump_log/1 :: (file_path()) -> ok_or_error()). --spec(read_term_file/1 :: (file_path()) -> {'ok', [any()]} | {'error', any()}). --spec(write_term_file/2 :: (file_path(), [any()]) -> ok_or_error()). --spec(append_file/2 :: (file_path(), string()) -> ok_or_error()). +-spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom()) + -> 'ok' | 'aborted'). +-spec(dirty_dump_log/1 :: (file:filename()) -> ok_or_error()). +-spec(read_term_file/1 :: + (file:filename()) -> {'ok', [any()]} | rabbit_types:error(any())). +-spec(write_term_file/2 :: (file:filename(), [any()]) -> ok_or_error()). +-spec(append_file/2 :: (file:filename(), string()) -> ok_or_error()). -spec(ensure_parent_dirs_exist/1 :: (string()) -> 'ok'). -spec(format_stderr/2 :: (string(), [any()]) -> 'ok'). -spec(start_applications/1 :: ([atom()]) -> 'ok'). @@ -133,15 +166,21 @@ -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -spec(ceil/1 :: (number()) -> integer()). -spec(queue_fold/3 :: (fun ((any(), B) -> B), B, queue()) -> B). --spec(sort_field_table/1 :: (amqp_table()) -> amqp_table()). +-spec(sort_field_table/1 :: + (rabbit_framing:amqp_table()) -> rabbit_framing:amqp_table()). -spec(pid_to_string/1 :: (pid()) -> string()). -spec(string_to_pid/1 :: (string()) -> pid()). -spec(version_compare/2 :: (string(), string()) -> 'lt' | 'eq' | 'gt'). --spec(version_compare/3 :: (string(), string(), - ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) -> boolean()). --spec(recursive_delete/1 :: ([file_path()]) -> - 'ok' | {'error', {file_path(), any()}}). --spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). +-spec(version_compare/3 :: + (string(), string(), ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) + -> boolean()). +-spec(recursive_delete/1 :: + ([file:filename()]) + -> rabbit_types:ok_or_error({file:filename(), any()})). +-spec(dict_cons/3 :: (any(), any(), dict:dictionary()) -> + dict:dictionary()). +-spec(orddict_cons/3 :: (any(), any(), orddict:dictionary()) -> + orddict:dictionary()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). -endif. @@ -173,10 +212,27 @@ protocol_error(Name, ExplanationFormat, Params) -> protocol_error(Name, ExplanationFormat, Params, none). protocol_error(Name, ExplanationFormat, Params, Method) -> - exit(amqp_error(Name, ExplanationFormat, Params, Method)). + protocol_error(amqp_error(Name, ExplanationFormat, Params, Method)). + +protocol_error(#amqp_error{} = Error) -> + exit(Error). not_found(R) -> protocol_error(not_found, "no ~s", [rs(R)]). +assert_args_equivalence(Orig, New, Name, Keys) -> + [assert_args_equivalence1(Orig, New, Name, Key) || Key <- Keys], + ok. + +assert_args_equivalence1(Orig, New, Name, Key) -> + case {table_lookup(Orig, Key), table_lookup(New, Key)} of + {Same, Same} -> ok; + {Orig1, New1} -> protocol_error( + not_allowed, + "cannot redeclare ~s with inequivalent args for ~s: " + "required ~w, received ~w", + [rabbit_misc:rs(Name), Key, New1, Orig1]) + end. + get_config(Key) -> case dirty_read({rabbit_config, Key}) of {ok, {rabbit_config, Key, V}} -> {ok, V}; @@ -198,6 +254,12 @@ dirty_read(ReadSpec) -> [] -> {error, not_found} end. +table_lookup(Table, Key) -> + case lists:keysearch(Key, 1, Table) of + {value, {_, TypeBin, ValueBin}} -> {TypeBin, ValueBin}; + false -> undefined + end. + r(#resource{virtual_host = VHostPath}, Kind, Name) when is_binary(Name) -> #resource{virtual_host = VHostPath, kind = Kind, name = Name}; @@ -210,9 +272,9 @@ r(VHostPath, Kind) when is_binary(VHostPath) -> r_arg(#resource{virtual_host = VHostPath}, Kind, Table, Key) -> r_arg(VHostPath, Kind, Table, Key); r_arg(VHostPath, Kind, Table, Key) -> - case lists:keysearch(Key, 1, Table) of - {value, {_, longstr, NameBin}} -> r(VHostPath, Kind, NameBin); - false -> undefined + case table_lookup(Table, Key) of + {longstr, NameBin} -> r(VHostPath, Kind, NameBin); + undefined -> undefined end. rs(#resource{virtual_host = VHostPath, kind = Kind, name = Name}) -> @@ -242,12 +304,12 @@ report_cover([Root]) when is_atom(Root) -> report_cover(Root) -> Dir = filename:join(Root, "cover"), ok = filelib:ensure_dir(filename:join(Dir,"junk")), - lists:foreach(fun(F) -> file:delete(F) end, + lists:foreach(fun (F) -> file:delete(F) end, filelib:wildcard(filename:join(Dir, "*.html"))), {ok, SummaryFile} = file:open(filename:join(Dir, "summary.txt"), [write]), {CT, NCT} = lists:foldl( - fun(M,{CovTot, NotCovTot}) -> + fun (M,{CovTot, NotCovTot}) -> {ok, {M, {Cov, NotCov}}} = cover:analyze(M, module), ok = report_coverage_percentage(SummaryFile, Cov, NotCov, M), @@ -367,7 +429,7 @@ upmap(F, L) -> Parent = self(), Ref = make_ref(), [receive {Ref, Result} -> Result end - || _ <- [spawn(fun() -> Parent ! {Ref, F(X)} end) || X <- L]]. + || _ <- [spawn(fun () -> Parent ! {Ref, F(X)} end) || X <- L]]. map_in_order(F, L) -> lists:reverse( @@ -537,19 +599,25 @@ pid_to_string(Pid) when is_pid(Pid) -> %% inverse of above string_to_pid(Str) -> + Err = {error, {invalid_pid_syntax, Str}}, %% The \ before the trailing $ is only there to keep emacs %% font-lock from getting confused. case re:run(Str, "^<(.*)\\.([0-9]+)\\.([0-9]+)>\$", [{capture,all_but_first,list}]) of {match, [NodeStr, IdStr, SerStr]} -> - %% turn the triple into a pid - see pid_to_string - <<131,NodeEnc/binary>> = term_to_binary(list_to_atom(NodeStr)), + %% the NodeStr atom might be quoted, so we have to parse + %% it rather than doing a simple list_to_atom + NodeAtom = case erl_scan:string(NodeStr) of + {ok, [{atom, _, X}], _} -> X; + {error, _, _} -> throw(Err) + end, + <<131,NodeEnc/binary>> = term_to_binary(NodeAtom), Id = list_to_integer(IdStr), Ser = list_to_integer(SerStr), binary_to_term(<<131,103,NodeEnc/binary,Id:32,Ser:32,0:8>>); nomatch -> - throw({error, {invalid_pid_syntax, Str}}) - end. + throw(Err) + end. version_compare(A, B, lte) -> case version_compare(A, B) of @@ -625,6 +693,9 @@ recursive_delete1(Path) -> dict_cons(Key, Value, Dict) -> dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). +orddict_cons(Key, Value, Dict) -> + orddict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). + unlink_and_capture_exit(Pid) -> unlink(Pid), receive {'EXIT', Pid, _} -> ok diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index 55a6761d..c808499b 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -29,11 +29,12 @@ %% Contributor(s): ______________________________________. %% + -module(rabbit_mnesia). -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, - cluster/1, reset/0, force_reset/0, is_clustered/0, - empty_ram_only_tables/0]). + cluster/1, force_cluster/1, reset/0, force_reset/0, + is_clustered/0, empty_ram_only_tables/0]). -export([table_names/0]). @@ -47,12 +48,18 @@ -ifdef(use_specs). --spec(status/0 :: () -> [{'nodes' | 'running_nodes', [erlang_node()]}]). --spec(dir/0 :: () -> file_path()). +-export_type([node_type/0]). + +-type(node_type() :: disc_only | disc | ram | unknown). +-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | + {'running_nodes', [node()]}]). +-spec(dir/0 :: () -> file:filename()). -spec(ensure_mnesia_dir/0 :: () -> 'ok'). -spec(init/0 :: () -> 'ok'). -spec(is_db_empty/0 :: () -> boolean()). --spec(cluster/1 :: ([erlang_node()]) -> 'ok'). +-spec(cluster/1 :: ([node()]) -> 'ok'). +-spec(force_cluster/1 :: ([node()]) -> 'ok'). +-spec(cluster/2 :: ([node()], boolean()) -> 'ok'). -spec(reset/0 :: () -> 'ok'). -spec(force_reset/0 :: () -> 'ok'). -spec(is_clustered/0 :: () -> boolean()). @@ -64,13 +71,26 @@ %%---------------------------------------------------------------------------- status() -> - [{nodes, mnesia:system_info(db_nodes)}, + [{nodes, case mnesia:system_info(is_running) of + yes -> [{Key, Nodes} || + {Key, CopyType} <- [{disc_only, disc_only_copies}, + {disc, disc_copies}, + {ram, ram_copies}], + begin + Nodes = nodes_of_type(CopyType), + Nodes =/= [] + end]; + no -> case mnesia:system_info(db_nodes) of + [] -> []; + Nodes -> [{unknown, Nodes}] + end + end}, {running_nodes, mnesia:system_info(running_db_nodes)}]. init() -> ok = ensure_mnesia_running(), ok = ensure_mnesia_dir(), - ok = init_db(read_cluster_nodes_config()), + ok = init_db(read_cluster_nodes_config(), true), ok = wait_for_tables(), ok. @@ -78,16 +98,22 @@ is_db_empty() -> lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, table_names()). +cluster(ClusterNodes) -> + cluster(ClusterNodes, false). +force_cluster(ClusterNodes) -> + cluster(ClusterNodes, true). + %% Alter which disk nodes this node is clustered with. This can be a %% subset of all the disk nodes in the cluster but can (and should) %% include the node itself if it is to be a disk rather than a ram -%% node. -cluster(ClusterNodes) -> +%% node. If Force is false, only connections to online nodes are +%% allowed. +cluster(ClusterNodes, Force) -> ok = ensure_mnesia_not_running(), ok = ensure_mnesia_dir(), rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), try - ok = init_db(ClusterNodes), + ok = init_db(ClusterNodes, Force), ok = wait_for_tables(), ok = create_cluster_nodes_config(ClusterNodes) after @@ -118,6 +144,15 @@ empty_ram_only_tables() -> %%-------------------------------------------------------------------- +nodes_of_type(Type) -> + %% This function should return the nodes of a certain type (ram, + %% disc or disc_only) in the current cluster. The type of nodes + %% is determined when the cluster is initially configured. + %% Specifically, we check whether a certain table, which we know + %% will be written to disk on a disc node, is stored on disk or in + %% RAM. + mnesia:table_info(rabbit_durable_exchange, Type). + table_definitions() -> [{rabbit_user, [{record_name, user}, @@ -149,6 +184,8 @@ table_definitions() -> [{record_name, reverse_route}, {attributes, record_info(fields, reverse_route)}, {type, ordered_set}]}, + %% Consider the implications to nodes_of_type/1 before altering + %% the next entry. {rabbit_durable_exchange, [{record_name, exchange}, {attributes, record_info(fields, exchange)}, @@ -227,20 +264,9 @@ read_cluster_nodes_config() -> case rabbit_misc:read_term_file(FileName) of {ok, [ClusterNodes]} -> ClusterNodes; {error, enoent} -> - case application:get_env(cluster_config) of + case application:get_env(cluster_nodes) of undefined -> []; - {ok, DefaultFileName} -> - case file:consult(DefaultFileName) of - {ok, [ClusterNodes]} -> ClusterNodes; - {error, enoent} -> - error_logger:warning_msg( - "default cluster config file ~p does not exist~n", - [DefaultFileName]), - []; - {error, Reason} -> - throw({error, {cannot_read_cluster_nodes_config, - DefaultFileName, Reason}}) - end + {ok, ClusterNodes} -> ClusterNodes end; {error, Reason} -> throw({error, {cannot_read_cluster_nodes_config, @@ -259,38 +285,56 @@ delete_cluster_nodes_config() -> %% Take a cluster node config and create the right kind of node - a %% standalone disk node, or disk or ram node connected to the -%% specified cluster nodes. -init_db(ClusterNodes) -> - case mnesia:change_config(extra_db_nodes, ClusterNodes -- [node()]) of - {ok, []} -> - case mnesia:system_info(use_dir) of - true -> - case check_schema_integrity() of - ok -> - ok; - {error, Reason} -> - %% NB: we cannot use rabbit_log here since - %% it may not have been started yet - error_logger:warning_msg( - "schema integrity check failed: ~p~n" - "moving database to backup location " - "and recreating schema from scratch~n", - [Reason]), - ok = move_db(), +%% specified cluster nodes. If Force is false, don't allow +%% connections to offline nodes. +init_db(ClusterNodes, Force) -> + UClusterNodes = lists:usort(ClusterNodes), + ProperClusterNodes = UClusterNodes -- [node()], + case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of + {ok, Nodes} -> + case Force of + false -> + FailedClusterNodes = ProperClusterNodes -- Nodes, + case FailedClusterNodes of + [] -> ok; + _ -> + throw({error, {failed_to_cluster_with, + FailedClusterNodes, + "Mnesia could not connect to some nodes."}}) + end; + _ -> ok + end, + case Nodes of + [] -> + case mnesia:system_info(use_dir) of + true -> + case check_schema_integrity() of + ok -> + ok; + {error, Reason} -> + %% NB: we cannot use rabbit_log here since + %% it may not have been started yet + error_logger:warning_msg( + "schema integrity check failed: ~p~n" + "moving database to backup location " + "and recreating schema from scratch~n", + [Reason]), + ok = move_db(), + ok = create_schema() + end; + false -> ok = create_schema() end; - false -> - ok = create_schema() - end; - {ok, [_|_]} -> - IsDiskNode = ClusterNodes == [] orelse - lists:member(node(), ClusterNodes), - ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, disc_copies), - ok = create_local_table_copies(case IsDiskNode of - true -> disc; - false -> ram - end); + [_|_] -> + IsDiskNode = ClusterNodes == [] orelse + lists:member(node(), ClusterNodes), + ok = wait_for_replicated_tables(), + ok = create_local_table_copy(schema, disc_copies), + ok = create_local_table_copies(case IsDiskNode of + true -> disc; + false -> ram + end) + end; {error, Reason} -> %% one reason we may end up here is if we try to join %% nodes together that are currently running standalone or @@ -346,7 +390,7 @@ table_has_copy_type(TabDef, DiscType) -> create_local_table_copies(Type) -> lists:foreach( - fun({Tab, TabDef}) -> + fun ({Tab, TabDef}) -> HasDiscCopies = table_has_copy_type(TabDef, disc_copies), HasDiscOnlyCopies = table_has_copy_type(TabDef, disc_only_copies), LocalTab = proplists:get_bool(local_content, TabDef), diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl new file mode 100644 index 00000000..4f178439 --- /dev/null +++ b/src/rabbit_msg_file.erl @@ -0,0 +1,136 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_file). + +-export([append/3, read/2, scan/2]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK_MARKER, 255). +-define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). +-define(GUID_SIZE_BYTES, 16). +-define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). +-define(SCAN_BLOCK_SIZE, 4194304). %% 4MB + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(io_device() :: any()). +-type(position() :: non_neg_integer()). +-type(msg_size() :: non_neg_integer()). +-type(file_size() :: non_neg_integer()). + +-spec(append/3 :: (io_device(), rabbit_guid:guid(), msg()) -> + rabbit_types:ok_or_error2(msg_size(), any())). +-spec(read/2 :: (io_device(), msg_size()) -> + rabbit_types:ok_or_error2({rabbit_guid:guid(), msg()}, + any())). +-spec(scan/2 :: (io_device(), file_size()) -> + {'ok', [{rabbit_guid:guid(), msg_size(), position()}], + position()}). + +-endif. + +%%---------------------------------------------------------------------------- + +append(FileHdl, Guid, MsgBody) + when is_binary(Guid) andalso size(Guid) =:= ?GUID_SIZE_BYTES -> + MsgBodyBin = term_to_binary(MsgBody), + MsgBodyBinSize = size(MsgBodyBin), + Size = MsgBodyBinSize + ?GUID_SIZE_BYTES, + case file_handle_cache:append(FileHdl, + <<Size:?INTEGER_SIZE_BITS, + Guid:?GUID_SIZE_BYTES/binary, + MsgBodyBin:MsgBodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of + ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; + KO -> KO + end. + +read(FileHdl, TotalSize) -> + Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, + BodyBinSize = Size - ?GUID_SIZE_BYTES, + case file_handle_cache:read(FileHdl, TotalSize) of + {ok, <<Size:?INTEGER_SIZE_BITS, + Guid:?GUID_SIZE_BYTES/binary, + MsgBodyBin:BodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} -> + {ok, {Guid, binary_to_term(MsgBodyBin)}}; + KO -> KO + end. + +scan(FileHdl, FileSize) when FileSize >= 0 -> + scan(FileHdl, FileSize, <<>>, 0, [], 0). + +scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> + {ok, Acc, ScanOffset}; +scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> + Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), + case file_handle_cache:read(FileHdl, Read) of + {ok, Data1} -> + {Data2, Acc1, ScanOffset1} = + scan(<<Data/binary, Data1/binary>>, Acc, ScanOffset), + ReadOffset1 = ReadOffset + size(Data1), + scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); + _KO -> + {ok, Acc, ScanOffset} + end. + +scan(<<>>, Acc, Offset) -> + {<<>>, Acc, Offset}; +scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> + {<<>>, Acc, Offset}; %% Nothing to do other than stop. +scan(<<Size:?INTEGER_SIZE_BITS, GuidAndMsg:Size/binary, + WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Acc, Offset) -> + TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, + case WriteMarker of + ?WRITE_OK_MARKER -> + %% Here we take option 5 from + %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in + %% which we read the Guid as a number, and then convert it + %% back to a binary in order to work around bugs in + %% Erlang's GC. + <<GuidNum:?GUID_SIZE_BITS, _Msg/binary>> = + <<GuidAndMsg:Size/binary>>, + <<Guid:?GUID_SIZE_BYTES/binary>> = <<GuidNum:?GUID_SIZE_BITS>>, + scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); + _ -> + scan(Rest, Acc, Offset + TotalSize) + end; +scan(Data, Acc, Offset) -> + {Data, Acc, Offset}. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl new file mode 100644 index 00000000..63100571 --- /dev/null +++ b/src/rabbit_msg_store.erl @@ -0,0 +1,1731 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store). + +-behaviour(gen_server2). + +-export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, + sync/3, client_init/2, client_terminate/1, + client_delete_and_terminate/3, successfully_recovered_state/1]). + +-export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(SYNC_INTERVAL, 5). %% milliseconds +-define(CLEAN_FILENAME, "clean.dot"). +-define(FILE_SUMMARY_FILENAME, "file_summary.ets"). + +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read]). +-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). +-define(WRITE_MODE, [write]). + +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). + +-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + +%%---------------------------------------------------------------------------- + +-record(msstate, + { dir, %% store directory + index_module, %% the module for index ops + index_state, %% where are messages? + current_file, %% current file name as number + current_file_handle, %% current file handle since the last fsync? + file_handle_cache, %% file handle cache + on_sync, %% pending sync requests + sync_timer_ref, %% TRef for our interval timer + sum_valid_data, %% sum of valid data in all files + sum_file_size, %% sum of file sizes + pending_gc_completion, %% things to do once GC completes + gc_active, %% is the GC currently working? + gc_pid, %% pid of our GC + file_handles_ets, %% tid of the shared file handles table + file_summary_ets, %% tid of the file summary table + dedup_cache_ets, %% tid of dedup cache table + cur_file_cache_ets, %% tid of current file cache table + client_refs, %% set of references of all registered clients + successfully_recovered, %% boolean: did we recover state? + file_size_limit %% how big are our files allowed to get? + }). + +-record(client_msstate, + { file_handle_cache, + index_state, + index_module, + dir, + gc_pid, + file_handles_ets, + file_summary_ets, + dedup_cache_ets, + cur_file_cache_ets + }). + +-record(file_summary, + {file, valid_total_size, contiguous_top, left, right, file_size, + locked, readers}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(server() :: pid() | atom()). +-type(file_num() :: non_neg_integer()). +-type(client_msstate() :: #client_msstate { + file_handle_cache :: dict:dictionary(), + index_state :: any(), + index_module :: atom(), + dir :: file:filename(), + gc_pid :: pid(), + file_handles_ets :: ets:tid(), + file_summary_ets :: ets:tid(), + dedup_cache_ets :: ets:tid(), + cur_file_cache_ets :: ets:tid() }). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})), + A}). + +-spec(start_link/4 :: + (atom(), file:filename(), [binary()] | 'undefined', + startup_fun_state()) -> + 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(write/4 :: (server(), rabbit_guid:guid(), msg(), client_msstate()) -> + rabbit_types:ok(client_msstate())). +-spec(read/3 :: (server(), rabbit_guid:guid(), client_msstate()) -> + {rabbit_types:ok(msg()) | 'not_found', client_msstate()}). +-spec(contains/2 :: (server(), rabbit_guid:guid()) -> boolean()). +-spec(remove/2 :: (server(), [rabbit_guid:guid()]) -> 'ok'). +-spec(release/2 :: (server(), [rabbit_guid:guid()]) -> 'ok'). +-spec(sync/3 :: (server(), [rabbit_guid:guid()], fun (() -> any())) -> 'ok'). +-spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> + 'ok'). +-spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). +-spec(client_init/2 :: (server(), binary()) -> client_msstate()). +-spec(client_terminate/1 :: (client_msstate()) -> 'ok'). +-spec(client_delete_and_terminate/3 :: + (client_msstate(), server(), binary()) -> 'ok'). +-spec(successfully_recovered_state/1 :: (server()) -> boolean()). + +-spec(gc/3 :: (non_neg_integer(), non_neg_integer(), + {ets:tid(), file:filename(), atom(), any()}) -> + 'concurrent_readers' | non_neg_integer()). + +-endif. + +%%---------------------------------------------------------------------------- + +%% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION +%% It is not recommended to set this to < 0.5 +-define(GARBAGE_FRACTION, 0.5). + +%% The components: +%% +%% Index: this is a mapping from Guid to #msg_location{}: +%% {Guid, RefCount, File, Offset, TotalSize} +%% By default, it's in ets, but it's also pluggable. +%% FileSummary: this is an ets table which maps File to #file_summary{}: +%% {File, ValidTotalSize, ContiguousTop, Left, Right, +%% FileSize, Locked, Readers} +%% +%% The basic idea is that messages are appended to the current file up +%% until that file becomes too big (> file_size_limit). At that point, +%% the file is closed and a new file is created on the _right_ of the +%% old file which is used for new messages. Files are named +%% numerically ascending, thus the file with the lowest name is the +%% eldest file. +%% +%% We need to keep track of which messages are in which files (this is +%% the Index); how much useful data is in each file and which files +%% are on the left and right of each other. This is the purpose of the +%% FileSummary ets table. +%% +%% As messages are removed from files, holes appear in these +%% files. The field ValidTotalSize contains the total amount of useful +%% data left in the file, whilst ContiguousTop contains the amount of +%% valid data right at the start of each file. These are needed for +%% garbage collection. +%% +%% When we discover that a file is now empty, we delete it. When we +%% discover that it can be combined with the useful data in either its +%% left or right neighbour, and overall, across all the files, we have +%% ((the amount of garbage) / (the sum of all file sizes)) > +%% ?GARBAGE_FRACTION, we start a garbage collection run concurrently, +%% which will compact the two files together. This keeps disk +%% utilisation high and aids performance. We deliberately do this +%% lazily in order to prevent doing GC on files which are soon to be +%% emptied (and hence deleted) soon. +%% +%% Given the compaction between two files, the left file (i.e. elder +%% file) is considered the ultimate destination for the good data in +%% the right file. If necessary, the good data in the left file which +%% is fragmented throughout the file is written out to a temporary +%% file, then read back in to form a contiguous chunk of good data at +%% the start of the left file. Thus the left file is garbage collected +%% and compacted. Then the good data from the right file is copied +%% onto the end of the left file. Index and FileSummary tables are +%% updated. +%% +%% On non-clean startup, we scan the files we discover, dealing with +%% the possibilites of a crash having occured during a compaction +%% (this consists of tidyup - the compaction is deliberately designed +%% such that data is duplicated on disk rather than risking it being +%% lost), and rebuild the FileSummary ets table and Index. +%% +%% So, with this design, messages move to the left. Eventually, they +%% should end up in a contiguous block on the left and are then never +%% rewritten. But this isn't quite the case. If in a file there is one +%% message that is being ignored, for some reason, and messages in the +%% file to the right and in the current block are being read all the +%% time then it will repeatedly be the case that the good data from +%% both files can be combined and will be written out to a new +%% file. Whenever this happens, our shunned message will be rewritten. +%% +%% So, provided that we combine messages in the right order, +%% (i.e. left file, bottom to top, right file, bottom to top), +%% eventually our shunned message will end up at the bottom of the +%% left file. The compaction/combining algorithm is smart enough to +%% read in good data from the left file that is scattered throughout +%% (i.e. C and D in the below diagram), then truncate the file to just +%% above B (i.e. truncate to the limit of the good contiguous region +%% at the start of the file), then write C and D on top and then write +%% E, F and G from the right file on top. Thus contiguous blocks of +%% good data at the bottom of files are not rewritten (yes, this is +%% the data the size of which is tracked by the ContiguousTop +%% variable. Judicious use of a mirror is required). +%% +%% +-------+ +-------+ +-------+ +%% | X | | G | | G | +%% +-------+ +-------+ +-------+ +%% | D | | X | | F | +%% +-------+ +-------+ +-------+ +%% | X | | X | | E | +%% +-------+ +-------+ +-------+ +%% | C | | F | ===> | D | +%% +-------+ +-------+ +-------+ +%% | X | | X | | C | +%% +-------+ +-------+ +-------+ +%% | B | | X | | B | +%% +-------+ +-------+ +-------+ +%% | A | | E | | A | +%% +-------+ +-------+ +-------+ +%% left right left +%% +%% From this reasoning, we do have a bound on the number of times the +%% message is rewritten. From when it is inserted, there can be no +%% files inserted between it and the head of the queue, and the worst +%% case is that everytime it is rewritten, it moves one position lower +%% in the file (for it to stay at the same position requires that +%% there are no holes beneath it, which means truncate would be used +%% and so it would not be rewritten at all). Thus this seems to +%% suggest the limit is the number of messages ahead of it in the +%% queue, though it's likely that that's pessimistic, given the +%% requirements for compaction/combination of files. +%% +%% The other property is that we have is the bound on the lowest +%% utilisation, which should be 50% - worst case is that all files are +%% fractionally over half full and can't be combined (equivalent is +%% alternating full files and files with only one tiny message in +%% them). +%% +%% Messages are reference-counted. When a message with the same guid +%% is written several times we only store it once, and only remove it +%% from the store when it has been removed the same number of times. +%% +%% The reference counts do not persist. Therefore the initialisation +%% function must be provided with a generator that produces ref count +%% deltas for all recovered messages. This is only used on startup +%% when the shutdown was non-clean. +%% +%% Read messages with a reference count greater than one are entered +%% into a message cache. The purpose of the cache is not especially +%% performance, though it can help there too, but prevention of memory +%% explosion. It ensures that as messages with a high reference count +%% are read from several processes they are read back as the same +%% binary object rather than multiples of identical binary +%% objects. +%% +%% Reads can be performed directly by clients without calling to the +%% server. This is safe because multiple file handles can be used to +%% read files. However, locking is used by the concurrent GC to make +%% sure that reads are not attempted from files which are in the +%% process of being garbage collected. +%% +%% The server automatically defers reads, removes and contains calls +%% that occur which refer to files which are currently being +%% GC'd. Contains calls are only deferred in order to ensure they do +%% not overtake removes. +%% +%% The current file to which messages are being written has a +%% write-back cache. This is written to immediately by clients and can +%% be read from by clients too. This means that there are only ever +%% writes made to the current file, thus eliminating delays due to +%% flushing write buffers in order to be able to safely read from the +%% current file. The one exception to this is that on start up, the +%% cache is not populated with msgs found in the current file, and +%% thus in this case only, reads may have to come from the file +%% itself. The effect of this is that even if the msg_store process is +%% heavily overloaded, clients can still write and read messages with +%% very low latency and not block at all. +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. + +%%---------------------------------------------------------------------------- +%% public API +%%---------------------------------------------------------------------------- + +start_link(Server, Dir, ClientRefs, StartupFunState) -> + gen_server2:start_link({local, Server}, ?MODULE, + [Server, Dir, ClientRefs, StartupFunState], + [{timeout, infinity}]). + +write(Server, Guid, Msg, + CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) -> + ok = update_msg_cache(CurFileCacheEts, Guid, Msg), + {gen_server2:cast(Server, {write, Guid, Msg}), CState}. + +read(Server, Guid, + CState = #client_msstate { dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> + %% 1. Check the dedup cache + case fetch_and_increment_cache(DedupCacheEts, Guid) of + not_found -> + %% 2. Check the cur file cache + case ets:lookup(CurFileCacheEts, Guid) of + [] -> + Defer = fun() -> {gen_server2:pcall( + Server, 2, {read, Guid}, infinity), + CState} end, + case index_lookup(Guid, CState) of + not_found -> Defer(); + MsgLocation -> client_read1(Server, MsgLocation, Defer, + CState) + end; + [{Guid, Msg, _CacheRefCount}] -> + %% Although we've found it, we don't know the + %% refcount, so can't insert into dedup cache + {{ok, Msg}, CState} + end; + Msg -> + {{ok, Msg}, CState} + end. + +contains(Server, Guid) -> gen_server2:call(Server, {contains, Guid}, infinity). +remove(_Server, []) -> ok; +remove(Server, Guids) -> gen_server2:cast(Server, {remove, Guids}). +release(_Server, []) -> ok; +release(Server, Guids) -> gen_server2:cast(Server, {release, Guids}). +sync(Server, Guids, K) -> gen_server2:cast(Server, {sync, Guids, K}). +sync(Server) -> gen_server2:pcast(Server, 8, sync). %% internal + +gc_done(Server, Reclaimed, Source, Destination) -> + gen_server2:pcast(Server, 8, {gc_done, Reclaimed, Source, Destination}). + +set_maximum_since_use(Server, Age) -> + gen_server2:pcast(Server, 8, {set_maximum_since_use, Age}). + +client_init(Server, Ref) -> + {IState, IModule, Dir, GCPid, + FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} = + gen_server2:call(Server, {new_client_state, Ref}, infinity), + #client_msstate { file_handle_cache = dict:new(), + index_state = IState, + index_module = IModule, + dir = Dir, + gc_pid = GCPid, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }. + +client_terminate(CState) -> + close_all_handles(CState), + ok. + +client_delete_and_terminate(CState, Server, Ref) -> + ok = client_terminate(CState), + ok = gen_server2:call(Server, {delete_client, Ref}, infinity). + +successfully_recovered_state(Server) -> + gen_server2:call(Server, successfully_recovered_state, infinity). + +%%---------------------------------------------------------------------------- +%% Client-side-only helpers +%%---------------------------------------------------------------------------- + +client_read1(Server, + #msg_location { guid = Guid, file = File } = MsgLocation, + Defer, + CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> + case ets:lookup(FileSummaryEts, File) of + [] -> %% File has been GC'd and no longer exists. Go around again. + read(Server, Guid, CState); + [#file_summary { locked = Locked, right = Right }] -> + client_read2(Server, Locked, Right, MsgLocation, Defer, CState) + end. + +client_read2(_Server, false, undefined, _MsgLocation, Defer, _CState) -> + %% Although we've already checked both caches and not found the + %% message there, the message is apparently in the + %% current_file. We can only arrive here if we are trying to read + %% a message which we have not written, which is very odd, so just + %% defer. + %% + %% OR, on startup, the cur_file_cache is not populated with the + %% contents of the current file, thus reads from the current file + %% will end up here and will need to be deferred. + Defer(); +client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) -> + %% Of course, in the mean time, the GC could have run and our msg + %% is actually in a different file, unlocked. However, defering is + %% the safest and simplest thing to do. + Defer(); +client_read2(Server, false, _Right, + MsgLocation = #msg_location { guid = Guid, file = File }, + Defer, + CState = #client_msstate { file_summary_ets = FileSummaryEts }) -> + %% It's entirely possible that everything we're doing from here on + %% is for the wrong file, or a non-existent file, as a GC may have + %% finished. + safe_ets_update_counter( + FileSummaryEts, File, {#file_summary.readers, +1}, + fun (_) -> client_read3(Server, MsgLocation, Defer, CState) end, + fun () -> read(Server, Guid, CState) end). + +client_read3(Server, #msg_location { guid = Guid, file = File }, Defer, + CState = #client_msstate { file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + gc_pid = GCPid }) -> + Release = + fun() -> ok = case ets:update_counter(FileSummaryEts, File, + {#file_summary.readers, -1}) of + 0 -> case ets:lookup(FileSummaryEts, File) of + [#file_summary { locked = true }] -> + rabbit_msg_store_gc:no_readers( + GCPid, File); + _ -> ok + end; + _ -> ok + end + end, + %% If a GC involving the file hasn't already started, it won't + %% start now. Need to check again to see if we've been locked in + %% the meantime, between lookup and update_counter (thus GC + %% started before our +1. In fact, it could have finished by now + %% too). + case ets:lookup(FileSummaryEts, File) of + [] -> %% GC has deleted our file, just go round again. + read(Server, Guid, CState); + [#file_summary { locked = true }] -> + %% If we get a badarg here, then the GC has finished and + %% deleted our file. Try going around again. Otherwise, + %% just defer. + %% + %% badarg scenario: we lookup, msg_store locks, GC starts, + %% GC ends, we +1 readers, msg_store ets:deletes (and + %% unlocks the dest) + try Release(), + Defer() + catch error:badarg -> read(Server, Guid, CState) + end; + [#file_summary { locked = false }] -> + %% Ok, we're definitely safe to continue - a GC involving + %% the file cannot start up now, and isn't running, so + %% nothing will tell us from now on to close the handle if + %% it's already open. + %% + %% Finally, we need to recheck that the msg is still at + %% the same place - it's possible an entire GC ran between + %% us doing the lookup and the +1 on the readers. (Same as + %% badarg scenario above, but we don't have a missing file + %% - we just have the /wrong/ file). + case index_lookup(Guid, CState) of + #msg_location { file = File } = MsgLocation -> + %% Still the same file. + mark_handle_open(FileHandlesEts, File), + + CState1 = close_all_indicated(CState), + {Msg, CState2} = %% This will never be the current file + read_from_disk(MsgLocation, CState1, DedupCacheEts), + Release(), %% this MUST NOT fail with badarg + {{ok, Msg}, CState2}; + MsgLocation -> %% different file! + Release(), %% this MUST NOT fail with badarg + client_read1(Server, MsgLocation, Defer, CState) + end + end. + +%%---------------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------------- + +init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) -> + process_flag(trap_exit, true), + + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, + [self()]), + + Dir = filename:join(BaseDir, atom_to_list(Server)), + + {ok, IndexModule} = application:get_env(msg_store_index_module), + rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]), + + {AllCleanShutdown, IndexState, ClientRefs1} = + recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server), + + {FileSummaryRecovered, FileSummaryEts} = + recover_file_summary(AllCleanShutdown, Dir, Server), + + DedupCacheEts = ets:new(rabbit_msg_store_dedup_cache, [set, public]), + FileHandlesEts = ets:new(rabbit_msg_store_shared_file_handles, + [ordered_set, public]), + CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]), + + {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit), + + State = #msstate { dir = Dir, + index_module = IndexModule, + index_state = IndexState, + current_file = 0, + current_file_handle = undefined, + file_handle_cache = dict:new(), + on_sync = [], + sync_timer_ref = undefined, + sum_valid_data = 0, + sum_file_size = 0, + pending_gc_completion = [], + gc_active = false, + gc_pid = undefined, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts, + client_refs = ClientRefs1, + successfully_recovered = AllCleanShutdown, + file_size_limit = FileSizeLimit + }, + + ok = case AllCleanShutdown of + true -> ok; + false -> count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State) + end, + + FileNames = + sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)), + TmpFileNames = + sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)), + ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames), + + %% There should be no more tmp files now, so go ahead and load the + %% whole lot + Files = [filename_to_num(FileName) || FileName <- FileNames], + {Offset, State1 = #msstate { current_file = CurFile }} = + build_index(FileSummaryRecovered, Files, State), + + %% read is only needed so that we can seek + {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile), + [read | ?WRITE_MODE]), + {ok, Offset} = file_handle_cache:position(CurHdl, Offset), + ok = file_handle_cache:truncate(CurHdl), + + {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule, + FileSummaryEts), + + {ok, maybe_compact( + State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }), + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call({read, Guid}, From, State) -> + State1 = read_message(Guid, From, State), + noreply(State1); + +handle_call({contains, Guid}, From, State) -> + State1 = contains_message(Guid, From, State), + noreply(State1); + +handle_call({new_client_state, CRef}, _From, + State = #msstate { dir = Dir, + index_state = IndexState, + index_module = IndexModule, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts, + client_refs = ClientRefs, + gc_pid = GCPid }) -> + reply({IndexState, IndexModule, Dir, GCPid, + FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts}, + State #msstate { client_refs = sets:add_element(CRef, ClientRefs) }); + +handle_call(successfully_recovered_state, _From, State) -> + reply(State #msstate.successfully_recovered, State); + +handle_call({delete_client, CRef}, _From, + State = #msstate { client_refs = ClientRefs }) -> + reply(ok, + State #msstate { client_refs = sets:del_element(CRef, ClientRefs) }). + +handle_cast({write, Guid, Msg}, + State = #msstate { current_file_handle = CurHdl, + current_file = CurFile, + sum_valid_data = SumValid, + sum_file_size = SumFileSize, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts }) -> + true = 0 =< ets:update_counter(CurFileCacheEts, Guid, {3, -1}), + case index_lookup(Guid, State) of + not_found -> + %% New message, lots to do + {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), + {ok, TotalSize} = rabbit_msg_file:append(CurHdl, Guid, Msg), + ok = index_insert(#msg_location { + guid = Guid, ref_count = 1, file = CurFile, + offset = CurOffset, total_size = TotalSize }, + State), + [#file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + right = undefined, + locked = false, + file_size = FileSize }] = + ets:lookup(FileSummaryEts, CurFile), + ValidTotalSize1 = ValidTotalSize + TotalSize, + ContiguousTop1 = case CurOffset =:= ContiguousTop of + true -> ValidTotalSize1; + false -> ContiguousTop + end, + true = ets:update_element( + FileSummaryEts, CurFile, + [{#file_summary.valid_total_size, ValidTotalSize1}, + {#file_summary.contiguous_top, ContiguousTop1}, + {#file_summary.file_size, FileSize + TotalSize}]), + NextOffset = CurOffset + TotalSize, + noreply( + maybe_roll_to_new_file( + NextOffset, State #msstate { + sum_valid_data = SumValid + TotalSize, + sum_file_size = SumFileSize + TotalSize })); + #msg_location { ref_count = RefCount } -> + %% We already know about it, just update counter. Only + %% update field otherwise bad interaction with concurrent GC + ok = index_update_fields(Guid, + {#msg_location.ref_count, RefCount + 1}, + State), + noreply(State) + end; + +handle_cast({remove, Guids}, State) -> + State1 = lists:foldl( + fun (Guid, State2) -> remove_message(Guid, State2) end, + State, Guids), + noreply(maybe_compact(State1)); + +handle_cast({release, Guids}, State = + #msstate { dedup_cache_ets = DedupCacheEts }) -> + lists:foreach( + fun (Guid) -> decrement_cache(DedupCacheEts, Guid) end, Guids), + noreply(State); + +handle_cast({sync, Guids, K}, + State = #msstate { current_file = CurFile, + current_file_handle = CurHdl, + on_sync = Syncs }) -> + {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl), + case lists:any(fun (Guid) -> + #msg_location { file = File, offset = Offset } = + index_lookup(Guid, State), + File =:= CurFile andalso Offset >= SyncOffset + end, Guids) of + false -> K(), + noreply(State); + true -> noreply(State #msstate { on_sync = [K | Syncs] }) + end; + +handle_cast(sync, State) -> + noreply(internal_sync(State)); + +handle_cast({gc_done, Reclaimed, Src, Dst}, + State = #msstate { sum_file_size = SumFileSize, + gc_active = {Src, Dst}, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts }) -> + %% GC done, so now ensure that any clients that have open fhs to + %% those files close them before using them again. This has to be + %% done here (given it's done in the msg_store, and not the gc), + %% and not when starting up the GC, because if done when starting + %% up the GC, the client could find the close, and close and + %% reopen the fh, whilst the GC is waiting for readers to + %% disappear, before it's actually done the GC. + true = mark_handle_to_close(FileHandlesEts, Src), + true = mark_handle_to_close(FileHandlesEts, Dst), + %% we always move data left, so Src has gone and was on the + %% right, so need to make dest = source.right.left, and also + %% dest.right = source.right + [#file_summary { left = Dst, + right = SrcRight, + locked = true, + readers = 0 }] = ets:lookup(FileSummaryEts, Src), + %% this could fail if SrcRight =:= undefined + ets:update_element(FileSummaryEts, SrcRight, {#file_summary.left, Dst}), + true = ets:update_element(FileSummaryEts, Dst, + [{#file_summary.locked, false}, + {#file_summary.right, SrcRight}]), + true = ets:delete(FileSummaryEts, Src), + noreply( + maybe_compact(run_pending( + State #msstate { sum_file_size = SumFileSize - Reclaimed, + gc_active = false }))); + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + noreply(State). + +handle_info(timeout, State) -> + noreply(internal_sync(State)); + +handle_info({'EXIT', _Pid, Reason}, State) -> + {stop, Reason, State}. + +terminate(_Reason, State = #msstate { index_state = IndexState, + index_module = IndexModule, + current_file_handle = CurHdl, + gc_pid = GCPid, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts, + client_refs = ClientRefs, + dir = Dir }) -> + %% stop the gc first, otherwise it could be working and we pull + %% out the ets tables from under it. + ok = rabbit_msg_store_gc:stop(GCPid), + State1 = case CurHdl of + undefined -> State; + _ -> State2 = internal_sync(State), + file_handle_cache:close(CurHdl), + State2 + end, + State3 = close_all_handles(State1), + store_file_summary(FileSummaryEts, Dir), + [ets:delete(T) || + T <- [FileSummaryEts, DedupCacheEts, FileHandlesEts, CurFileCacheEts]], + IndexModule:terminate(IndexState), + store_recovery_terms([{client_refs, sets:to_list(ClientRefs)}, + {index_module, IndexModule}], Dir), + State3 #msstate { index_state = undefined, + current_file_handle = undefined }. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------------- +%% general helper functions +%%---------------------------------------------------------------------------- + +noreply(State) -> + {State1, Timeout} = next_state(State), + {noreply, State1, Timeout}. + +reply(Reply, State) -> + {State1, Timeout} = next_state(State), + {reply, Reply, State1, Timeout}. + +next_state(State = #msstate { on_sync = [], sync_timer_ref = undefined }) -> + {State, hibernate}; +next_state(State = #msstate { sync_timer_ref = undefined }) -> + {start_sync_timer(State), 0}; +next_state(State = #msstate { on_sync = [] }) -> + {stop_sync_timer(State), hibernate}; +next_state(State) -> + {State, 0}. + +start_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> + {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, [self()]), + State #msstate { sync_timer_ref = TRef }. + +stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) -> + State; +stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) -> + {ok, cancel} = timer:cancel(TRef), + State #msstate { sync_timer_ref = undefined }. + +internal_sync(State = #msstate { current_file_handle = CurHdl, + on_sync = Syncs }) -> + State1 = stop_sync_timer(State), + case Syncs of + [] -> State1; + _ -> ok = file_handle_cache:sync(CurHdl), + lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)), + State1 #msstate { on_sync = [] } + end. + +read_message(Guid, From, + State = #msstate { dedup_cache_ets = DedupCacheEts }) -> + case index_lookup(Guid, State) of + not_found -> + gen_server2:reply(From, not_found), + State; + MsgLocation -> + case fetch_and_increment_cache(DedupCacheEts, Guid) of + not_found -> read_message1(From, MsgLocation, State); + Msg -> gen_server2:reply(From, {ok, Msg}), + State + end + end. + +read_message1(From, #msg_location { guid = Guid, ref_count = RefCount, + file = File, offset = Offset } = MsgLoc, + State = #msstate { current_file = CurFile, + current_file_handle = CurHdl, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts, + cur_file_cache_ets = CurFileCacheEts }) -> + case File =:= CurFile of + true -> {Msg, State1} = + %% can return [] if msg in file existed on startup + case ets:lookup(CurFileCacheEts, Guid) of + [] -> + {ok, RawOffSet} = + file_handle_cache:current_raw_offset(CurHdl), + ok = case Offset >= RawOffSet of + true -> file_handle_cache:flush(CurHdl); + false -> ok + end, + read_from_disk(MsgLoc, State, DedupCacheEts); + [{Guid, Msg1, _CacheRefCount}] -> + ok = maybe_insert_into_cache( + DedupCacheEts, RefCount, Guid, Msg1), + {Msg1, State} + end, + gen_server2:reply(From, {ok, Msg}), + State1; + false -> [#file_summary { locked = Locked }] = + ets:lookup(FileSummaryEts, File), + case Locked of + true -> add_to_pending_gc_completion({read, Guid, From}, + State); + false -> {Msg, State1} = + read_from_disk(MsgLoc, State, DedupCacheEts), + gen_server2:reply(From, {ok, Msg}), + State1 + end + end. + +read_from_disk(#msg_location { guid = Guid, ref_count = RefCount, + file = File, offset = Offset, + total_size = TotalSize }, + State, DedupCacheEts) -> + {Hdl, State1} = get_read_handle(File, State), + {ok, Offset} = file_handle_cache:position(Hdl, Offset), + {ok, {Guid, Msg}} = + case rabbit_msg_file:read(Hdl, TotalSize) of + {ok, {Guid, _}} = Obj -> + Obj; + Rest -> + {error, {misread, [{old_state, State}, + {file_num, File}, + {offset, Offset}, + {guid, Guid}, + {read, Rest}, + {proc_dict, get()} + ]}} + end, + ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg), + {Msg, State1}. + +contains_message(Guid, From, State = #msstate { gc_active = GCActive }) -> + case index_lookup(Guid, State) of + not_found -> + gen_server2:reply(From, false), + State; + #msg_location { file = File } -> + case GCActive of + {A, B} when File =:= A orelse File =:= B -> + add_to_pending_gc_completion( + {contains, Guid, From}, State); + _ -> + gen_server2:reply(From, true), + State + end + end. + +remove_message(Guid, State = #msstate { sum_valid_data = SumValid, + file_summary_ets = FileSummaryEts, + dedup_cache_ets = DedupCacheEts }) -> + #msg_location { ref_count = RefCount, file = File, + offset = Offset, total_size = TotalSize } = + index_lookup(Guid, State), + case RefCount of + 1 -> + %% don't remove from CUR_FILE_CACHE_ETS_NAME here because + %% there may be further writes in the mailbox for the same + %% msg. + ok = remove_cache_entry(DedupCacheEts, Guid), + [#file_summary { valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + locked = Locked }] = + ets:lookup(FileSummaryEts, File), + case Locked of + true -> + add_to_pending_gc_completion({remove, Guid}, State); + false -> + ok = index_delete(Guid, State), + ContiguousTop1 = lists:min([ContiguousTop, Offset]), + ValidTotalSize1 = ValidTotalSize - TotalSize, + true = ets:update_element( + FileSummaryEts, File, + [{#file_summary.valid_total_size, ValidTotalSize1}, + {#file_summary.contiguous_top, ContiguousTop1}]), + State1 = delete_file_if_empty(File, State), + State1 #msstate { sum_valid_data = SumValid - TotalSize } + end; + _ when 1 < RefCount -> + ok = decrement_cache(DedupCacheEts, Guid), + %% only update field, otherwise bad interaction with concurrent GC + ok = index_update_fields(Guid, + {#msg_location.ref_count, RefCount - 1}, + State), + State + end. + +add_to_pending_gc_completion( + Op, State = #msstate { pending_gc_completion = Pending }) -> + State #msstate { pending_gc_completion = [Op | Pending] }. + +run_pending(State = #msstate { pending_gc_completion = [] }) -> + State; +run_pending(State = #msstate { pending_gc_completion = Pending }) -> + State1 = State #msstate { pending_gc_completion = [] }, + lists:foldl(fun run_pending/2, State1, lists:reverse(Pending)). + +run_pending({read, Guid, From}, State) -> + read_message(Guid, From, State); +run_pending({contains, Guid, From}, State) -> + contains_message(Guid, From, State); +run_pending({remove, Guid}, State) -> + remove_message(Guid, State). + +safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) -> + try + SuccessFun(ets:update_counter(Tab, Key, UpdateOp)) + catch error:badarg -> FailThunk() + end. + +safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) -> + safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk). + +%%---------------------------------------------------------------------------- +%% file helper functions +%%---------------------------------------------------------------------------- + +open_file(Dir, FileName, Mode) -> + file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode, + [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]). + +close_handle(Key, CState = #client_msstate { file_handle_cache = FHC }) -> + CState #client_msstate { file_handle_cache = close_handle(Key, FHC) }; + +close_handle(Key, State = #msstate { file_handle_cache = FHC }) -> + State #msstate { file_handle_cache = close_handle(Key, FHC) }; + +close_handle(Key, FHC) -> + case dict:find(Key, FHC) of + {ok, Hdl} -> ok = file_handle_cache:close(Hdl), + dict:erase(Key, FHC); + error -> FHC + end. + +mark_handle_open(FileHandlesEts, File) -> + %% This is fine to fail (already exists) + ets:insert_new(FileHandlesEts, {{self(), File}, open}), + true. + +mark_handle_to_close(FileHandlesEts, File) -> + [ ets:update_element(FileHandlesEts, Key, {2, close}) + || {Key, open} <- ets:match_object(FileHandlesEts, {{'_', File}, open}) ], + true. + +close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } = + CState) -> + Objs = ets:match_object(FileHandlesEts, {{self(), '_'}, close}), + lists:foldl(fun ({Key = {_Self, File}, close}, CStateM) -> + true = ets:delete(FileHandlesEts, Key), + close_handle(File, CStateM) + end, CState, Objs). + +close_all_handles(CState = #client_msstate { file_handles_ets = FileHandlesEts, + file_handle_cache = FHC }) -> + Self = self(), + ok = dict:fold(fun (File, Hdl, ok) -> + true = ets:delete(FileHandlesEts, {Self, File}), + file_handle_cache:close(Hdl) + end, ok, FHC), + CState #client_msstate { file_handle_cache = dict:new() }; + +close_all_handles(State = #msstate { file_handle_cache = FHC }) -> + ok = dict:fold(fun (_Key, Hdl, ok) -> file_handle_cache:close(Hdl) end, + ok, FHC), + State #msstate { file_handle_cache = dict:new() }. + +get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = FHC, + dir = Dir }) -> + {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir), + {Hdl, CState #client_msstate { file_handle_cache = FHC2 }}; + +get_read_handle(FileNum, State = #msstate { file_handle_cache = FHC, + dir = Dir }) -> + {Hdl, FHC2} = get_read_handle(FileNum, FHC, Dir), + {Hdl, State #msstate { file_handle_cache = FHC2 }}. + +get_read_handle(FileNum, FHC, Dir) -> + case dict:find(FileNum, FHC) of + {ok, Hdl} -> {Hdl, FHC}; + error -> {ok, Hdl} = open_file(Dir, filenum_to_name(FileNum), + ?READ_MODE), + {Hdl, dict:store(FileNum, Hdl, FHC)} + end. + +preallocate(Hdl, FileSizeLimit, FinalPos) -> + {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit), + ok = file_handle_cache:truncate(Hdl), + {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos), + ok. + +truncate_and_extend_file(Hdl, Lowpoint, Highpoint) -> + {ok, Lowpoint} = file_handle_cache:position(Hdl, Lowpoint), + ok = file_handle_cache:truncate(Hdl), + ok = preallocate(Hdl, Highpoint, Lowpoint). + +form_filename(Dir, Name) -> filename:join(Dir, Name). + +filenum_to_name(File) -> integer_to_list(File) ++ ?FILE_EXTENSION. + +filename_to_num(FileName) -> list_to_integer(filename:rootname(FileName)). + +sort_file_names(FileNames) -> + lists:sort(fun (A, B) -> filename_to_num(A) < filename_to_num(B) end, + FileNames). + +%%---------------------------------------------------------------------------- +%% message cache helper functions +%%---------------------------------------------------------------------------- + +maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) + when RefCount > 1 -> + update_msg_cache(DedupCacheEts, Guid, Msg); +maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) -> + ok. + +update_msg_cache(CacheEts, Guid, Msg) -> + case ets:insert_new(CacheEts, {Guid, Msg, 1}) of + true -> ok; + false -> safe_ets_update_counter_ok( + CacheEts, Guid, {3, +1}, + fun () -> update_msg_cache(CacheEts, Guid, Msg) end) + end. + +remove_cache_entry(DedupCacheEts, Guid) -> + true = ets:delete(DedupCacheEts, Guid), + ok. + +fetch_and_increment_cache(DedupCacheEts, Guid) -> + case ets:lookup(DedupCacheEts, Guid) of + [] -> + not_found; + [{_Guid, Msg, _RefCount}] -> + safe_ets_update_counter_ok( + DedupCacheEts, Guid, {3, +1}, + %% someone has deleted us in the meantime, insert us + fun () -> ok = update_msg_cache(DedupCacheEts, Guid, Msg) end), + Msg + end. + +decrement_cache(DedupCacheEts, Guid) -> + true = safe_ets_update_counter( + DedupCacheEts, Guid, {3, -1}, + fun (N) when N =< 0 -> true = ets:delete(DedupCacheEts, Guid); + (_N) -> true + end, + %% Guid is not in there because although it's been + %% delivered, it's never actually been read (think: + %% persistent message held in RAM) + fun () -> true end), + ok. + +%%---------------------------------------------------------------------------- +%% index +%%---------------------------------------------------------------------------- + +index_lookup(Key, #client_msstate { index_module = Index, + index_state = State }) -> + Index:lookup(Key, State); + +index_lookup(Key, #msstate { index_module = Index, index_state = State }) -> + Index:lookup(Key, State). + +index_insert(Obj, #msstate { index_module = Index, index_state = State }) -> + Index:insert(Obj, State). + +index_update(Obj, #msstate { index_module = Index, index_state = State }) -> + Index:update(Obj, State). + +index_update_fields(Key, Updates, #msstate { index_module = Index, + index_state = State }) -> + Index:update_fields(Key, Updates, State). + +index_delete(Key, #msstate { index_module = Index, index_state = State }) -> + Index:delete(Key, State). + +index_delete_by_file(File, #msstate { index_module = Index, + index_state = State }) -> + Index:delete_by_file(File, State). + +%%---------------------------------------------------------------------------- +%% shutdown and recovery +%%---------------------------------------------------------------------------- + +recover_index_and_client_refs(IndexModule, undefined, Dir, _Server) -> + ok = rabbit_misc:recursive_delete([Dir]), + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + {false, IndexModule:new(Dir), sets:new()}; +recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server) -> + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + Fresh = fun (ErrorMsg, ErrorArgs) -> + rabbit_log:warning("~w: " ++ ErrorMsg ++ + "~nrebuilding indices from scratch~n", + [Server | ErrorArgs]), + {false, IndexModule:new(Dir), sets:new()} + end, + case read_recovery_terms(Dir) of + {false, Error} -> + Fresh("failed to read recovery terms: ~p", [Error]); + {true, Terms} -> + RecClientRefs = proplists:get_value(client_refs, Terms, []), + RecIndexModule = proplists:get_value(index_module, Terms), + case (lists:sort(ClientRefs) =:= lists:sort(RecClientRefs) + andalso IndexModule =:= RecIndexModule) of + true -> case IndexModule:recover(Dir) of + {ok, IndexState1} -> + ClientRefs1 = sets:from_list(ClientRefs), + {true, IndexState1, ClientRefs1}; + {error, Error} -> + Fresh("failed to recover index: ~p", [Error]) + end; + false -> Fresh("recovery terms differ from present", []) + end + end. + +store_recovery_terms(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). + +read_recovery_terms(Dir) -> + Path = filename:join(Dir, ?CLEAN_FILENAME), + case rabbit_misc:read_term_file(Path) of + {ok, Terms} -> case file:delete(Path) of + ok -> {true, Terms}; + {error, Error} -> {false, Error} + end; + {error, Error} -> {false, Error} + end. + +store_file_summary(Tid, Dir) -> + ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME), + [{extended_info, [object_count]}]). + +recover_file_summary(false, _Dir, _Server) -> + %% TODO: the only reason for this to be an *ordered*_set is so + %% that a) maybe_compact can start a traversal from the eldest + %% file, and b) build_index in fast recovery mode can easily + %% identify the current file. It's awkward to have both that + %% odering and the left/right pointers in the entries - replacing + %% the former with some additional bit of state would be easy, but + %% ditching the latter would be neater. + {false, ets:new(rabbit_msg_store_file_summary, + [ordered_set, public, {keypos, #file_summary.file}])}; +recover_file_summary(true, Dir, Server) -> + Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {true, Tid}; + {error, Error} -> rabbit_log:warning( + "~w: failed to recover file summary: ~p~n" + "rebuilding~n", [Server, Error]), + recover_file_summary(false, Dir, Server) + end. + +count_msg_refs(Gen, Seed, State) -> + case Gen(Seed) of + finished -> + ok; + {_Guid, 0, Next} -> + count_msg_refs(Gen, Next, State); + {Guid, Delta, Next} -> + ok = case index_lookup(Guid, State) of + not_found -> + index_insert(#msg_location { guid = Guid, + ref_count = Delta }, + State); + #msg_location { ref_count = RefCount } = StoreEntry -> + NewRefCount = RefCount + Delta, + case NewRefCount of + 0 -> index_delete(Guid, State); + _ -> index_update(StoreEntry #msg_location { + ref_count = NewRefCount }, + State) + end + end, + count_msg_refs(Gen, Next, State) + end. + +recover_crashed_compactions(Dir, FileNames, TmpFileNames) -> + lists:foreach( + fun (TmpFileName) -> + NonTmpRelatedFileName = + filename:rootname(TmpFileName) ++ ?FILE_EXTENSION, + true = lists:member(NonTmpRelatedFileName, FileNames), + ok = recover_crashed_compaction( + Dir, TmpFileName, NonTmpRelatedFileName) + end, TmpFileNames), + ok. + +recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) -> + {ok, UncorruptedMessagesTmp, GuidsTmp} = + scan_file_for_valid_messages_and_guids(Dir, TmpFileName), + {ok, UncorruptedMessages, Guids} = + scan_file_for_valid_messages_and_guids(Dir, NonTmpRelatedFileName), + %% 1) It's possible that everything in the tmp file is also in the + %% main file such that the main file is (prefix ++ + %% tmpfile). This means that compaction failed immediately + %% prior to the final step of deleting the tmp file. Plan: just + %% delete the tmp file + %% 2) It's possible that everything in the tmp file is also in the + %% main file but with holes throughout (or just somthing like + %% main = (prefix ++ hole ++ tmpfile)). This means that + %% compaction wrote out the tmp file successfully and then + %% failed. Plan: just delete the tmp file and allow the + %% compaction to eventually be triggered later + %% 3) It's possible that everything in the tmp file is also in the + %% main file but such that the main file does not end with tmp + %% file (and there are valid messages in the suffix; main = + %% (prefix ++ tmpfile[with extra holes?] ++ suffix)). This + %% means that compaction failed as we were writing out the tmp + %% file. Plan: just delete the tmp file and allow the + %% compaction to eventually be triggered later + %% 4) It's possible that there are messages in the tmp file which + %% are not in the main file. This means that writing out the + %% tmp file succeeded, but then we failed as we were copying + %% them back over to the main file, after truncating the main + %% file. As the main file has already been truncated, it should + %% consist only of valid messages. Plan: Truncate the main file + %% back to before any of the files in the tmp file and copy + %% them over again + TmpPath = form_filename(Dir, TmpFileName), + case is_sublist(GuidsTmp, Guids) of + true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file + %% note this also catches the case when the tmp file + %% is empty + ok = file:delete(TmpPath); + false -> + %% We're in case 4 above. We only care about the inital + %% msgs in main file that are not in the tmp file. If + %% there are no msgs in the tmp file then we would be in + %% the 'true' branch of this case, so we know the + %% lists:last call is safe. + EldestTmpGuid = lists:last(GuidsTmp), + {Guids1, UncorruptedMessages1} + = case lists:splitwith( + fun (Guid) -> Guid =/= EldestTmpGuid end, Guids) of + {_Guids, []} -> %% no msgs from tmp in main + {Guids, UncorruptedMessages}; + {Dropped, [EldestTmpGuid | Rest]} -> + %% Msgs in Dropped are in tmp, so forget them. + %% *cry*. Lists indexed from 1. + {Rest, lists:sublist(UncorruptedMessages, + 2 + length(Dropped), + length(Rest))} + end, + %% The main file prefix should be contiguous + {Top, Guids1} = find_contiguous_block_prefix( + lists:reverse(UncorruptedMessages1)), + %% we should have that none of the messages in the prefix + %% are in the tmp file + true = is_disjoint(Guids1, GuidsTmp), + %% must open with read flag, otherwise will stomp over contents + {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName, + [read | ?WRITE_MODE]), + %% Wipe out any rubbish at the end of the file. Remember + %% the head of the list will be the highest entry in the + %% file. + [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp, + TmpSize = TmpTopOffset + TmpTopTotalSize, + %% Extend the main file as big as necessary in a single + %% move. If we run out of disk space, this truncate could + %% fail, but we still aren't risking losing data + ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize), + {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_AHEAD_MODE), + {ok, TmpSize} = file_handle_cache:copy(TmpHdl, MainHdl, TmpSize), + ok = file_handle_cache:close(MainHdl), + ok = file_handle_cache:delete(TmpHdl), + + {ok, _MainMessages, GuidsMain} = + scan_file_for_valid_messages_and_guids( + Dir, NonTmpRelatedFileName), + %% check that everything in Guids1 is in GuidsMain + true = is_sublist(Guids1, GuidsMain), + %% check that everything in GuidsTmp is in GuidsMain + true = is_sublist(GuidsTmp, GuidsMain) + end, + ok. + +is_sublist(SmallerL, BiggerL) -> + lists:all(fun (Item) -> lists:member(Item, BiggerL) end, SmallerL). + +is_disjoint(SmallerL, BiggerL) -> + lists:all(fun (Item) -> not lists:member(Item, BiggerL) end, SmallerL). + +scan_file_for_valid_messages(Dir, FileName) -> + case open_file(Dir, FileName, ?READ_MODE) of + {ok, Hdl} -> Valid = rabbit_msg_file:scan( + Hdl, filelib:file_size( + form_filename(Dir, FileName))), + %% if something really bad has happened, + %% the close could fail, but ignore + file_handle_cache:close(Hdl), + Valid; + {error, enoent} -> {ok, [], 0}; + {error, Reason} -> {error, {unable_to_scan_file, FileName, Reason}} + end. + +scan_file_for_valid_messages_and_guids(Dir, FileName) -> + {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName), + {ok, Messages, [Guid || {Guid, _TotalSize, _FileOffset} <- Messages]}. + +%% Takes the list in *ascending* order (i.e. eldest message +%% first). This is the opposite of what scan_file_for_valid_messages +%% produces. The list of msgs that is produced is youngest first. +find_contiguous_block_prefix(L) -> find_contiguous_block_prefix(L, 0, []). + +find_contiguous_block_prefix([], ExpectedOffset, Guids) -> + {ExpectedOffset, Guids}; +find_contiguous_block_prefix([{Guid, TotalSize, ExpectedOffset} | Tail], + ExpectedOffset, Guids) -> + ExpectedOffset1 = ExpectedOffset + TotalSize, + find_contiguous_block_prefix(Tail, ExpectedOffset1, [Guid | Guids]); +find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, Guids) -> + {ExpectedOffset, Guids}. + +build_index(true, _Files, State = #msstate { + file_summary_ets = FileSummaryEts }) -> + ets:foldl( + fun (#file_summary { valid_total_size = ValidTotalSize, + file_size = FileSize, + file = File }, + {_Offset, State1 = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize }}) -> + {FileSize, State1 #msstate { + sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize, + current_file = File }} + end, {0, State}, FileSummaryEts); +build_index(false, Files, State) -> + {ok, Pid} = gatherer:start_link(), + case Files of + [] -> build_index(Pid, undefined, [State #msstate.current_file], State); + _ -> {Offset, State1} = build_index(Pid, undefined, Files, State), + {Offset, lists:foldl(fun delete_file_if_empty/2, State1, Files)} + end. + +build_index(Gatherer, Left, [], + State = #msstate { file_summary_ets = FileSummaryEts, + sum_valid_data = SumValid, + sum_file_size = SumFileSize }) -> + case gatherer:out(Gatherer) of + empty -> + ok = gatherer:stop(Gatherer), + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), + ok = index_delete_by_file(undefined, State), + Offset = case ets:lookup(FileSummaryEts, Left) of + [] -> 0; + [#file_summary { file_size = FileSize }] -> FileSize + end, + {Offset, State #msstate { current_file = Left }}; + {value, #file_summary { valid_total_size = ValidTotalSize, + file_size = FileSize } = FileSummary} -> + true = ets:insert_new(FileSummaryEts, FileSummary), + build_index(Gatherer, Left, [], + State #msstate { + sum_valid_data = SumValid + ValidTotalSize, + sum_file_size = SumFileSize + FileSize }) + end; +build_index(Gatherer, Left, [File|Files], State) -> + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> build_index_worker(Gatherer, State, + Left, File, Files) + end), + build_index(Gatherer, File, Files, State). + +build_index_worker(Gatherer, State = #msstate { dir = Dir }, + Left, File, Files) -> + {ok, Messages, FileSize} = + scan_file_for_valid_messages(Dir, filenum_to_name(File)), + {ValidMessages, ValidTotalSize} = + lists:foldl( + fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) -> + case index_lookup(Guid, State) of + not_found -> + {VMAcc, VTSAcc}; + StoreEntry -> + ok = index_update(StoreEntry #msg_location { + file = File, offset = Offset, + total_size = TotalSize }, + State), + {[Obj | VMAcc], VTSAcc + TotalSize} + end + end, {[], 0}, Messages), + %% foldl reverses lists, find_contiguous_block_prefix needs + %% msgs eldest first, so, ValidMessages is the right way round + {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages), + {Right, FileSize1} = + case Files of + %% if it's the last file, we'll truncate to remove any + %% rubbish above the last valid message. This affects the + %% file size. + [] -> {undefined, case ValidMessages of + [] -> 0; + _ -> {_Guid, TotalSize, Offset} = + lists:last(ValidMessages), + Offset + TotalSize + end}; + [F|_] -> {F, FileSize} + end, + ok = gatherer:in(Gatherer, #file_summary { + file = File, + valid_total_size = ValidTotalSize, + contiguous_top = ContiguousTop, + left = Left, + right = Right, + file_size = FileSize1, + locked = false, + readers = 0 }), + ok = gatherer:finish(Gatherer). + +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation -- internal +%%---------------------------------------------------------------------------- + +maybe_roll_to_new_file( + Offset, + State = #msstate { dir = Dir, + current_file_handle = CurHdl, + current_file = CurFile, + file_summary_ets = FileSummaryEts, + cur_file_cache_ets = CurFileCacheEts, + file_size_limit = FileSizeLimit }) + when Offset >= FileSizeLimit -> + State1 = internal_sync(State), + ok = file_handle_cache:close(CurHdl), + NextFile = CurFile + 1, + {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE), + true = ets:insert_new(FileSummaryEts, #file_summary { + file = NextFile, + valid_total_size = 0, + contiguous_top = 0, + left = CurFile, + right = undefined, + file_size = 0, + locked = false, + readers = 0 }), + true = ets:update_element(FileSummaryEts, CurFile, + {#file_summary.right, NextFile}), + true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}), + maybe_compact(State1 #msstate { current_file_handle = NextHdl, + current_file = NextFile }); +maybe_roll_to_new_file(_, State) -> + State. + +maybe_compact(State = #msstate { sum_valid_data = SumValid, + sum_file_size = SumFileSize, + gc_active = false, + gc_pid = GCPid, + file_summary_ets = FileSummaryEts, + file_size_limit = FileSizeLimit }) + when (SumFileSize > 2 * FileSizeLimit andalso + (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) -> + %% TODO: the algorithm here is sub-optimal - it may result in a + %% complete traversal of FileSummaryEts. + case ets:first(FileSummaryEts) of + '$end_of_table' -> + State; + First -> + case find_files_to_gc(FileSummaryEts, FileSizeLimit, + ets:lookup(FileSummaryEts, First)) of + not_found -> + State; + {Src, Dst} -> + State1 = close_handle(Src, close_handle(Dst, State)), + true = ets:update_element(FileSummaryEts, Src, + {#file_summary.locked, true}), + true = ets:update_element(FileSummaryEts, Dst, + {#file_summary.locked, true}), + ok = rabbit_msg_store_gc:gc(GCPid, Src, Dst), + State1 #msstate { gc_active = {Src, Dst} } + end + end; +maybe_compact(State) -> + State. + +find_files_to_gc(FileSummaryEts, FileSizeLimit, + [#file_summary { file = Dst, + valid_total_size = DstValid, + right = Src }]) -> + case Src of + undefined -> + not_found; + _ -> + [#file_summary { file = Src, + valid_total_size = SrcValid, + left = Dst, + right = SrcRight }] = Next = + ets:lookup(FileSummaryEts, Src), + case SrcRight of + undefined -> not_found; + _ -> case DstValid + SrcValid =< FileSizeLimit of + true -> {Src, Dst}; + false -> find_files_to_gc( + FileSummaryEts, FileSizeLimit, Next) + end + end + end. + +delete_file_if_empty(File, State = #msstate { current_file = File }) -> + State; +delete_file_if_empty(File, State = #msstate { + dir = Dir, + sum_file_size = SumFileSize, + file_handles_ets = FileHandlesEts, + file_summary_ets = FileSummaryEts }) -> + [#file_summary { valid_total_size = ValidData, + left = Left, + right = Right, + file_size = FileSize, + locked = false }] = + ets:lookup(FileSummaryEts, File), + case ValidData of + %% we should NEVER find the current file in here hence right + %% should always be a file, not undefined + 0 -> case {Left, Right} of + {undefined, _} when Right =/= undefined -> + %% the eldest file is empty. + true = ets:update_element( + FileSummaryEts, Right, + {#file_summary.left, undefined}); + {_, _} when Right =/= undefined -> + true = ets:update_element(FileSummaryEts, Right, + {#file_summary.left, Left}), + true = ets:update_element(FileSummaryEts, Left, + {#file_summary.right, Right}) + end, + true = mark_handle_to_close(FileHandlesEts, File), + true = ets:delete(FileSummaryEts, File), + State1 = close_handle(File, State), + ok = file:delete(form_filename(Dir, filenum_to_name(File))), + State1 #msstate { sum_file_size = SumFileSize - FileSize }; + _ -> State + end. + +%%---------------------------------------------------------------------------- +%% garbage collection / compaction / aggregation -- external +%%---------------------------------------------------------------------------- + +gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) -> + [SrcObj = #file_summary { + readers = SrcReaders, + left = DstFile, + file_size = SrcFileSize, + locked = true }] = ets:lookup(FileSummaryEts, SrcFile), + [DstObj = #file_summary { + readers = DstReaders, + right = SrcFile, + file_size = DstFileSize, + locked = true }] = ets:lookup(FileSummaryEts, DstFile), + + case SrcReaders =:= 0 andalso DstReaders =:= 0 of + true -> TotalValidData = combine_files(SrcObj, DstObj, State), + %% don't update dest.right, because it could be + %% changing at the same time + true = ets:update_element( + FileSummaryEts, DstFile, + [{#file_summary.valid_total_size, TotalValidData}, + {#file_summary.contiguous_top, TotalValidData}, + {#file_summary.file_size, TotalValidData}]), + SrcFileSize + DstFileSize - TotalValidData; + false -> concurrent_readers + end. + +combine_files(#file_summary { file = Source, + valid_total_size = SourceValid, + left = Destination }, + #file_summary { file = Destination, + valid_total_size = DestinationValid, + contiguous_top = DestinationContiguousTop, + right = Source }, + State = {_FileSummaryEts, Dir, _Index, _IndexState}) -> + SourceName = filenum_to_name(Source), + DestinationName = filenum_to_name(Destination), + {ok, SourceHdl} = open_file(Dir, SourceName, + ?READ_AHEAD_MODE), + {ok, DestinationHdl} = open_file(Dir, DestinationName, + ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ExpectedSize = SourceValid + DestinationValid, + %% if DestinationValid =:= DestinationContiguousTop then we don't + %% need a tmp file + %% if they're not equal, then we need to write out everything past + %% the DestinationContiguousTop to a tmp file then truncate, + %% copy back in, and then copy over from Source + %% otherwise we just truncate straight away and copy over from Source + case DestinationContiguousTop =:= DestinationValid of + true -> + ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize); + false -> + {DestinationWorkList, DestinationValid} = + find_unremoved_messages_in_file(Destination, State), + Worklist = + lists:dropwhile( + fun (#msg_location { offset = Offset }) + when Offset =/= DestinationContiguousTop -> + %% it cannot be that Offset =:= + %% DestinationContiguousTop because if it + %% was then DestinationContiguousTop would + %% have been extended by TotalSize + Offset < DestinationContiguousTop + end, DestinationWorkList), + Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP, + {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE), + ok = copy_messages( + Worklist, DestinationContiguousTop, DestinationValid, + DestinationHdl, TmpHdl, Destination, State), + TmpSize = DestinationValid - DestinationContiguousTop, + %% so now Tmp contains everything we need to salvage from + %% Destination, and index_state has been updated to + %% reflect the compaction of Destination so truncate + %% Destination and copy from Tmp back to the end + {ok, 0} = file_handle_cache:position(TmpHdl, 0), + ok = truncate_and_extend_file( + DestinationHdl, DestinationContiguousTop, ExpectedSize), + {ok, TmpSize} = + file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize), + %% position in DestinationHdl should now be DestinationValid + ok = file_handle_cache:sync(DestinationHdl), + ok = file_handle_cache:delete(TmpHdl) + end, + {SourceWorkList, SourceValid} = + find_unremoved_messages_in_file(Source, State), + ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize, + SourceHdl, DestinationHdl, Destination, State), + %% tidy up + ok = file_handle_cache:close(DestinationHdl), + ok = file_handle_cache:delete(SourceHdl), + ExpectedSize. + +find_unremoved_messages_in_file(File, + {_FileSummaryEts, Dir, Index, IndexState}) -> + %% Messages here will be end-of-file at start-of-list + {ok, Messages, _FileSize} = + scan_file_for_valid_messages(Dir, filenum_to_name(File)), + %% foldl will reverse so will end up with msgs in ascending offset order + lists:foldl(fun ({Guid, TotalSize, _Offset}, Acc = {List, Size}) -> + case Index:lookup(Guid, IndexState) of + #msg_location { file = File } = Entry -> + {[ Entry | List ], TotalSize + Size}; + _ -> + Acc + end + end, {[], 0}, Messages). + +copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl, + Destination, {_FileSummaryEts, _Dir, Index, IndexState}) -> + Copy = fun ({BlockStart, BlockEnd}) -> + BSize = BlockEnd - BlockStart, + {ok, BlockStart} = + file_handle_cache:position(SourceHdl, BlockStart), + {ok, BSize} = + file_handle_cache:copy(SourceHdl, DestinationHdl, BSize) + end, + case + lists:foldl( + fun (#msg_location { guid = Guid, offset = Offset, + total_size = TotalSize }, + {CurOffset, Block = {BlockStart, BlockEnd}}) -> + %% CurOffset is in the DestinationFile. + %% Offset, BlockStart and BlockEnd are in the SourceFile + %% update MsgLocation to reflect change of file and offset + ok = Index:update_fields(Guid, + [{#msg_location.file, Destination}, + {#msg_location.offset, CurOffset}], + IndexState), + {CurOffset + TotalSize, + case BlockEnd of + undefined -> + %% base case, called only for the first list elem + {Offset, Offset + TotalSize}; + Offset -> + %% extend the current block because the + %% next msg follows straight on + {BlockStart, BlockEnd + TotalSize}; + _ -> + %% found a gap, so actually do the work for + %% the previous block + Copy(Block), + {Offset, Offset + TotalSize} + end} + end, {InitOffset, {undefined, undefined}}, WorkList) of + {FinalOffset, Block} -> + case WorkList of + [] -> ok; + _ -> Copy(Block), %% do the last remaining block + ok = file_handle_cache:sync(DestinationHdl) + end; + {FinalOffsetZ, _Block} -> + {gc_error, [{expected, FinalOffset}, + {got, FinalOffsetZ}, + {destination, Destination}]} + end. diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl new file mode 100644 index 00000000..1eb3c11f --- /dev/null +++ b/src/rabbit_msg_store_ets_index.erl @@ -0,0 +1,90 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_ets_index). + +-behaviour(rabbit_msg_store_index). + +-export([new/1, recover/1, + lookup/2, insert/2, update/2, update_fields/3, delete/2, + delete_by_file/2, terminate/1]). + +-define(MSG_LOC_NAME, rabbit_msg_store_ets_index). +-define(FILENAME, "msg_store_index.ets"). + +-include("rabbit_msg_store_index.hrl"). + +-record(state, { table, dir }). + +new(Dir) -> + file:delete(filename:join(Dir, ?FILENAME)), + Tid = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.guid}]), + #state { table = Tid, dir = Dir }. + +recover(Dir) -> + Path = filename:join(Dir, ?FILENAME), + case ets:file2tab(Path) of + {ok, Tid} -> file:delete(Path), + {ok, #state { table = Tid, dir = Dir }}; + Error -> Error + end. + +lookup(Key, State) -> + case ets:lookup(State #state.table, Key) of + [] -> not_found; + [Entry] -> Entry + end. + +insert(Obj, State) -> + true = ets:insert_new(State #state.table, Obj), + ok. + +update(Obj, State) -> + true = ets:insert(State #state.table, Obj), + ok. + +update_fields(Key, Updates, State) -> + true = ets:update_element(State #state.table, Key, Updates), + ok. + +delete(Key, State) -> + true = ets:delete(State #state.table, Key), + ok. + +delete_by_file(File, State) -> + MatchHead = #msg_location { file = File, _ = '_' }, + ets:select_delete(State #state.table, [{MatchHead, [], [true]}]), + ok. + +terminate(#state { table = MsgLocations, dir = Dir }) -> + ok = ets:tab2file(MsgLocations, filename:join(Dir, ?FILENAME), + [{extended_info, [object_count]}]), + ets:delete(MsgLocations). diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl new file mode 100644 index 00000000..eaa41173 --- /dev/null +++ b/src/rabbit_msg_store_gc.erl @@ -0,0 +1,141 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_gc). + +-behaviour(gen_server2). + +-export([start_link/4, gc/3, no_readers/2, stop/1]). + +-export([set_maximum_since_use/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(gcstate, + {dir, + index_state, + index_module, + parent, + file_summary_ets, + scheduled + }). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/4 :: (file:filename(), any(), atom(), ets:tid()) -> + 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok'). +-spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok'). +-spec(stop/1 :: (pid()) -> 'ok'). +-spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link(Dir, IndexState, IndexModule, FileSummaryEts) -> + gen_server2:start_link( + ?MODULE, [self(), Dir, IndexState, IndexModule, FileSummaryEts], + [{timeout, infinity}]). + +gc(Server, Source, Destination) -> + gen_server2:cast(Server, {gc, Source, Destination}). + +no_readers(Server, File) -> + gen_server2:cast(Server, {no_readers, File}). + +stop(Server) -> + gen_server2:call(Server, stop, infinity). + +set_maximum_since_use(Pid, Age) -> + gen_server2:pcast(Pid, 8, {set_maximum_since_use, Age}). + +%%---------------------------------------------------------------------------- + +init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) -> + ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, + [self()]), + {ok, #gcstate { dir = Dir, + index_state = IndexState, + index_module = IndexModule, + parent = Parent, + file_summary_ets = FileSummaryEts, + scheduled = undefined }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast({gc, Source, Destination}, + State = #gcstate { scheduled = undefined }) -> + {noreply, attempt_gc(State #gcstate { scheduled = {Source, Destination} }), + hibernate}; + +handle_cast({no_readers, File}, + State = #gcstate { scheduled = {Source, Destination} }) + when File =:= Source orelse File =:= Destination -> + {noreply, attempt_gc(State), hibernate}; + +handle_cast({no_readers, _File}, State) -> + {noreply, State, hibernate}; + +handle_cast({set_maximum_since_use, Age}, State) -> + ok = file_handle_cache:set_maximum_since_use(Age), + {noreply, State, hibernate}. + +handle_info(Info, State) -> + {stop, {unhandled_info, Info}, State}. + +terminate(_Reason, State) -> + State. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +attempt_gc(State = #gcstate { dir = Dir, + index_state = IndexState, + index_module = Index, + parent = Parent, + file_summary_ets = FileSummaryEts, + scheduled = {Source, Destination} }) -> + case rabbit_msg_store:gc(Source, Destination, + {FileSummaryEts, Dir, Index, IndexState}) of + concurrent_readers -> State; + Reclaimed -> ok = rabbit_msg_store:gc_done( + Parent, Reclaimed, Source, Destination), + State #gcstate { scheduled = undefined } + end. diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl new file mode 100644 index 00000000..0ed64a9d --- /dev/null +++ b/src/rabbit_msg_store_index.erl @@ -0,0 +1,47 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store_index). + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [{new, 1}, + {recover, 1}, + {lookup, 2}, + {insert, 2}, + {update, 2}, + {update_fields, 3}, + {delete, 2}, + {delete_by_file, 2}, + {terminate, 1}]; +behaviour_info(_Other) -> + undefined. diff --git a/src/rabbit_multi.erl b/src/rabbit_multi.erl index 336f74bf..3facef17 100644 --- a/src/rabbit_multi.erl +++ b/src/rabbit_multi.erl @@ -111,7 +111,7 @@ action(start_all, [NodeCount], RpcTimeout) -> action(status, [], RpcTimeout) -> io:format("Status of all running nodes...~n", []), call_all_nodes( - fun({Node, Pid}) -> + fun ({Node, Pid}) -> RabbitRunning = case is_rabbit_running(Node, RpcTimeout) of false -> not_running; @@ -123,7 +123,7 @@ action(status, [], RpcTimeout) -> action(stop_all, [], RpcTimeout) -> io:format("Stopping all nodes...~n", []), - call_all_nodes(fun({Node, Pid}) -> + call_all_nodes(fun ({Node, Pid}) -> io:format("Stopping node ~p~n", [Node]), rpc:call(Node, rabbit, stop_and_halt, []), case kill_wait(Pid, RpcTimeout, false) of @@ -309,9 +309,9 @@ is_dead(Pid) -> {win32, fun () -> Res = os:cmd("tasklist /nh /fi \"pid eq " ++ PidS ++ "\""), - case regexp:first_match(Res, "erl.exe") of - {match, _, _} -> false; - _ -> true + case re:run(Res, "erl\\.exe", [{capture, none}]) of + match -> false; + _ -> true end end}]). diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl index 406977b4..6baa4b88 100644 --- a/src/rabbit_net.erl +++ b/src/rabbit_net.erl @@ -31,31 +31,42 @@ -module(rabbit_net). -include("rabbit.hrl"). --include_lib("kernel/include/inet.hrl"). -export([async_recv/3, close/1, controlling_process/2, getstat/2, peername/1, port_command/2, send/2, sockname/1]). + %%--------------------------------------------------------------------------- -ifdef(use_specs). +-export_type([socket/0]). + -type(stat_option() :: 'recv_cnt' | 'recv_max' | 'recv_avg' | 'recv_oct' | 'recv_dvi' | 'send_cnt' | 'send_max' | 'send_avg' | 'send_oct' | 'send_pend'). --type(error() :: {'error', any()}). - --spec(async_recv/3 :: (socket(), integer(), timeout()) -> {'ok', any()}). --spec(close/1 :: (socket()) -> 'ok' | error()). --spec(controlling_process/2 :: (socket(), pid()) -> 'ok' | error()). +-type(error() :: rabbit_types:error(any())). +-type(socket() :: rabbit_networking:ip_port() | rabbit_types:ssl_socket()). + +-spec(async_recv/3 :: + (socket(), integer(), timeout()) -> rabbit_types:ok(any())). +-spec(close/1 :: (socket()) -> rabbit_types:ok_or_error(any())). +-spec(controlling_process/2 :: + (socket(), pid()) -> rabbit_types:ok_or_error(any())). -spec(port_command/2 :: (socket(), iolist()) -> 'true'). --spec(send/2 :: (socket(), binary() | iolist()) -> 'ok' | error()). --spec(peername/1 :: (socket()) -> - {'ok', {ip_address(), non_neg_integer()}} | error()). --spec(sockname/1 :: (socket()) -> - {'ok', {ip_address(), non_neg_integer()}} | error()). --spec(getstat/2 :: (socket(), [stat_option()]) -> - {'ok', [{stat_option(), integer()}]} | error()). +-spec(send/2 :: + (socket(), binary() | iolist()) -> rabbit_types:ok_or_error(any())). +-spec(peername/1 :: + (socket()) + -> rabbit_types:ok({inet:ip_address(), rabbit_networking:ip_port()}) | + error()). +-spec(sockname/1 :: + (socket()) + -> rabbit_types:ok({inet:ip_address(), rabbit_networking:ip_port()}) | + error()). +-spec(getstat/2 :: + (socket(), [stat_option()]) + -> rabbit_types:ok([{stat_option(), integer()}]) | error()). -endif. @@ -66,7 +77,7 @@ async_recv(Sock, Length, Timeout) when is_record(Sock, ssl_socket) -> Pid = self(), Ref = make_ref(), - spawn(fun() -> Pid ! {inet_async, Sock, Ref, + spawn(fun () -> Pid ! {inet_async, Sock, Ref, ssl:recv(Sock#ssl_socket.ssl, Length, Timeout)} end), diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index c3d0b7b7..3a3357ba 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -63,25 +63,29 @@ -ifdef(use_specs). --type(host() :: ip_address() | string() | atom()). --type(connection() :: pid()). +-export_type([ip_port/0, hostname/0]). -spec(start/0 :: () -> 'ok'). --spec(start_tcp_listener/2 :: (host(), ip_port()) -> 'ok'). --spec(start_ssl_listener/3 :: (host(), ip_port(), [info()]) -> 'ok'). --spec(stop_tcp_listener/2 :: (host(), ip_port()) -> 'ok'). --spec(active_listeners/0 :: () -> [listener()]). --spec(node_listeners/1 :: (erlang_node()) -> [listener()]). --spec(connections/0 :: () -> [connection()]). --spec(connection_info_keys/0 :: () -> [info_key()]). --spec(connection_info/1 :: (connection()) -> [info()]). --spec(connection_info/2 :: (connection(), [info_key()]) -> [info()]). --spec(connection_info_all/0 :: () -> [[info()]]). --spec(connection_info_all/1 :: ([info_key()]) -> [[info()]]). +-spec(start_tcp_listener/2 :: (hostname(), ip_port()) -> 'ok'). +-spec(start_ssl_listener/3 :: (hostname(), ip_port(), [rabbit_types:info()]) + -> 'ok'). +-spec(stop_tcp_listener/2 :: (hostname(), ip_port()) -> 'ok'). +-spec(active_listeners/0 :: () -> [rabbit_types:listener()]). +-spec(node_listeners/1 :: (node()) -> [rabbit_types:listener()]). +-spec(connections/0 :: () -> [rabbit_types:connection()]). +-spec(connection_info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(connection_info/1 :: + (rabbit_types:connection()) -> [rabbit_types:info()]). +-spec(connection_info/2 :: + (rabbit_types:connection(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). +-spec(connection_info_all/0 :: () -> [[rabbit_types:info()]]). +-spec(connection_info_all/1 :: + ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]). -spec(close_connection/2 :: (pid(), string()) -> 'ok'). --spec(on_node_down/1 :: (erlang_node()) -> 'ok'). --spec(check_tcp_listener_address/3 :: (atom(), host(), ip_port()) -> - {ip_address(), atom()}). +-spec(on_node_down/1 :: (node()) -> 'ok'). +-spec(check_tcp_listener_address/3 :: + (atom(), hostname(), ip_port()) -> {inet:ip_address(), atom()}). -endif. @@ -102,7 +106,7 @@ boot_ssl() -> {ok, []} -> ok; {ok, SslListeners} -> - ok = rabbit_misc:start_applications([crypto, ssl]), + ok = rabbit_misc:start_applications([crypto, public_key, ssl]), {ok, SslOpts} = application:get_env(ssl_options), [start_ssl_listener(Host, Port, SslOpts) || {Host, Port} <- SslListeners], ok diff --git a/src/rabbit_persister.erl b/src/rabbit_persister.erl index 3cd42e47..a427b135 100644 --- a/src/rabbit_persister.erl +++ b/src/rabbit_persister.erl @@ -65,21 +65,29 @@ -ifdef(use_specs). --type(pmsg() :: {queue_name(), pkey()}). +-type(pkey() :: rabbit_guid:guid()). +-type(pmsg() :: {rabbit_amqqueue:name(), pkey()}). + -type(work_item() :: - {publish, message(), pmsg()} | + {publish, rabbit_types:message(), pmsg()} | {deliver, pmsg()} | {ack, pmsg()}). --spec(start_link/1 :: ([queue_name()]) -> - {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/1 :: + ([rabbit_amqqueue:name()]) + -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(transaction/1 :: ([work_item()]) -> 'ok'). --spec(extend_transaction/2 :: ({txn(), queue_name()}, [work_item()]) -> 'ok'). +-spec(extend_transaction/2 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}, [work_item()]) + -> 'ok'). -spec(dirty_work/1 :: ([work_item()]) -> 'ok'). --spec(commit_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). --spec(rollback_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). +-spec(commit_transaction/1 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}) -> 'ok'). +-spec(rollback_transaction/1 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}) -> 'ok'). -spec(force_snapshot/0 :: () -> 'ok'). --spec(queue_content/1 :: (queue_name()) -> [{message(), boolean()}]). +-spec(queue_content/1 :: + (rabbit_amqqueue:name()) -> [{rabbit_types:message(), boolean()}]). -endif. @@ -236,7 +244,7 @@ log_work(CreateWorkUnit, MessageList, snapshot = Snapshot = #psnapshot{messages = Messages}}) -> Unit = CreateWorkUnit( rabbit_misc:map_in_order( - fun(M = {publish, Message, QK = {_QName, PKey}}) -> + fun (M = {publish, Message, QK = {_QName, PKey}}) -> case ets:lookup(Messages, PKey) of [_] -> {tied, QK}; [] -> ets:insert(Messages, {PKey, Message}), diff --git a/src/rabbit_plugin_activator.erl b/src/rabbit_plugin_activator.erl index 274981ef..ef3c5cc2 100644 --- a/src/rabbit_plugin_activator.erl +++ b/src/rabbit_plugin_activator.erl @@ -108,6 +108,7 @@ start() -> WApp == stdlib; WApp == kernel; WApp == sasl; + WApp == crypto; WApp == os_mon -> false; _ -> true end]), diff --git a/src/rabbit_queue_collector.erl b/src/rabbit_queue_collector.erl new file mode 100644 index 00000000..ea3768d4 --- /dev/null +++ b/src/rabbit_queue_collector.erl @@ -0,0 +1,106 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_collector). + +-behaviour(gen_server). + +-export([start_link/0, register/2, delete_all/1, shutdown/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {queues}). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> rabbit_types:ok(pid())). +-spec(register/2 :: (pid(), rabbit_types:amqqueue()) -> 'ok'). +-spec(delete_all/1 :: (pid()) -> 'ok'). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server:start_link(?MODULE, [], []). + +register(CollectorPid, Q) -> + gen_server:call(CollectorPid, {register, Q}, infinity). + +delete_all(CollectorPid) -> + gen_server:call(CollectorPid, delete_all, infinity). + +shutdown(CollectorPid) -> + gen_server:call(CollectorPid, shutdown, infinity). + +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, #state{queues = dict:new()}}. + +%%-------------------------------------------------------------------------- + +handle_call({register, Q}, _From, + State = #state{queues = Queues}) -> + MonitorRef = erlang:monitor(process, Q#amqqueue.pid), + {reply, ok, + State#state{queues = dict:store(MonitorRef, Q, Queues)}}; + +handle_call(delete_all, _From, State = #state{queues = Queues}) -> + [rabbit_misc:with_exit_handler( + fun () -> ok end, + fun () -> + erlang:demonitor(MonitorRef), + rabbit_amqqueue:delete(Q, false, false) + end) + || {MonitorRef, Q} <- dict:to_list(Queues)], + {reply, ok, State}; + +handle_call(shutdown, _From, State) -> + {stop, normal, ok, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'DOWN', MonitorRef, process, _DownPid, _Reason}, + State = #state{queues = Queues}) -> + {noreply, State#state{queues = dict:erase(MonitorRef, Queues)}}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl new file mode 100644 index 00000000..d6b8bb28 --- /dev/null +++ b/src/rabbit_queue_index.erl @@ -0,0 +1,932 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_index). + +-export([init/4, terminate/2, delete_and_terminate/1, publish/4, + deliver/2, ack/2, sync/2, flush/1, read/3, + next_segment_boundary/1, bounds/1, recover/1]). + +-define(CLEAN_FILENAME, "clean.dot"). + +%%---------------------------------------------------------------------------- + +%% The queue index is responsible for recording the order of messages +%% within a queue on disk. +%% +%% Because of the fact that the queue can decide at any point to send +%% a queue entry to disk, you can not rely on publishes appearing in +%% order. The only thing you can rely on is a message being published, +%% then delivered, then ack'd. +%% +%% In order to be able to clean up ack'd messages, we write to segment +%% files. These files have a fixed maximum size: ?SEGMENT_ENTRY_COUNT +%% publishes, delivers and acknowledgements. They are numbered, and so +%% it is known that the 0th segment contains messages 0 -> +%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages +%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. As +%% such, in the segment files, we only refer to message sequence ids +%% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a +%% fixed size. +%% +%% However, transient messages which are not sent to disk at any point +%% will cause gaps to appear in segment files. Therefore, we delete a +%% segment file whenever the number of publishes == number of acks +%% (note that although it is not fully enforced, it is assumed that a +%% message will never be ackd before it is delivered, thus this test +%% also implies == number of delivers). In practise, this does not +%% cause disk churn in the pathological case because of the journal +%% and caching (see below). +%% +%% Because of the fact that publishes, delivers and acks can occur all +%% over, we wish to avoid lots of seeking. Therefore we have a fixed +%% sized journal to which all actions are appended. When the number of +%% entries in this journal reaches max_journal_entries, the journal +%% entries are scattered out to their relevant files, and the journal +%% is truncated to zero size. Note that entries in the journal must +%% carry the full sequence id, thus the format of entries in the +%% journal is different to that in the segments. +%% +%% The journal is also kept fully in memory, pre-segmented: the state +%% contains a mapping from segment numbers to state-per-segment (this +%% state is held for all segments which have been "seen": thus a +%% segment which has been read but has no pending entries in the +%% journal is still held in this mapping. Also note that a dict is +%% used for this mapping, not an array because with an array, you will +%% always have entries from 0). Actions are stored directly in this +%% state. Thus at the point of flushing the journal, firstly no +%% reading from disk is necessary, but secondly if the known number of +%% acks and publishes in a segment are equal, given the known state of +%% the segment file combined with the journal, no writing needs to be +%% done to the segment file either (in fact it is deleted if it exists +%% at all). This is safe given that the set of acks is a subset of the +%% set of publishes. When it's necessary to sync messages because of +%% transactions, it's only necessary to fsync on the journal: when +%% entries are distributed from the journal to segment files, those +%% segments appended to are fsync'd prior to the journal being +%% truncated. +%% +%% This module is also responsible for scanning the queue index files +%% and seeding the message store on start up. +%% +%% Note that in general, the representation of a message's state as +%% the tuple: {('no_pub'|{Guid, IsPersistent}), ('del'|'no_del'), +%% ('ack'|'no_ack')} is richer than strictly necessary for most +%% operations. However, for startup, and to ensure the safe and +%% correct combination of journal entries with entries read from the +%% segment on disk, this richer representation vastly simplifies and +%% clarifies the code. +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. +%% +%%---------------------------------------------------------------------------- + +%% ---- Journal details ---- + +-define(JOURNAL_FILENAME, "journal.jif"). + +-define(PUB_PERSIST_JPREFIX, 2#00). +-define(PUB_TRANS_JPREFIX, 2#01). +-define(DEL_JPREFIX, 2#10). +-define(ACK_JPREFIX, 2#11). +-define(JPREFIX_BITS, 2). +-define(SEQ_BYTES, 8). +-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). + +%% ---- Segment details ---- + +-define(SEGMENT_EXTENSION, ".idx"). + +%% TODO: The segment size would be configurable, but deriving all the +%% other values is quite hairy and quite possibly noticably less +%% efficient, depending on how clever the compiler is when it comes to +%% binary generation/matching with constant vs variable lengths. + +-define(REL_SEQ_BITS, 14). +-define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))). + +%% seq only is binary 00 followed by 14 bits of rel seq id +%% (range: 0 - 16383) +-define(REL_SEQ_ONLY_PREFIX, 00). +-define(REL_SEQ_ONLY_PREFIX_BITS, 2). +-define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2). + +%% publish record is binary 1 followed by a bit for is_persistent, +%% then 14 bits of rel seq id, and 128 bits of md5sum msg id +-define(PUBLISH_PREFIX, 1). +-define(PUBLISH_PREFIX_BITS, 1). + +-define(GUID_BYTES, 16). %% md5sum is 128 bit or 16 bytes +-define(GUID_BITS, (?GUID_BYTES * 8)). +%% 16 bytes for md5sum + 2 for seq, bits and prefix +-define(PUBLISH_RECORD_LENGTH_BYTES, ?GUID_BYTES + 2). + +%% 1 publish, 1 deliver, 1 ack per msg +-define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT * + (?PUBLISH_RECORD_LENGTH_BYTES + + (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))). + +%% ---- misc ---- + +-define(PUB, {_, _}). %% {Guid, IsPersistent} + +-define(READ_MODE, [binary, raw, read, {read_ahead, ?SEGMENT_TOTAL_SIZE}]). + +%%---------------------------------------------------------------------------- + +-record(qistate, { dir, segments, journal_handle, dirty_count, + max_journal_entries }). + +-record(segment, { num, path, journal_entries, unacked }). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(hdl() :: ('undefined' | any())). +-type(segment() :: ('undefined' | + #segment { num :: non_neg_integer(), + path :: file:filename(), + journal_entries :: array(), + unacked :: non_neg_integer() + })). +-type(seq_id() :: integer()). +-type(seg_dict() :: {dict:dictionary(), [segment()]}). +-type(qistate() :: #qistate { dir :: file:filename(), + segments :: 'undefined' | seg_dict(), + journal_handle :: hdl(), + dirty_count :: integer(), + max_journal_entries :: non_neg_integer() + }). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})), + A}). + +-spec(init/4 :: (rabbit_amqqueue:name(), boolean(), boolean(), + fun ((rabbit_guid:guid()) -> boolean())) -> + {'undefined' | non_neg_integer(), [any()], qistate()}). +-spec(terminate/2 :: ([any()], qistate()) -> qistate()). +-spec(delete_and_terminate/1 :: (qistate()) -> qistate()). +-spec(publish/4 :: (rabbit_guid:guid(), seq_id(), boolean(), qistate()) -> + qistate()). +-spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(ack/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(sync/2 :: ([seq_id()], qistate()) -> qistate()). +-spec(flush/1 :: (qistate()) -> qistate()). +-spec(read/3 :: (seq_id(), seq_id(), qistate()) -> + {[{rabbit_guid:guid(), seq_id(), boolean(), boolean()}], + qistate()}). +-spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()). +-spec(bounds/1 :: (qistate()) -> + {non_neg_integer(), non_neg_integer(), qistate()}). +-spec(recover/1 :: + ([rabbit_amqqueue:name()]) -> {[[any()]], startup_fun_state()}). + +-endif. + + +%%---------------------------------------------------------------------------- +%% public API +%%---------------------------------------------------------------------------- + +init(Name, Recover, MsgStoreRecovered, ContainsCheckFun) -> + State = #qistate { dir = Dir } = blank_state(Name, not Recover), + Terms = case read_shutdown_terms(Dir) of + {error, _} -> []; + {ok, Terms1} -> Terms1 + end, + CleanShutdown = detect_clean_shutdown(Dir), + {Count, State1} = + case CleanShutdown andalso MsgStoreRecovered of + true -> RecoveredCounts = proplists:get_value(segments, Terms, []), + init_clean(RecoveredCounts, State); + false -> init_dirty(CleanShutdown, ContainsCheckFun, State) + end, + {Count, Terms, State1}. + +terminate(Terms, State) -> + {SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), + store_clean_shutdown([{segments, SegmentCounts} | Terms], Dir), + State1. + +delete_and_terminate(State) -> + {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), + ok = rabbit_misc:recursive_delete([Dir]), + State1. + +publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) -> + ?GUID_BYTES = size(Guid), + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append( + JournalHdl, [<<(case IsPersistent of + true -> ?PUB_PERSIST_JPREFIX; + false -> ?PUB_TRANS_JPREFIX + end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid]), + maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)). + +deliver(SeqIds, State) -> + deliver_or_ack(del, SeqIds, State). + +ack(SeqIds, State) -> + deliver_or_ack(ack, SeqIds, State). + +sync([], State) -> + State; +sync(_SeqIds, State = #qistate { journal_handle = undefined }) -> + State; +sync(_SeqIds, State = #qistate { journal_handle = JournalHdl }) -> + %% The SeqIds here contains the SeqId of every publish and ack in + %% the transaction. Ideally we should go through these seqids and + %% only sync the journal if the pubs or acks appear in the + %% journal. However, this would be complex to do, and given that + %% the variable queue publishes and acks to the qi, and then + %% syncs, all in one operation, there is no possibility of the + %% seqids not being in the journal, provided the transaction isn't + %% emptied (handled above anyway). + ok = file_handle_cache:sync(JournalHdl), + State. + +flush(State = #qistate { dirty_count = 0 }) -> State; +flush(State) -> flush_journal(State). + +read(StartEnd, StartEnd, State) -> + {[], State}; +read(Start, End, State = #qistate { segments = Segments, + dir = Dir }) when Start =< End -> + %% Start is inclusive, End is exclusive. + LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), + UpperB = {EndSeg, _EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End - 1), + {Messages, Segments1} = + lists:foldr(fun (Seg, Acc) -> + read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir) + end, {[], Segments}, lists:seq(StartSeg, EndSeg)), + {Messages, State #qistate { segments = Segments1 }}. + +next_segment_boundary(SeqId) -> + {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + reconstruct_seq_id(Seg + 1, 0). + +bounds(State = #qistate { segments = Segments }) -> + %% This is not particularly efficient, but only gets invoked on + %% queue initialisation. + SegNums = lists:sort(segment_nums(Segments)), + %% Don't bother trying to figure out the lowest seq_id, merely the + %% seq_id of the start of the lowest segment. That seq_id may not + %% actually exist, but that's fine. The important thing is that + %% the segment exists and the seq_id reported is on a segment + %% boundary. + %% + %% We also don't really care about the max seq_id. Just start the + %% next segment: it makes life much easier. + %% + %% SegNums is sorted, ascending. + {LowSeqId, NextSeqId} = + case SegNums of + [] -> {0, 0}; + [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), + reconstruct_seq_id(1 + lists:last(SegNums), 0)} + end, + {LowSeqId, NextSeqId, State}. + +recover(DurableQueues) -> + DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} || + Queue <- DurableQueues ]), + QueuesDir = queues_dir(), + Directories = case file:list_dir(QueuesDir) of + {ok, Entries} -> [ Entry || Entry <- Entries, + filelib:is_dir( + filename:join( + QueuesDir, Entry)) ]; + {error, enoent} -> [] + end, + DurableDirectories = sets:from_list(dict:fetch_keys(DurableDict)), + {DurableQueueNames, DurableTerms} = + lists:foldl( + fun (QueueDir, {DurableAcc, TermsAcc}) -> + case sets:is_element(QueueDir, DurableDirectories) of + true -> + TermsAcc1 = + case read_shutdown_terms( + filename:join(QueuesDir, QueueDir)) of + {error, _} -> TermsAcc; + {ok, Terms} -> [Terms | TermsAcc] + end, + {[dict:fetch(QueueDir, DurableDict) | DurableAcc], + TermsAcc1}; + false -> + Dir = filename:join(queues_dir(), QueueDir), + ok = rabbit_misc:recursive_delete([Dir]), + {DurableAcc, TermsAcc} + end + end, {[], []}, Directories), + {DurableTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. + +%%---------------------------------------------------------------------------- +%% startup and shutdown +%%---------------------------------------------------------------------------- + +blank_state(QueueName, EnsureFresh) -> + StrName = queue_name_to_dir_name(QueueName), + Dir = filename:join(queues_dir(), StrName), + ok = case EnsureFresh of + true -> false = filelib:is_file(Dir), %% is_file == is file or dir + ok; + false -> ok + end, + ok = filelib:ensure_dir(filename:join(Dir, "nothing")), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + #qistate { dir = Dir, + segments = segments_new(), + journal_handle = undefined, + dirty_count = 0, + max_journal_entries = MaxJournal }. + +detect_clean_shutdown(Dir) -> + case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of + ok -> true; + {error, enoent} -> false + end. + +read_shutdown_terms(Dir) -> + rabbit_misc:read_term_file(filename:join(Dir, ?CLEAN_FILENAME)). + +store_clean_shutdown(Terms, Dir) -> + rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms). + +init_clean(RecoveredCounts, State) -> + %% Load the journal. Since this is a clean recovery this (almost) + %% gets us back to where we were on shutdown. + State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State), + %% The journal loading only creates records for segments touched + %% by the journal, and the counts are based on the journal entries + %% only. We need *complete* counts for *all* segments. By an + %% amazing coincidence we stored that information on shutdown. + Segments1 = + lists:foldl( + fun ({Seg, UnackedCount}, SegmentsN) -> + Segment = segment_find_or_new(Seg, Dir, SegmentsN), + segment_store(Segment #segment { unacked = UnackedCount }, + SegmentsN) + end, Segments, RecoveredCounts), + %% the counts above include transient messages, which would be the + %% wrong thing to return + {undefined, State1 # qistate { segments = Segments1 }}. + +init_dirty(CleanShutdown, ContainsCheckFun, State) -> + %% Recover the journal completely. This will also load segments + %% which have entries in the journal and remove duplicates. The + %% counts will correctly reflect the combination of the segment + %% and the journal. + State1 = #qistate { dir = Dir, segments = Segments } = + recover_journal(State), + {Segments1, Count} = + %% Load each segment in turn and filter out messages that are + %% not in the msg_store, by adding acks to the journal. These + %% acks only go to the RAM journal as it doesn't matter if we + %% lose them. Also mark delivered if not clean shutdown. Also + %% find the number of unacked messages. + lists:foldl( + fun (Seg, {Segments2, CountAcc}) -> + Segment = #segment { unacked = UnackedCount } = + recover_segment(ContainsCheckFun, CleanShutdown, + segment_find_or_new(Seg, Dir, Segments2)), + {segment_store(Segment, Segments2), CountAcc + UnackedCount} + end, {Segments, 0}, all_segment_nums(State1)), + %% Unconditionally flush since the dirty_count doesn't get updated + %% by the above foldl. + State2 = flush_journal(State1 #qistate { segments = Segments1 }), + {Count, State2}. + +terminate(State = #qistate { journal_handle = JournalHdl, + segments = Segments }) -> + ok = case JournalHdl of + undefined -> ok; + _ -> file_handle_cache:close(JournalHdl) + end, + SegmentCounts = + segment_fold( + fun (#segment { num = Seg, unacked = UnackedCount }, Acc) -> + [{Seg, UnackedCount} | Acc] + end, [], Segments), + {SegmentCounts, State #qistate { journal_handle = undefined, + segments = undefined }}. + +recover_segment(ContainsCheckFun, CleanShutdown, + Segment = #segment { journal_entries = JEntries }) -> + {SegEntries, UnackedCount} = load_segment(false, Segment), + {SegEntries1, UnackedCountDelta} = + segment_plus_journal(SegEntries, JEntries), + array:sparse_foldl( + fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment1) -> + recover_message(ContainsCheckFun(Guid), CleanShutdown, + Del, RelSeq, Segment1) + end, + Segment #segment { unacked = UnackedCount + UnackedCountDelta }, + SegEntries1). + +recover_message( true, true, _Del, _RelSeq, Segment) -> + Segment; +recover_message( true, false, del, _RelSeq, Segment) -> + Segment; +recover_message( true, false, no_del, RelSeq, Segment) -> + add_to_journal(RelSeq, del, Segment); +recover_message(false, _, del, RelSeq, Segment) -> + add_to_journal(RelSeq, ack, Segment); +recover_message(false, _, no_del, RelSeq, Segment) -> + add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)). + +queue_name_to_dir_name(Name = #resource { kind = queue }) -> + <<Num:128>> = erlang:md5(term_to_binary(Name)), + lists:flatten(io_lib:format("~.36B", [Num])). + +queues_dir() -> + filename:join(rabbit_mnesia:dir(), "queues"). + +%%---------------------------------------------------------------------------- +%% msg store startup delta function +%%---------------------------------------------------------------------------- + +queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> + {ok, Gatherer} = gatherer:start_link(), + [begin + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> queue_index_walker_reader(QueueName, Gatherer) + end) + end || QueueName <- DurableQueues], + queue_index_walker({next, Gatherer}); + +queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> + case gatherer:out(Gatherer) of + empty -> + ok = gatherer:stop(Gatherer), + ok = rabbit_misc:unlink_and_capture_exit(Gatherer), + finished; + {value, {Guid, Count}} -> + {Guid, Count, {next, Gatherer}} + end. + +queue_index_walker_reader(QueueName, Gatherer) -> + State = #qistate { segments = Segments, dir = Dir } = + recover_journal(blank_state(QueueName, false)), + [ok = segment_entries_foldr( + fun (_RelSeq, {{Guid, true}, _IsDelivered, no_ack}, ok) -> + gatherer:in(Gatherer, {Guid, 1}); + (_RelSeq, _Value, Acc) -> + Acc + end, ok, segment_find_or_new(Seg, Dir, Segments)) || + Seg <- all_segment_nums(State)], + {_SegmentCounts, _State} = terminate(State), + ok = gatherer:finish(Gatherer). + +%%---------------------------------------------------------------------------- +%% journal manipulation +%%---------------------------------------------------------------------------- + +add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, + segments = Segments, + dir = Dir }) -> + {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), + Segment = segment_find_or_new(Seg, Dir, Segments), + Segment1 = add_to_journal(RelSeq, Action, Segment), + State #qistate { dirty_count = DCount + 1, + segments = segment_store(Segment1, Segments) }; + +add_to_journal(RelSeq, Action, + Segment = #segment { journal_entries = JEntries, + unacked = UnackedCount }) -> + Segment1 = Segment #segment { + journal_entries = add_to_journal(RelSeq, Action, JEntries) }, + case Action of + del -> Segment1; + ack -> Segment1 #segment { unacked = UnackedCount - 1 }; + ?PUB -> Segment1 #segment { unacked = UnackedCount + 1 } + end; + +add_to_journal(RelSeq, Action, JEntries) -> + Val = case array:get(RelSeq, JEntries) of + undefined -> + case Action of + ?PUB -> {Action, no_del, no_ack}; + del -> {no_pub, del, no_ack}; + ack -> {no_pub, no_del, ack} + end; + ({Pub, no_del, no_ack}) when Action == del -> + {Pub, del, no_ack}; + ({Pub, Del, no_ack}) when Action == ack -> + {Pub, Del, ack} + end, + array:set(RelSeq, Val, JEntries). + +maybe_flush_journal(State = #qistate { dirty_count = DCount, + max_journal_entries = MaxJournal }) + when DCount > MaxJournal -> + flush_journal(State); +maybe_flush_journal(State) -> + State. + +flush_journal(State = #qistate { segments = Segments }) -> + Segments1 = + segment_fold( + fun (#segment { unacked = 0, path = Path }, SegmentsN) -> + case filelib:is_file(Path) of + true -> ok = file:delete(Path); + false -> ok + end, + SegmentsN; + (#segment {} = Segment, SegmentsN) -> + segment_store(append_journal_to_segment(Segment), SegmentsN) + end, segments_new(), Segments), + {JournalHdl, State1} = + get_journal_handle(State #qistate { segments = Segments1 }), + ok = file_handle_cache:clear(JournalHdl), + State1 #qistate { dirty_count = 0 }. + +append_journal_to_segment(#segment { journal_entries = JEntries, + path = Path } = Segment) -> + case array:sparse_size(JEntries) of + 0 -> Segment; + _ -> {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], + [{write_buffer, infinity}]), + array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries), + ok = file_handle_cache:close(Hdl), + Segment #segment { journal_entries = array_new() } + end. + +get_journal_handle(State = #qistate { journal_handle = undefined, + dir = Dir }) -> + Path = filename:join(Dir, ?JOURNAL_FILENAME), + {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE], + [{write_buffer, infinity}]), + {Hdl, State #qistate { journal_handle = Hdl }}; +get_journal_handle(State = #qistate { journal_handle = Hdl }) -> + {Hdl, State}. + +%% Loading Journal. This isn't idempotent and will mess up the counts +%% if you call it more than once on the same state. Assumes the counts +%% are 0 to start with. +load_journal(State) -> + {JournalHdl, State1} = get_journal_handle(State), + {ok, 0} = file_handle_cache:position(JournalHdl, 0), + load_journal_entries(State1). + +%% ditto +recover_journal(State) -> + State1 = #qistate { segments = Segments } = load_journal(State), + Segments1 = + segment_map( + fun (Segment = #segment { journal_entries = JEntries, + unacked = UnackedCountInJournal }) -> + %% We want to keep ack'd entries in so that we can + %% remove them if duplicates are in the journal. The + %% counts here are purely from the segment itself. + {SegEntries, UnackedCountInSeg} = load_segment(true, Segment), + {JEntries1, UnackedCountDuplicates} = + journal_minus_segment(JEntries, SegEntries), + Segment #segment { journal_entries = JEntries1, + unacked = (UnackedCountInJournal + + UnackedCountInSeg - + UnackedCountDuplicates) } + end, Segments), + State1 #qistate { segments = Segments1 }. + +load_journal_entries(State = #qistate { journal_handle = Hdl }) -> + case file_handle_cache:read(Hdl, ?SEQ_BYTES) of + {ok, <<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>} -> + case Prefix of + ?DEL_JPREFIX -> + load_journal_entries(add_to_journal(SeqId, del, State)); + ?ACK_JPREFIX -> + load_journal_entries(add_to_journal(SeqId, ack, State)); + _ -> + case file_handle_cache:read(Hdl, ?GUID_BYTES) of + {ok, <<GuidNum:?GUID_BITS>>} -> + %% work around for binary data + %% fragmentation. See + %% rabbit_msg_file:read_next/2 + <<Guid:?GUID_BYTES/binary>> = + <<GuidNum:?GUID_BITS>>, + Publish = {Guid, case Prefix of + ?PUB_PERSIST_JPREFIX -> true; + ?PUB_TRANS_JPREFIX -> false + end}, + load_journal_entries( + add_to_journal(SeqId, Publish, State)); + _ErrOrEoF -> %% err, we've lost at least a publish + State + end + end; + _ErrOrEoF -> State + end. + +deliver_or_ack(_Kind, [], State) -> + State; +deliver_or_ack(Kind, SeqIds, State) -> + JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end, + {JournalHdl, State1} = get_journal_handle(State), + ok = file_handle_cache:append( + JournalHdl, + [<<JPrefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>> || SeqId <- SeqIds]), + maybe_flush_journal(lists:foldl(fun (SeqId, StateN) -> + add_to_journal(SeqId, Kind, StateN) + end, State1, SeqIds)). + +%%---------------------------------------------------------------------------- +%% segment manipulation +%%---------------------------------------------------------------------------- + +seq_id_to_seg_and_rel_seq_id(SeqId) -> + { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }. + +reconstruct_seq_id(Seg, RelSeq) -> + (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq. + +all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> + lists:sort( + sets:to_list( + lists:foldl( + fun (SegName, Set) -> + sets:add_element( + list_to_integer( + lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end, + SegName)), Set) + end, sets:from_list(segment_nums(Segments)), + filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir)))). + +segment_find_or_new(Seg, Dir, Segments) -> + case segment_find(Seg, Segments) of + {ok, Segment} -> Segment; + error -> SegName = integer_to_list(Seg) ++ ?SEGMENT_EXTENSION, + Path = filename:join(Dir, SegName), + #segment { num = Seg, + path = Path, + journal_entries = array_new(), + unacked = 0 } + end. + +segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) -> + {ok, Segment}; %% 1 or (2, matches head) +segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) -> + {ok, Segment}; %% 2, matches tail +segment_find(Seg, {Segments, _}) -> %% no match + dict:find(Seg, Segments). + +segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head) + {Segments, [#segment { num = Seg } | Tail]}) -> + {Segments, [Segment | Tail]}; +segment_store(Segment = #segment { num = Seg }, %% 2, matches tail + {Segments, [SegmentA, #segment { num = Seg }]}) -> + {Segments, [Segment, SegmentA]}; +segment_store(Segment = #segment { num = Seg }, {Segments, []}) -> + {dict:erase(Seg, Segments), [Segment]}; +segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) -> + {dict:erase(Seg, Segments), [Segment, SegmentA]}; +segment_store(Segment = #segment { num = Seg }, + {Segments, [SegmentA, SegmentB]}) -> + {dict:store(SegmentB#segment.num, SegmentB, dict:erase(Seg, Segments)), + [Segment, SegmentA]}. + +segment_fold(Fun, Acc, {Segments, CachedSegments}) -> + dict:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end, + lists:foldl(Fun, Acc, CachedSegments), Segments). + +segment_map(Fun, {Segments, CachedSegments}) -> + {dict:map(fun (_Seg, Segment) -> Fun(Segment) end, Segments), + lists:map(Fun, CachedSegments)}. + +segment_nums({Segments, CachedSegments}) -> + lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++ + dict:fetch_keys(Segments). + +segments_new() -> + {dict:new(), []}. + +write_entry_to_segment(_RelSeq, {?PUB, del, ack}, Hdl) -> + Hdl; +write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) -> + ok = case Pub of + no_pub -> + ok; + {Guid, IsPersistent} -> + file_handle_cache:append( + Hdl, [<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, + (bool_to_int(IsPersistent)):1, + RelSeq:?REL_SEQ_BITS>>, Guid]) + end, + ok = case {Del, Ack} of + {no_del, no_ack} -> + ok; + _ -> + Binary = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, + RelSeq:?REL_SEQ_BITS>>, + file_handle_cache:append( + Hdl, case {Del, Ack} of + {del, ack} -> [Binary, Binary]; + _ -> Binary + end) + end, + Hdl. + +read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq}, + {Messages, Segments}, Dir) -> + Segment = segment_find_or_new(Seg, Dir, Segments), + {segment_entries_foldr( + fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc) + when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso + (Seg < EndSeg orelse EndRelSeq >= RelSeq) -> + [ {Guid, reconstruct_seq_id(StartSeg, RelSeq), + IsPersistent, IsDelivered == del} | Acc ]; + (_RelSeq, _Value, Acc) -> + Acc + end, Messages, Segment), + segment_store(Segment, Segments)}. + +segment_entries_foldr(Fun, Init, + Segment = #segment { journal_entries = JEntries }) -> + {SegEntries, _UnackedCount} = load_segment(false, Segment), + {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries), + array:sparse_foldr(Fun, Init, SegEntries1). + +%% Loading segments +%% +%% Does not do any combining with the journal at all. +load_segment(KeepAcked, #segment { path = Path }) -> + case filelib:is_file(Path) of + false -> {array_new(), 0}; + true -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_MODE, []), + {ok, 0} = file_handle_cache:position(Hdl, bof), + Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0), + ok = file_handle_cache:close(Hdl), + Res + end. + +load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) -> + case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of + {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS, + IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} -> + %% because we specify /binary, and binaries are complete + %% bytes, the size spec is in bytes, not bits. + {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES), + Obj = {{Guid, 1 == IsPersistentNum}, no_del, no_ack}, + SegEntries1 = array:set(RelSeq, Obj, SegEntries), + load_segment_entries(KeepAcked, Hdl, SegEntries1, + UnackedCount + 1); + {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS, + RelSeq:?REL_SEQ_BITS>>} -> + {UnackedCountDelta, SegEntries1} = + case array:get(RelSeq, SegEntries) of + {Pub, no_del, no_ack} -> + { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)}; + {Pub, del, no_ack} when KeepAcked -> + {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)}; + {_Pub, del, no_ack} -> + {-1, array:reset(RelSeq, SegEntries)} + end, + load_segment_entries(KeepAcked, Hdl, SegEntries1, + UnackedCount + UnackedCountDelta); + _ErrOrEoF -> + {SegEntries, UnackedCount} + end. + +array_new() -> + array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]). + +bool_to_int(true ) -> 1; +bool_to_int(false) -> 0. + +%%---------------------------------------------------------------------------- +%% journal & segment combination +%%---------------------------------------------------------------------------- + +%% Combine what we have just read from a segment file with what we're +%% holding for that segment in memory. There must be no duplicates. +segment_plus_journal(SegEntries, JEntries) -> + array:sparse_foldl( + fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) -> + SegEntry = array:get(RelSeq, SegEntriesOut), + {Obj, AdditionalUnackedDelta} = + segment_plus_journal1(SegEntry, JObj), + {case Obj of + undefined -> array:reset(RelSeq, SegEntriesOut); + _ -> array:set(RelSeq, Obj, SegEntriesOut) + end, + AdditionalUnacked + AdditionalUnackedDelta} + end, {SegEntries, 0}, JEntries). + +%% Here, the result is a tuple with the first element containing the +%% item which we may be adding to (for items only in the journal), +%% modifying in (bits in both), or, when returning 'undefined', +%% erasing from (ack in journal, not segment) the segment array. The +%% other element of the tuple is the delta for AdditionalUnacked. +segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) -> + {Obj, 1}; +segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) -> + {Obj, 1}; +segment_plus_journal1(undefined, {?PUB, del, ack}) -> + {undefined, 0}; + +segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) -> + {{Pub, del, no_ack}, 0}; +segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack}) -> + {undefined, -1}; +segment_plus_journal1({?PUB, del, no_ack}, {no_pub, no_del, ack}) -> + {undefined, -1}. + +%% Remove from the journal entries for a segment, items that are +%% duplicates of entries found in the segment itself. Used on start up +%% to clean up the journal. +journal_minus_segment(JEntries, SegEntries) -> + array:sparse_foldl( + fun (RelSeq, JObj, {JEntriesOut, UnackedRemoved}) -> + SegEntry = array:get(RelSeq, SegEntries), + {Obj, UnackedRemovedDelta} = + journal_minus_segment1(JObj, SegEntry), + {case Obj of + keep -> JEntriesOut; + undefined -> array:reset(RelSeq, JEntriesOut); + _ -> array:set(RelSeq, Obj, JEntriesOut) + end, + UnackedRemoved + UnackedRemovedDelta} + end, {JEntries, 0}, JEntries). + +%% Here, the result is a tuple with the first element containing the +%% item we are adding to or modifying in the (initially fresh) journal +%% array. If the item is 'undefined' we leave the journal array +%% alone. The other element of the tuple is the deltas for +%% UnackedRemoved. + +%% Both the same. Must be at least the publish +journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> + {undefined, 1}; +journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> + {undefined, 0}; + +%% Just publish in journal +journal_minus_segment1({?PUB, no_del, no_ack}, undefined) -> + {keep, 0}; + +%% Publish and deliver in journal +journal_minus_segment1({?PUB, del, no_ack}, undefined) -> + {keep, 0}; +journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, no_ack}, 1}; + +%% Publish, deliver and ack in journal +journal_minus_segment1({?PUB, del, ack}, undefined) -> + {keep, 0}; +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, ack}, 1}; +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> + {{no_pub, no_del, ack}, 1}; + +%% Just deliver in journal +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> + {undefined, 0}; + +%% Just ack in journal +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> + {undefined, -1}; + +%% Deliver and ack in journal +journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> + {{no_pub, no_del, ack}, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> + {undefined, -1}. diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index 5cf519b7..a8b2ae54 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -41,7 +41,7 @@ -export([server_properties/0]). --export([analyze_frame/2]). +-export([analyze_frame/3]). -import(gen_tcp). -import(fprof). @@ -52,15 +52,18 @@ -define(NORMAL_TIMEOUT, 3). -define(CLOSING_TIMEOUT, 1). -define(CHANNEL_TERMINATION_TIMEOUT, 3). +-define(SILENT_CLOSE_DELAY, 3). +-define(FRAME_MAX, 131072). %% set to zero once QPid fix their negotiation %--------------------------------------------------------------------------- --record(v1, {sock, connection, callback, recv_ref, connection_state}). +-record(v1, {sock, connection, callback, recv_ref, connection_state, + queue_collector}). -define(INFO_KEYS, [pid, address, port, peer_address, peer_port, - recv_oct, recv_cnt, send_oct, send_cnt, send_pend, - state, channels, user, vhost, timeout, frame_max, client_properties]). + recv_oct, recv_cnt, send_oct, send_cnt, send_pend, state, channels, + protocol, user, vhost, timeout, frame_max, client_properties]). %% connection lifecycle %% @@ -100,6 +103,8 @@ %% heartbeat timeout -> *throw* %% closing: %% socket close -> *terminate* +%% receive connection.close -> send connection.close_ok, +%% *closing* %% receive frame -> ignore, *closing* %% handshake_timeout -> ignore, *closing* %% heartbeat timeout -> *throw* @@ -116,6 +121,8 @@ %% start terminate_connection timer, *closed* %% closed: %% socket close -> *terminate* +%% receive connection.close -> send connection.close_ok, +%% *closed* %% receive connection.close_ok -> self() ! terminate_connection, %% *closed* %% receive frame -> ignore, *closed* @@ -131,11 +138,11 @@ -ifdef(use_specs). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (pid()) -> [info()]). --spec(info/2 :: (pid(), [info_key()]) -> [info()]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (pid()) -> [rabbit_types:info()]). +-spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]). -spec(shutdown/2 :: (pid(), string()) -> 'ok'). --spec(server_properties/0 :: () -> amqp_table()). +-spec(server_properties/0 :: () -> rabbit_framing:amqp_table()). -endif. @@ -233,6 +240,7 @@ start_connection(Parent, Deb, Sock, SockTransform) -> erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), handshake_timeout), ProfilingValue = setup_profiling(), + {ok, Collector} = rabbit_queue_collector:start_link(), try mainloop(Parent, Deb, switch_callback( #v1{sock = ClientSock, @@ -241,10 +249,12 @@ start_connection(Parent, Deb, Sock, SockTransform) -> timeout_sec = ?HANDSHAKE_TIMEOUT, frame_max = ?FRAME_MIN_SIZE, vhost = none, - client_properties = none}, + client_properties = none, + protocol = none}, callback = uninitialized_callback, recv_ref = none, - connection_state = pre_init}, + connection_state = pre_init, + queue_collector = Collector}, handshake, 8)) catch Ex -> (if Ex == connection_closed_abruptly -> @@ -262,7 +272,9 @@ start_connection(Parent, Deb, Sock, SockTransform) -> %% output to be sent, which results in unnecessary delays. %% %% gen_tcp:close(ClientSock), - teardown_profiling(ProfilingValue) + teardown_profiling(ProfilingValue), + rabbit_queue_collector:shutdown(Collector), + rabbit_misc:unlink_and_capture_exit(Collector) end, done. @@ -425,19 +437,29 @@ wait_for_channel_termination(N, TimerRef) -> exit(channel_termination_timeout) end. -maybe_close(State = #v1{connection_state = closing}) -> +maybe_close(State = #v1{connection_state = closing, + queue_collector = Collector, + connection = #connection{protocol = Protocol}, + sock = Sock}) -> case all_channels() of - [] -> ok = send_on_channel0( - State#v1.sock, #'connection.close_ok'{}), - close_connection(State); + [] -> + %% Spec says "Exclusive queues may only be accessed by the current + %% connection, and are deleted when that connection closes." + %% This does not strictly imply synchrony, but in practice it seems + %% to be what people assume. + rabbit_queue_collector:delete_all(Collector), + ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), + close_connection(State); _ -> State end; maybe_close(State) -> State. -handle_frame(Type, 0, Payload, State = #v1{connection_state = CS}) +handle_frame(Type, 0, Payload, + State = #v1{connection_state = CS, + connection = #connection{protocol = Protocol}}) when CS =:= closing; CS =:= closed -> - case analyze_frame(Type, Payload) of + case analyze_frame(Type, Payload, Protocol) of {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); _Other -> State @@ -445,20 +467,20 @@ handle_frame(Type, 0, Payload, State = #v1{connection_state = CS}) handle_frame(_Type, _Channel, _Payload, State = #v1{connection_state = CS}) when CS =:= closing; CS =:= closed -> State; -handle_frame(Type, 0, Payload, State) -> - case analyze_frame(Type, Payload) of +handle_frame(Type, 0, Payload, + State = #v1{connection = #connection{protocol = Protocol}}) -> + case analyze_frame(Type, Payload, Protocol) of error -> throw({unknown_frame, 0, Type, Payload}); heartbeat -> State; - trace -> State; {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); Other -> throw({unexpected_frame_on_channel0, Other}) end; -handle_frame(Type, Channel, Payload, State) -> - case analyze_frame(Type, Payload) of +handle_frame(Type, Channel, Payload, + State = #v1{connection = #connection{protocol = Protocol}}) -> + case analyze_frame(Type, Payload, Protocol) of error -> throw({unknown_frame, Channel, Type, Payload}); heartbeat -> throw({unexpected_heartbeat_frame, Channel}); - trace -> throw({unexpected_trace_frame, Channel}); AnalyzedFrame -> %%?LOGDEBUG("Ch ~p Frame ~p~n", [Channel, AnalyzedFrame]), case get({channel, Channel}) of @@ -473,10 +495,18 @@ handle_frame(Type, Channel, Payload, State) -> closing -> %% According to the spec, after sending a %% channel.close we must ignore all frames except + %% channel.close and channel.close_ok. In the + %% event of a channel.close, we should send back a %% channel.close_ok. case AnalyzedFrame of {method, 'channel.close_ok', _} -> erase({channel, Channel}); + {method, 'channel.close', _} -> + %% We're already closing this channel, so + %% there's no cleanup to do (notify + %% queues, etc.) + ok = rabbit_writer:send_command(State#v1.sock, + #'channel.close_ok'{}); _ -> ok end, State; @@ -491,17 +521,20 @@ handle_frame(Type, Channel, Payload, State) -> end end. -analyze_frame(?FRAME_METHOD, <<ClassId:16, MethodId:16, MethodFields/binary>>) -> - {method, rabbit_framing:lookup_method_name({ClassId, MethodId}), MethodFields}; -analyze_frame(?FRAME_HEADER, <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>) -> +analyze_frame(?FRAME_METHOD, + <<ClassId:16, MethodId:16, MethodFields/binary>>, + Protocol) -> + MethodName = Protocol:lookup_method_name({ClassId, MethodId}), + {method, MethodName, MethodFields}; +analyze_frame(?FRAME_HEADER, + <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>, + _Protocol) -> {content_header, ClassId, Weight, BodySize, Properties}; -analyze_frame(?FRAME_BODY, Body) -> +analyze_frame(?FRAME_BODY, Body, _Protocol) -> {content_body, Body}; -analyze_frame(?FRAME_TRACE, _Body) -> - trace; -analyze_frame(?FRAME_HEARTBEAT, <<>>) -> +analyze_frame(?FRAME_HEARTBEAT, <<>>, _Protocol) -> heartbeat; -analyze_frame(_Type, _Body) -> +analyze_frame(_Type, _Body, _Protocol) -> error. handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, State) -> @@ -518,54 +551,70 @@ handle_input({frame_payload, Type, Channel, PayloadSize}, PayloadAndMarker, Stat throw({bad_payload, PayloadAndMarker}) end; -handle_input(handshake, <<"AMQP",1,1,ProtocolMajor,ProtocolMinor>>, - State = #v1{sock = Sock, connection = Connection}) -> - case check_version({ProtocolMajor, ProtocolMinor}, - {?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR}) of - true -> - ok = send_on_channel0( - Sock, - #'connection.start'{ - version_major = ?PROTOCOL_VERSION_MAJOR, - version_minor = ?PROTOCOL_VERSION_MINOR, - server_properties = server_properties(), - mechanisms = <<"PLAIN AMQPLAIN">>, - locales = <<"en_US">> }), - {State#v1{connection = Connection#connection{ - timeout_sec = ?NORMAL_TIMEOUT}, - connection_state = starting}, - frame_header, 7}; - false -> - throw({bad_version, ProtocolMajor, ProtocolMinor}) - end; +%% The two rules pertaining to version negotiation: +%% +%% * If the server cannot support the protocol specified in the +%% protocol header, it MUST respond with a valid protocol header and +%% then close the socket connection. +%% +%% * The server MUST provide a protocol version that is lower than or +%% equal to that requested by the client in the protocol header. +handle_input(handshake, <<"AMQP", 0, 0, 9, 1>>, State) -> + start_connection({0, 9, 1}, rabbit_framing_amqp_0_9_1, State); + +%% This is the protocol header for 0-9, which we can safely treat as +%% though it were 0-9-1. +handle_input(handshake, <<"AMQP", 1, 1, 0, 9>>, State) -> + start_connection({0, 9, 0}, rabbit_framing_amqp_0_9_1, State); + +%% This is what most clients send for 0-8. The 0-8 spec, confusingly, +%% defines the version as 8-0. +handle_input(handshake, <<"AMQP", 1, 1, 8, 0>>, State) -> + start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State); + +%% The 0-8 spec as on the AMQP web site actually has this as the +%% protocol header; some libraries e.g., py-amqplib, send it when they +%% want 0-8. +handle_input(handshake, <<"AMQP", 1, 1, 9, 1>>, State) -> + start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State); + +handle_input(handshake, <<"AMQP", A, B, C, D>>, #v1{sock = Sock}) -> + refuse_connection(Sock, {bad_version, A, B, C, D}); handle_input(handshake, Other, #v1{sock = Sock}) -> - ok = inet_op(fun () -> rabbit_net:send( - Sock, <<"AMQP",1,1, - ?PROTOCOL_VERSION_MAJOR, - ?PROTOCOL_VERSION_MINOR>>) end), - throw({bad_header, Other}); + refuse_connection(Sock, {bad_header, Other}); handle_input(Callback, Data, _State) -> throw({bad_input, Callback, Data}). -%% the 0-8 spec, confusingly, defines the version as 8-0 -adjust_version({8,0}) -> {0,8}; -adjust_version(Version) -> Version. -check_version(ClientVersion, ServerVersion) -> - {ClientMajor, ClientMinor} = adjust_version(ClientVersion), - {ServerMajor, ServerMinor} = adjust_version(ServerVersion), - ClientMajor > ServerMajor - orelse - (ClientMajor == ServerMajor andalso - ClientMinor >= ServerMinor). +%% Offer a protocol version to the client. Connection.start only +%% includes a major and minor version number, Luckily 0-9 and 0-9-1 +%% are similar enough that clients will be happy with either. +start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision}, + Protocol, + State = #v1{sock = Sock, connection = Connection}) -> + Start = #'connection.start'{ version_major = ProtocolMajor, + version_minor = ProtocolMinor, + server_properties = server_properties(), + mechanisms = <<"PLAIN AMQPLAIN">>, + locales = <<"en_US">> }, + ok = send_on_channel0(Sock, Start, Protocol), + {State#v1{connection = Connection#connection{ + timeout_sec = ?NORMAL_TIMEOUT, + protocol = Protocol}, + connection_state = starting}, + frame_header, 7}. + +refuse_connection(Sock, Exception) -> + ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",0,0,9,1>>) end), + throw(Exception). %%-------------------------------------------------------------------------- -handle_method0(MethodName, FieldsBin, State) -> +handle_method0(MethodName, FieldsBin, + State = #v1{connection = #connection{protocol = Protocol}}) -> try - handle_method0(rabbit_framing:decode_method_fields( - MethodName, FieldsBin), + handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin), State) catch exit:Reason -> CompleteReason = case Reason of @@ -575,7 +624,11 @@ handle_method0(MethodName, FieldsBin, State) -> end, case State#v1.connection_state of running -> send_exception(State, 0, CompleteReason); - Other -> throw({channel0_error, Other, CompleteReason}) + %% We don't trust the client at this point - force + %% them to wait for a bit so they can't DOS us with + %% repeated failed logins etc. + Other -> timer:sleep(?SILENT_CLOSE_DELAY * 1000), + throw({channel0_error, Other, CompleteReason}) end end. @@ -583,66 +636,64 @@ handle_method0(#'connection.start_ok'{mechanism = Mechanism, response = Response, client_properties = ClientProperties}, State = #v1{connection_state = starting, - connection = Connection, + connection = Connection = + #connection{protocol = Protocol}, sock = Sock}) -> User = rabbit_access_control:check_login(Mechanism, Response), - ok = send_on_channel0( - Sock, - #'connection.tune'{channel_max = 0, - %% set to zero once QPid fix their negotiation - frame_max = 131072, - heartbeat = 0}), + Tune = #'connection.tune'{channel_max = 0, + frame_max = ?FRAME_MAX, + heartbeat = 0}, + ok = send_on_channel0(Sock, Tune, Protocol), State#v1{connection_state = tuning, connection = Connection#connection{ user = User, client_properties = ClientProperties}}; -handle_method0(#'connection.tune_ok'{channel_max = _ChannelMax, - frame_max = FrameMax, +handle_method0(#'connection.tune_ok'{frame_max = FrameMax, heartbeat = ClientHeartbeat}, State = #v1{connection_state = tuning, connection = Connection, sock = Sock}) -> - %% if we have a channel_max limit that the client wishes to - %% exceed, die as per spec. Not currently a problem, so we ignore - %% the client's channel_max parameter. - rabbit_heartbeat:start_heartbeat(Sock, ClientHeartbeat), - State#v1{connection_state = opening, - connection = Connection#connection{ - timeout_sec = ClientHeartbeat, - frame_max = FrameMax}}; -handle_method0(#'connection.open'{virtual_host = VHostPath, - insist = Insist}, + if (FrameMax /= 0) and (FrameMax < ?FRAME_MIN_SIZE) -> + rabbit_misc:protocol_error( + not_allowed, "frame_max=~w < ~w min size", + [FrameMax, ?FRAME_MIN_SIZE]); + (?FRAME_MAX /= 0) and (FrameMax > ?FRAME_MAX) -> + rabbit_misc:protocol_error( + not_allowed, "frame_max=~w > ~w max size", + [FrameMax, ?FRAME_MAX]); + true -> + rabbit_heartbeat:start_heartbeat(Sock, ClientHeartbeat), + State#v1{connection_state = opening, + connection = Connection#connection{ + timeout_sec = ClientHeartbeat, + frame_max = FrameMax}} + end; + +handle_method0(#'connection.open'{virtual_host = VHostPath}, + State = #v1{connection_state = opening, connection = Connection = #connection{ - user = User}, + user = User, + protocol = Protocol}, sock = Sock}) -> ok = rabbit_access_control:check_vhost_access(User, VHostPath), NewConnection = Connection#connection{vhost = VHostPath}, - KnownHosts = format_listeners(rabbit_networking:active_listeners()), - Redirects = compute_redirects(Insist), - if Redirects == [] -> - ok = send_on_channel0( - Sock, - #'connection.open_ok'{known_hosts = KnownHosts}), - State#v1{connection_state = running, - connection = NewConnection}; - true -> - %% FIXME: 'host' is supposed to only contain one - %% address; but which one do we pick? This is - %% really a problem with the spec. - Host = format_listeners(Redirects), - rabbit_log:info("connection ~p redirecting to ~p~n", - [self(), Host]), - ok = send_on_channel0( - Sock, - #'connection.redirect'{host = Host, - known_hosts = KnownHosts}), - close_connection(State#v1{connection = NewConnection}) - end; + ok = send_on_channel0(Sock, #'connection.open_ok'{}, Protocol), + State#v1{connection_state = running, + connection = NewConnection}; handle_method0(#'connection.close'{}, State = #v1{connection_state = running}) -> lists:foreach(fun rabbit_framing_channel:shutdown/1, all_channels()), maybe_close(State#v1{connection_state = closing}); +handle_method0(#'connection.close'{}, + State = #v1{connection_state = CS, + connection = #connection{protocol = Protocol}, + sock = Sock}) + when CS =:= closing; CS =:= closed -> + %% We're already closed or closing, so we don't need to cleanup + %% anything. + ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), + State; handle_method0(#'connection.close_ok'{}, State = #v1{connection_state = closed}) -> self() ! terminate_connection, @@ -654,23 +705,8 @@ handle_method0(_Method, #v1{connection_state = S}) -> rabbit_misc:protocol_error( channel_error, "unexpected method in connection state ~w", [S]). -send_on_channel0(Sock, Method) -> - ok = rabbit_writer:internal_send_command(Sock, 0, Method). - -format_listeners(Listeners) -> - list_to_binary( - rabbit_misc:intersperse( - $,, - [io_lib:format("~s:~w", [Host, Port]) || - #listener{host = Host, port = Port} <- Listeners])). - -compute_redirects(true) -> []; -compute_redirects(false) -> - Node = node(), - LNode = rabbit_load:pick(), - if Node == LNode -> []; - true -> rabbit_networking:node_listeners(LNode) - end. +send_on_channel0(Sock, Method, Protocol) -> + ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol). %%-------------------------------------------------------------------------- @@ -704,6 +740,10 @@ i(state, #v1{connection_state = S}) -> S; i(channels, #v1{}) -> length(all_channels()); +i(protocol, #v1{connection = #connection{protocol = none}}) -> + none; +i(protocol, #v1{connection = #connection{protocol = Protocol}}) -> + Protocol:version(); i(user, #v1{connection = #connection{user = #user{username = Username}}}) -> Username; i(user, #v1{connection = #connection{user = none}}) -> @@ -722,15 +762,18 @@ i(Item, #v1{}) -> %%-------------------------------------------------------------------------- -send_to_new_channel(Channel, AnalyzedFrame, State) -> +send_to_new_channel(Channel, AnalyzedFrame, + State = #v1{queue_collector = Collector}) -> #v1{sock = Sock, connection = #connection{ frame_max = FrameMax, user = #user{username = Username}, - vhost = VHost}} = State, - WriterPid = rabbit_writer:start(Sock, Channel, FrameMax), - ChPid = rabbit_framing_channel:start_link( - fun rabbit_channel:start_link/5, - [Channel, self(), WriterPid, Username, VHost]), + vhost = VHost, + protocol = Protocol}} = State, + {ok, WriterPid} = rabbit_writer:start(Sock, Channel, FrameMax, Protocol), + {ok, ChPid} = rabbit_framing_channel:start_link( + fun rabbit_channel:start_link/6, + [Channel, self(), WriterPid, Username, VHost, Collector], + Protocol), put({channel, Channel}, {chpid, ChPid}), put({chpid, ChPid}, {channel, Channel}), ok = rabbit_framing_channel:process(ChPid, AnalyzedFrame). @@ -746,25 +789,27 @@ handle_exception(State = #v1{connection_state = CS}, Channel, Reason) -> log_channel_error(CS, Channel, Reason), send_exception(State, Channel, Reason). -send_exception(State, Channel, Reason) -> - {ShouldClose, CloseChannel, CloseMethod} = map_exception(Channel, Reason), +send_exception(State = #v1{connection = #connection{protocol = Protocol}}, + Channel, Reason) -> + {ShouldClose, CloseChannel, CloseMethod} = + map_exception(Channel, Reason, Protocol), NewState = case ShouldClose of true -> terminate_channels(), close_connection(State); false -> close_channel(Channel, State) end, ok = rabbit_writer:internal_send_command( - NewState#v1.sock, CloseChannel, CloseMethod), + NewState#v1.sock, CloseChannel, CloseMethod, Protocol), NewState. -map_exception(Channel, Reason) -> +map_exception(Channel, Reason, Protocol) -> {SuggestedClose, ReplyCode, ReplyText, FailedMethod} = - lookup_amqp_exception(Reason), + lookup_amqp_exception(Reason, Protocol), ShouldClose = SuggestedClose or (Channel == 0), {ClassId, MethodId} = case FailedMethod of {_, _} -> FailedMethod; - none -> {0, 0}; - _ -> rabbit_framing:method_id(FailedMethod) + none -> {0, 0}; + _ -> Protocol:method_id(FailedMethod) end, {CloseChannel, CloseMethod} = case ShouldClose of @@ -779,22 +824,16 @@ map_exception(Channel, Reason) -> end, {ShouldClose, CloseChannel, CloseMethod}. -%% FIXME: this clause can go when we move to AMQP spec >=8.1 -lookup_amqp_exception(#amqp_error{name = precondition_failed, - explanation = Expl, - method = Method}) -> - ExplBin = amqp_exception_explanation(<<"PRECONDITION_FAILED">>, Expl), - {false, 406, ExplBin, Method}; lookup_amqp_exception(#amqp_error{name = Name, explanation = Expl, - method = Method}) -> - {ShouldClose, Code, Text} = rabbit_framing:lookup_amqp_exception(Name), + method = Method}, + Protocol) -> + {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(Name), ExplBin = amqp_exception_explanation(Text, Expl), {ShouldClose, Code, ExplBin, Method}; -lookup_amqp_exception(Other) -> +lookup_amqp_exception(Other, Protocol) -> rabbit_log:warning("Non-AMQP exit reason '~p'~n", [Other]), - {ShouldClose, Code, Text} = - rabbit_framing:lookup_amqp_exception(internal_error), + {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(internal_error), {ShouldClose, Code, Text, none}. amqp_exception_explanation(Text, Expl) -> diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index 03979d6c..d50b9f31 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -41,7 +41,13 @@ -ifdef(use_specs). --spec(deliver/2 :: ([pid()], delivery()) -> {routing_result(), [pid()]}). +-export_type([routing_key/0, routing_result/0]). + +-type(routing_key() :: binary()). +-type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). + +-spec(deliver/2 :: + ([pid()], rabbit_types:delivery()) -> {routing_result(), [pid()]}). -endif. @@ -57,14 +63,17 @@ deliver(QPids, Delivery = #delivery{mandatory = false, %% is preserved. This scales much better than the non-immediate %% case below. delegate:invoke_no_result( - QPids, fun(Pid) -> rabbit_amqqueue:deliver(Pid, Delivery) end), + QPids, fun (Pid) -> rabbit_amqqueue:deliver(Pid, Delivery) end), {routed, QPids}; deliver(QPids, Delivery) -> {Success, _} = delegate:invoke(QPids, - fun(Pid) -> rabbit_amqqueue:deliver(Pid, Delivery) end), - {Routed, Handled} = lists:foldl(fun fold_deliveries/2, {false, []}, Success), + fun (Pid) -> + rabbit_amqqueue:deliver(Pid, Delivery) + end), + {Routed, Handled} = + lists:foldl(fun fold_deliveries/2, {false, []}, Success), check_delivery(Delivery#delivery.mandatory, Delivery#delivery.immediate, {Routed, Handled}). @@ -87,13 +96,13 @@ match_routing_key(Name, RoutingKey) -> lookup_qpids(mnesia:dirty_select(rabbit_route, [{MatchHead, [], ['$1']}])). lookup_qpids(Queues) -> - sets:fold( - fun(Key, Acc) -> + lists:foldl( + fun (Key, Acc) -> case mnesia:dirty_read({rabbit_queue, Key}) of [#amqqueue{pid = QPid}] -> [QPid | Acc]; [] -> Acc end - end, [], sets:from_list(Queues)). + end, [], lists:usort(Queues)). %%-------------------------------------------------------------------- diff --git a/src/rabbit_sasl_report_file_h.erl b/src/rabbit_sasl_report_file_h.erl index 434cdae0..eb2037c2 100644 --- a/src/rabbit_sasl_report_file_h.erl +++ b/src/rabbit_sasl_report_file_h.erl @@ -33,7 +33,8 @@ -behaviour(gen_event). --export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]). +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). %% rabbit_sasl_report_file_h is a wrapper around the sasl_report_file_h %% module because the original's init/1 does not match properly diff --git a/src/rabbit_sup.erl b/src/rabbit_sup.erl index 2c5e5112..97613d17 100644 --- a/src/rabbit_sup.erl +++ b/src/rabbit_sup.erl @@ -34,7 +34,7 @@ -behaviour(supervisor). -export([start_link/0, start_child/1, start_child/2, start_child/3, - start_restartable_child/1, start_restartable_child/2]). + start_restartable_child/1, start_restartable_child/2, stop_child/1]). -export([init/1]). @@ -69,5 +69,11 @@ start_restartable_child(Mod, Args) -> transient, infinity, supervisor, [rabbit_restartable_sup]}), ok. +stop_child(ChildId) -> + case supervisor:terminate_child(?SERVER, ChildId) of + ok -> supervisor:delete_child(?SERVER, ChildId); + E -> E + end. + init([]) -> {ok, {{one_for_all, 0, 1}, []}}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index 76ebd982..56aca1d6 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -41,8 +41,12 @@ -import(lists). -include("rabbit.hrl"). +-include("rabbit_framing.hrl"). -include_lib("kernel/include/file.hrl"). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + test_content_prop_roundtrip(Datum, Binary) -> Types = [element(1, E) || E <- Datum], Values = [element(2, E) || E <- Datum], @@ -50,14 +54,20 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion all_tests() -> + application:set_env(rabbit, file_handles_high_watermark, 10, infinity), + passed = test_backing_queue(), passed = test_priority_queue(), + passed = test_bpqueue(), passed = test_pg_local(), passed = test_unfold(), + passed = test_supervisor_delayed_restart(), passed = test_parsing(), + passed = test_content_framing(), passed = test_topic_matching(), passed = test_log_management(), passed = test_app_management(), passed = test_log_management_during_startup(), + passed = test_memory_pressure(), passed = test_cluster_management(), passed = test_user_management(), passed = test_server_status(), @@ -204,6 +214,143 @@ test_priority_queue(Q) -> priority_queue:to_list(Q), priority_queue_out_all(Q)}. +test_bpqueue() -> + Q = bpqueue:new(), + true = bpqueue:is_empty(Q), + 0 = bpqueue:len(Q), + [] = bpqueue:to_list(Q), + + Q1 = bpqueue_test(fun bpqueue:in/3, fun bpqueue:out/1, + fun bpqueue:to_list/1, + fun bpqueue:foldl/3, fun bpqueue:map_fold_filter_l/4), + Q2 = bpqueue_test(fun bpqueue:in_r/3, fun bpqueue:out_r/1, + fun (QR) -> lists:reverse( + [{P, lists:reverse(L)} || + {P, L} <- bpqueue:to_list(QR)]) + end, + fun bpqueue:foldr/3, fun bpqueue:map_fold_filter_r/4), + + [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(bpqueue:join(Q, Q1)), + [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(bpqueue:join(Q2, Q)), + [{foo, [1, 2]}, {bar, [3, 3]}, {foo, [2,1]}] = + bpqueue:to_list(bpqueue:join(Q1, Q2)), + + [{foo, [1, 2]}, {bar, [3]}, {foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list(bpqueue:join(Q1, Q1)), + + [{foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list( + bpqueue:from_list( + [{x, []}, {foo, [1]}, {y, []}, {foo, [2]}, {bar, [3]}, {z, []}])), + + [{undefined, [a]}] = bpqueue:to_list(bpqueue:from_list([{undefined, [a]}])), + + {4, [a,b,c,d]} = + bpqueue:foldl( + fun (Prefix, Value, {Prefix, Acc}) -> + {Prefix + 1, [Value | Acc]} + end, + {0, []}, bpqueue:from_list([{0,[d]}, {1,[c]}, {2,[b]}, {3,[a]}])), + + [{bar,3}, {foo,2}, {foo,1}] = + bpqueue:foldr(fun (P, V, I) -> [{P,V} | I] end, [], Q2), + + BPQL = [{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], + BPQ = bpqueue:from_list(BPQL), + + %% no effect + {BPQL, 0} = bpqueue_mffl([none], {none, []}, BPQ), + {BPQL, 0} = bpqueue_mffl([foo,bar], {none, [1]}, BPQ), + {BPQL, 0} = bpqueue_mffl([bar], {none, [3]}, BPQ), + {BPQL, 0} = bpqueue_mffr([bar], {foo, [5]}, BPQ), + + %% process 1 item + {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffl([foo,bar], {foo, [2]}, BPQ), + {[{foo,[1,2,2]}, {bar,[-3,4,5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffl([bar], {bar, [4]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,-7]}], 1} = + bpqueue_mffr([foo,bar], {foo, [6]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4]}, {baz,[-5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffr([bar], {baz, [4]}, BPQ), + + %% change prefix + {[{bar,[-1,-2,-2,-3,-4,-5,-5,-6,-7]}], 9} = + bpqueue_mffl([foo,bar], {bar, []}, BPQ), + {[{bar,[-1,-2,-2,3,4,5]}, {foo,[5,6,7]}], 3} = + bpqueue_mffl([foo], {bar, [5]}, BPQ), + {[{bar,[-1,-2,-2,3,4,5,-5,-6]}, {foo,[7]}], 5} = + bpqueue_mffl([foo], {bar, [7]}, BPQ), + {[{foo,[1,2,2,-3,-4]}, {bar,[5]}, {foo,[5,6,7]}], 2} = + bpqueue_mffl([bar], {foo, [5]}, BPQ), + {[{bar,[-1,-2,-2,3,4,5,-5,-6,-7]}], 6} = + bpqueue_mffl([foo], {bar, []}, BPQ), + {[{foo,[1,2,2,-3,-4,-5,5,6,7]}], 3} = + bpqueue_mffl([bar], {foo, []}, BPQ), + + %% edge cases + {[{foo,[-1,-2,-2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 3} = + bpqueue_mffl([foo], {foo, [5]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[-5,-6,-7]}], 3} = + bpqueue_mffr([foo], {foo, [2]}, BPQ), + + passed. + +bpqueue_test(In, Out, List, Fold, MapFoldFilter) -> + Q = bpqueue:new(), + {empty, _Q} = Out(Q), + + ok = Fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, ok, Q), + {Q1M, 0} = MapFoldFilter(fun(_P) -> throw(explosion) end, + fun(_V, _N) -> throw(explosion) end, 0, Q), + [] = bpqueue:to_list(Q1M), + + Q1 = In(bar, 3, In(foo, 2, In(foo, 1, Q))), + false = bpqueue:is_empty(Q1), + 3 = bpqueue:len(Q1), + [{foo, [1, 2]}, {bar, [3]}] = List(Q1), + + {{value, foo, 1}, Q3} = Out(Q1), + {{value, foo, 2}, Q4} = Out(Q3), + {{value, bar, 3}, _Q5} = Out(Q4), + + F = fun (QN) -> + MapFoldFilter(fun (foo) -> true; + (_) -> false + end, + fun (2, _Num) -> stop; + (V, Num) -> {bar, -V, V - Num} end, + 0, QN) + end, + {Q6, 0} = F(Q), + [] = bpqueue:to_list(Q6), + {Q7, 1} = F(Q1), + [{bar, [-1]}, {foo, [2]}, {bar, [3]}] = List(Q7), + + Q1. + +bpqueue_mffl(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_l/4, FF1A, FF2A, BPQ). + +bpqueue_mffr(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_r/4, FF1A, FF2A, BPQ). + +bpqueue_mff(Fold, FF1A, FF2A, BPQ) -> + FF1 = fun (Prefixes) -> + fun (P) -> lists:member(P, Prefixes) end + end, + FF2 = fun ({Prefix, Stoppers}) -> + fun (Val, Num) -> + case lists:member(Val, Stoppers) of + true -> stop; + false -> {Prefix, -Val, 1 + Num} + end + end + end, + Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, + + Queue_to_list(Fold(FF1(FF1A), FF2(FF2A), 0, BPQ)). + test_simple_n_element_queue(N) -> Items = lists:seq(1, N), Q = priority_queue_in_all(priority_queue:new(), Items), @@ -351,6 +498,43 @@ test_field_values() -> >>), passed. +%% Test that content frames don't exceed frame-max +test_content_framing(FrameMax, BodyBin) -> + [Header | Frames] = + rabbit_binary_generator:build_simple_content_frames( + 1, + rabbit_binary_generator:ensure_content_encoded( + rabbit_basic:build_content(#'P_basic'{}, BodyBin), + rabbit_framing_amqp_0_9_1), + FrameMax, + rabbit_framing_amqp_0_9_1), + %% header is formatted correctly and the size is the total of the + %% fragments + <<_FrameHeader:7/binary, _ClassAndWeight:4/binary, + BodySize:64/unsigned, _Rest/binary>> = list_to_binary(Header), + BodySize = size(BodyBin), + true = lists:all( + fun (ContentFrame) -> + FrameBinary = list_to_binary(ContentFrame), + %% assert + <<_TypeAndChannel:3/binary, + Size:32/unsigned, _Payload:Size/binary, 16#CE>> = + FrameBinary, + size(FrameBinary) =< FrameMax + end, Frames), + passed. + +test_content_framing() -> + %% no content + passed = test_content_framing(4096, <<>>), + %% easily fit in one frame + passed = test_content_framing(4096, <<"Easy">>), + %% exactly one frame (empty frame = 8 bytes) + passed = test_content_framing(11, <<"One">>), + %% more than one frame + passed = test_content_framing(11, <<"More than one frame">>), + passed. + test_topic_match(P, R) -> test_topic_match(P, R, true). @@ -559,19 +743,19 @@ test_cluster_management() -> ok = control_action(reset, []), lists:foreach(fun (Arg) -> - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok end, ClusteringSequence), lists:foreach(fun (Arg) -> ok = control_action(reset, []), - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok end, ClusteringSequence), ok = control_action(reset, []), lists:foreach(fun (Arg) -> - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok = control_action(start_app, []), ok = control_action(stop_app, []), ok @@ -579,7 +763,7 @@ test_cluster_management() -> ClusteringSequence), lists:foreach(fun (Arg) -> ok = control_action(reset, []), - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok = control_action(start_app, []), ok = control_action(stop_app, []), ok @@ -590,13 +774,13 @@ test_cluster_management() -> ok = control_action(reset, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% join a non-existing cluster as a ram node ok = control_action(reset, []), - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), SecondaryNode = rabbit_misc:makenode("hare"), case net_adm:ping(SecondaryNode) of @@ -621,18 +805,26 @@ test_cluster_management2(SecondaryNode) -> %% join cluster as a ram node ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS, "invalid1@invalid"]), + ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), ok = control_action(start_app, []), ok = control_action(stop_app, []), %% change cluster config while remaining in same cluster - ok = control_action(cluster, ["invalid2@invalid", SecondaryNodeS]), + ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), %% join non-existing cluster as a ram node - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + + %% join empty cluster as a ram node + ok = control_action(cluster, []), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + %% turn ram node into disk node ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS, NodeS]), @@ -640,8 +832,8 @@ test_cluster_management2(SecondaryNode) -> ok = control_action(stop_app, []), %% convert a disk node into a ram node - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% turn a disk node into a ram node ok = control_action(reset, []), @@ -746,17 +938,17 @@ test_user_management() -> passed. test_server_status() -> - %% create a few things so there is some useful information to list Writer = spawn(fun () -> receive shutdown -> ok end end), - Ch = rabbit_channel:start_link(1, self(), Writer, <<"user">>, <<"/">>), - [Q, Q2] = [#amqqueue{} = rabbit_amqqueue:declare( + {ok, Ch} = rabbit_channel:start_link(1, self(), Writer, + <<"user">>, <<"/">>, self()), + [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>], + {new, Queue = #amqqueue{}} <- + [rabbit_amqqueue:declare( rabbit_misc:r(<<"/">>, queue, Name), - false, false, []) || - Name <- [<<"foo">>, <<"bar">>]], + false, false, [], none)]], - ok = rabbit_amqqueue:claim_queue(Q, self()), - ok = rabbit_amqqueue:basic_consume(Q, true, self(), Ch, undefined, + ok = rabbit_amqqueue:basic_consume(Q, true, Ch, undefined, <<"ctag">>, true, undefined), %% list queues @@ -823,7 +1015,7 @@ test_hooks() -> {[arg1, arg2], 1, 3} = get(arg_hook_test_fired), %% Invoking Pids - Remote = fun() -> + Remote = fun () -> receive {rabbitmq_hook,[remote_test,test,[],Target]} -> Target ! invoked @@ -840,11 +1032,143 @@ test_hooks() -> end, passed. +test_memory_pressure_receiver(Pid) -> + receive + shutdown -> + ok; + {send_command, Method} -> + ok = case Method of + #'channel.flow'{} -> ok; + #'basic.qos_ok'{} -> ok; + #'channel.open_ok'{} -> ok + end, + Pid ! Method, + test_memory_pressure_receiver(Pid); + sync -> + Pid ! sync, + test_memory_pressure_receiver(Pid) + end. + +test_memory_pressure_receive_flow(Active) -> + receive #'channel.flow'{active = Active} -> ok + after 1000 -> throw(failed_to_receive_channel_flow) + end, + receive #'channel.flow'{} -> + throw(pipelining_sync_commands_detected) + after 0 -> + ok + end. + +test_memory_pressure_sync(Ch, Writer) -> + ok = rabbit_channel:do(Ch, #'basic.qos'{}), + Writer ! sync, + receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, + receive #'basic.qos_ok'{} -> ok + after 1000 -> throw(failed_to_receive_basic_qos_ok) + end. + +test_memory_pressure_spawn() -> + Me = self(), + Writer = spawn(fun () -> test_memory_pressure_receiver(Me) end), + {ok, Ch} = rabbit_channel:start_link(1, self(), Writer, + <<"user">>, <<"/">>, self()), + ok = rabbit_channel:do(Ch, #'channel.open'{}), + MRef = erlang:monitor(process, Ch), + receive #'channel.open_ok'{} -> ok + after 1000 -> throw(failed_to_receive_channel_open_ok) + end, + {Writer, Ch, MRef}. + +expect_normal_channel_termination(MRef, Ch) -> + receive {'DOWN', MRef, process, Ch, normal} -> ok + after 1000 -> throw(channel_failed_to_exit) + end. + +gobble_channel_exit() -> + receive {channel_exit, _, _} -> ok + after 1000 -> throw(channel_exit_not_received) + end. + +test_memory_pressure() -> + {Writer0, Ch0, MRef0} = test_memory_pressure_spawn(), + [ok = rabbit_channel:conserve_memory(Ch0, Conserve) || + Conserve <- [false, false, true, false, true, true, false]], + ok = test_memory_pressure_sync(Ch0, Writer0), + receive {'DOWN', MRef0, process, Ch0, Info0} -> + throw({channel_died_early, Info0}) + after 0 -> ok + end, + + %% we should have just 1 active=false waiting for us + ok = test_memory_pressure_receive_flow(false), + + %% if we reply with flow_ok, we should immediately get an + %% active=true back + ok = rabbit_channel:do(Ch0, #'channel.flow_ok'{active = false}), + ok = test_memory_pressure_receive_flow(true), + + %% if we publish at this point, the channel should die + Content = rabbit_basic:build_content(#'P_basic'{}, <<>>), + ok = rabbit_channel:do(Ch0, #'basic.publish'{}, Content), + expect_normal_channel_termination(MRef0, Ch0), + gobble_channel_exit(), + + {Writer1, Ch1, MRef1} = test_memory_pressure_spawn(), + ok = rabbit_channel:conserve_memory(Ch1, true), + ok = test_memory_pressure_receive_flow(false), + ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}), + ok = test_memory_pressure_sync(Ch1, Writer1), + ok = rabbit_channel:conserve_memory(Ch1, false), + ok = test_memory_pressure_receive_flow(true), + %% send back the wrong flow_ok. Channel should die. + ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}), + expect_normal_channel_termination(MRef1, Ch1), + gobble_channel_exit(), + + {_Writer2, Ch2, MRef2} = test_memory_pressure_spawn(), + %% just out of the blue, send a flow_ok. Life should end. + ok = rabbit_channel:do(Ch2, #'channel.flow_ok'{active = true}), + expect_normal_channel_termination(MRef2, Ch2), + gobble_channel_exit(), + + {_Writer3, Ch3, MRef3} = test_memory_pressure_spawn(), + ok = rabbit_channel:conserve_memory(Ch3, true), + ok = test_memory_pressure_receive_flow(false), + receive {'DOWN', MRef3, process, Ch3, _} -> + ok + after 12000 -> + throw(channel_failed_to_exit) + end, + gobble_channel_exit(), + + alarm_handler:set_alarm({vm_memory_high_watermark, []}), + Me = self(), + Writer4 = spawn(fun () -> test_memory_pressure_receiver(Me) end), + {ok, Ch4} = rabbit_channel:start_link(1, self(), Writer4, + <<"user">>, <<"/">>, self()), + ok = rabbit_channel:do(Ch4, #'channel.open'{}), + MRef4 = erlang:monitor(process, Ch4), + Writer4 ! sync, + receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, + receive #'channel.open_ok'{} -> throw(unexpected_channel_open_ok) + after 0 -> ok + end, + alarm_handler:clear_alarm(vm_memory_high_watermark), + Writer4 ! sync, + receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end, + receive #'channel.open_ok'{} -> ok + after 1000 -> throw(failed_to_receive_channel_open_ok) + end, + rabbit_channel:shutdown(Ch4), + expect_normal_channel_termination(MRef4, Ch4), + + passed. + test_delegates_async(SecondaryNode) -> Self = self(), - Sender = fun(Pid) -> Pid ! {invoked, Self} end, + Sender = fun (Pid) -> Pid ! {invoked, Self} end, - Responder = make_responder(fun({invoked, Pid}) -> Pid ! response end), + Responder = make_responder(fun ({invoked, Pid}) -> Pid ! response end), ok = delegate:invoke_no_result(spawn(Responder), Sender), ok = delegate:invoke_no_result(spawn(SecondaryNode, Responder), Sender), @@ -857,10 +1181,11 @@ test_delegates_async(SecondaryNode) -> passed. -make_responder(FMsg) -> - fun() -> +make_responder(FMsg) -> make_responder(FMsg, timeout). +make_responder(FMsg, Throw) -> + fun () -> receive Msg -> FMsg(Msg) - after 1000 -> throw(timeout) + after 1000 -> throw(Throw) end end. @@ -887,24 +1212,28 @@ must_exit(Fun) -> end. test_delegates_sync(SecondaryNode) -> - Sender = fun(Pid) -> gen_server:call(Pid, invoked) end, - BadSender = fun(_Pid) -> exit(exception) end, + Sender = fun (Pid) -> gen_server:call(Pid, invoked) end, + BadSender = fun (_Pid) -> exit(exception) end, - Responder = make_responder(fun({'$gen_call', From, invoked}) -> + Responder = make_responder(fun ({'$gen_call', From, invoked}) -> gen_server:reply(From, response) end), + BadResponder = make_responder(fun ({'$gen_call', From, invoked}) -> + gen_server:reply(From, response) + end, bad_responder_died), + response = delegate:invoke(spawn(Responder), Sender), response = delegate:invoke(spawn(SecondaryNode, Responder), Sender), - must_exit(fun() -> delegate:invoke(spawn(Responder), BadSender) end), - must_exit(fun() -> - delegate:invoke(spawn(SecondaryNode, Responder), BadSender) end), + must_exit(fun () -> delegate:invoke(spawn(BadResponder), BadSender) end), + must_exit(fun () -> + delegate:invoke(spawn(SecondaryNode, BadResponder), BadSender) end), LocalGoodPids = spawn_responders(node(), Responder, 2), RemoteGoodPids = spawn_responders(SecondaryNode, Responder, 2), - LocalBadPids = spawn_responders(node(), Responder, 2), - RemoteBadPids = spawn_responders(SecondaryNode, Responder, 2), + LocalBadPids = spawn_responders(node(), BadResponder, 2), + RemoteBadPids = spawn_responders(SecondaryNode, BadResponder, 2), {GoodRes, []} = delegate:invoke(LocalGoodPids ++ RemoteGoodPids, Sender), true = lists:all(fun ({_, response}) -> true end, GoodRes), @@ -1010,6 +1339,586 @@ handle_hook(HookName, Handler, Args) -> A = atom_to_list(HookName) ++ "_" ++ atom_to_list(Handler) ++ "_fired", put(list_to_atom(A), Args). bad_handle_hook(_, _, _) -> - bad:bad(). + exit(bad_handle_hook_called). extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). + +test_supervisor_delayed_restart() -> + test_sup:test_supervisor_delayed_restart(). + +test_backing_queue() -> + case application:get_env(rabbit, backing_queue_module) of + {ok, rabbit_variable_queue} -> + {ok, FileSizeLimit} = + application:get_env(rabbit, msg_store_file_size_limit), + application:set_env(rabbit, msg_store_file_size_limit, 512, + infinity), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + application:set_env(rabbit, queue_index_max_journal_entries, 128, + infinity), + passed = test_msg_store(), + application:set_env(rabbit, msg_store_file_size_limit, + FileSizeLimit, infinity), + passed = test_queue_index(), + passed = test_variable_queue(), + passed = test_queue_recover(), + application:set_env(rabbit, queue_index_max_journal_entries, + MaxJournal, infinity), + passed; + _ -> + passed + end. + +restart_msg_store_empty() -> + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + undefined, {fun (ok) -> finished end, ok}). + +guid_bin(X) -> + erlang:md5(term_to_binary(X)). + +msg_store_contains(Atom, Guids) -> + Atom = lists:foldl( + fun (Guid, Atom1) when Atom1 =:= Atom -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end, + Atom, Guids). + +msg_store_sync(Guids) -> + Ref = make_ref(), + Self = self(), + ok = rabbit_msg_store:sync(?PERSISTENT_MSG_STORE, Guids, + fun () -> Self ! {sync, Ref} end), + receive + {sync, Ref} -> ok + after + 10000 -> + io:format("Sync from msg_store missing for guids ~p~n", [Guids]), + throw(timeout) + end. + +msg_store_read(Guids, MSCState) -> + lists:foldl(fun (Guid, MSCStateM) -> + {{ok, Guid}, MSCStateN} = rabbit_msg_store:read( + ?PERSISTENT_MSG_STORE, + Guid, MSCStateM), + MSCStateN + end, MSCState, Guids). + +msg_store_write(Guids, MSCState) -> + lists:foldl(fun (Guid, {ok, MSCStateN}) -> + rabbit_msg_store:write(?PERSISTENT_MSG_STORE, + Guid, Guid, MSCStateN) + end, {ok, MSCState}, Guids). + +msg_store_remove(Guids) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids). + +foreach_with_msg_store_client(MsgStore, Ref, Fun, L) -> + rabbit_msg_store:client_terminate( + lists:foldl(fun (Guid, MSCState) -> Fun(Guid, MsgStore, MSCState) end, + rabbit_msg_store:client_init(MsgStore, Ref), L)). + +test_msg_store() -> + restart_msg_store_empty(), + Self = self(), + Guids = [guid_bin(M) || M <- lists:seq(1,100)], + {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids), + %% check we don't contain any of the msgs we're about to publish + false = msg_store_contains(false, Guids), + Ref = rabbit_guid:guid(), + MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), + %% publish the first half + {ok, MSCState1} = msg_store_write(Guids1stHalf, MSCState), + %% sync on the first half + ok = msg_store_sync(Guids1stHalf), + %% publish the second half + {ok, MSCState2} = msg_store_write(Guids2ndHalf, MSCState1), + %% sync on the first half again - the msg_store will be dirty, but + %% we won't need the fsync + ok = msg_store_sync(Guids1stHalf), + %% check they're all in there + true = msg_store_contains(true, Guids), + %% publish the latter half twice so we hit the caching and ref count code + {ok, MSCState3} = msg_store_write(Guids2ndHalf, MSCState2), + %% check they're still all in there + true = msg_store_contains(true, Guids), + %% sync on the 2nd half, but do lots of individual syncs to try + %% and cause coalescing to happen + ok = lists:foldl( + fun (Guid, ok) -> rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, + [Guid], fun () -> Self ! {sync, Guid} end) + end, ok, Guids2ndHalf), + lists:foldl( + fun(Guid, ok) -> + receive + {sync, Guid} -> ok + after + 10000 -> + io:format("Sync from msg_store missing (guid: ~p)~n", + [Guid]), + throw(timeout) + end + end, ok, Guids2ndHalf), + %% it's very likely we're not dirty here, so the 1st half sync + %% should hit a different code path + ok = msg_store_sync(Guids1stHalf), + %% read them all + MSCState4 = msg_store_read(Guids, MSCState3), + %% read them all again - this will hit the cache, not disk + MSCState5 = msg_store_read(Guids, MSCState4), + %% remove them all + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids), + %% check first half doesn't exist + false = msg_store_contains(false, Guids1stHalf), + %% check second half does exist + true = msg_store_contains(true, Guids2ndHalf), + %% read the second half again + MSCState6 = msg_store_read(Guids2ndHalf, MSCState5), + %% release the second half, just for fun (aka code coverage) + ok = rabbit_msg_store:release(?PERSISTENT_MSG_STORE, Guids2ndHalf), + %% read the second half again, just for fun (aka code coverage) + MSCState7 = msg_store_read(Guids2ndHalf, MSCState6), + ok = rabbit_msg_store:client_terminate(MSCState7), + %% stop and restart, preserving every other msg in 2nd half + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + [], {fun ([]) -> finished; + ([Guid|GuidsTail]) + when length(GuidsTail) rem 2 == 0 -> + {Guid, 1, GuidsTail}; + ([Guid|GuidsTail]) -> + {Guid, 0, GuidsTail} + end, Guids2ndHalf}), + %% check we have the right msgs left + lists:foldl( + fun (Guid, Bool) -> + not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid)) + end, false, Guids2ndHalf), + %% restart empty + restart_msg_store_empty(), + %% check we don't contain any of the msgs + false = msg_store_contains(false, Guids), + %% publish the first half again + MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), + {ok, MSCState9} = msg_store_write(Guids1stHalf, MSCState8), + %% this should force some sort of sync internally otherwise misread + ok = rabbit_msg_store:client_terminate( + msg_store_read(Guids1stHalf, MSCState9)), + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids1stHalf), + %% restart empty + restart_msg_store_empty(), %% now safe to reuse guids + %% push a lot of msgs in... at least 100 files worth + {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit), + PayloadSizeBits = 65536, + BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)), + GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], + Payload = << 0:PayloadSizeBits >>, + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, MsgStore, MSCStateM) -> + {ok, MSCStateN} = rabbit_msg_store:write( + MsgStore, Guid, Payload, MSCStateM), + MSCStateN + end, GuidsBig), + %% now read them to ensure we hit the fast client-side reading + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, MsgStore, MSCStateM) -> + {{ok, Payload}, MSCStateN} = rabbit_msg_store:read( + MsgStore, Guid, MSCStateM), + MSCStateN + end, GuidsBig), + %% .., then 3s by 1... + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount, 1, -3)]), + %% .., then remove 3s by 2, from the young end first. This hits + %% GC (under 50% good data left, but no empty files. Must GC). + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]), + %% .., then remove 3s by 3, from the young end first. This hits + %% GC... + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]), + %% ensure empty + false = msg_store_contains(false, GuidsBig), + %% restart empty + restart_msg_store_empty(), + passed. + +queue_name(Name) -> + rabbit_misc:r(<<"/">>, queue, Name). + +test_queue() -> + queue_name(<<"test">>). + +init_test_queue() -> + rabbit_queue_index:init( + test_queue(), true, false, + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end). + +restart_test_queue(Qi) -> + _ = rabbit_queue_index:terminate([], Qi), + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([test_queue()]), + init_test_queue(). + +empty_test_queue() -> + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([]), + {0, _Terms, Qi} = init_test_queue(), + _ = rabbit_queue_index:delete_and_terminate(Qi), + ok. + +with_empty_test_queue(Fun) -> + ok = empty_test_queue(), + {0, _Terms, Qi} = init_test_queue(), + rabbit_queue_index:delete_and_terminate(Fun(Qi)). + +queue_index_publish(SeqIds, Persistent, Qi) -> + Ref = rabbit_guid:guid(), + MsgStore = case Persistent of + true -> ?PERSISTENT_MSG_STORE; + false -> ?TRANSIENT_MSG_STORE + end, + {A, B, MSCStateEnd} = + lists:foldl( + fun (SeqId, {QiN, SeqIdsGuidsAcc, MSCStateN}) -> + Guid = rabbit_guid:guid(), + QiM = rabbit_queue_index:publish( + Guid, SeqId, Persistent, QiN), + {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, Guid, + Guid, MSCStateN), + {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc], MSCStateM} + end, {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)}, SeqIds), + ok = rabbit_msg_store:client_delete_and_terminate( + MSCStateEnd, MsgStore, Ref), + {A, B}. + +verify_read_with_published(_Delivered, _Persistent, [], _) -> + ok; +verify_read_with_published(Delivered, Persistent, + [{Guid, SeqId, Persistent, Delivered}|Read], + [{SeqId, Guid}|Published]) -> + verify_read_with_published(Delivered, Persistent, Read, Published); +verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> + ko. + +test_queue_index() -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + TwoSegs = SegmentSize + SegmentSize, + MostOfASegment = trunc(SegmentSize*0.75), + SeqIdsA = lists:seq(0, MostOfASegment-1), + SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), + SeqIdsC = lists:seq(0, trunc(SegmentSize/2)), + SeqIdsD = lists:seq(0, SegmentSize*4), + + with_empty_test_queue( + fun (Qi0) -> + {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), + {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), + {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), + {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), + ok = verify_read_with_published(false, false, ReadA, + lists:reverse(SeqIdsGuidsA)), + %% should get length back as 0, as all the msgs were transient + {0, _Terms1, Qi6} = restart_test_queue(Qi4), + {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), + {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), + {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), + {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), + ok = verify_read_with_published(false, true, ReadB, + lists:reverse(SeqIdsGuidsB)), + %% should get length back as MostOfASegment + LenB = length(SeqIdsB), + {LenB, _Terms2, Qi12} = restart_test_queue(Qi10), + {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), + Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), + {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), + ok = verify_read_with_published(true, true, ReadC, + lists:reverse(SeqIdsGuidsB)), + Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), + Qi17 = rabbit_queue_index:flush(Qi16), + %% Everything will have gone now because #pubs == #acks + {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), + %% should get length back as 0 because all persistent + %% msgs have been acked + {0, _Terms3, Qi19} = restart_test_queue(Qi18), + Qi19 + end), + + %% These next bits are just to hit the auto deletion of segment files. + %% First, partials: + %% a) partial pub+del+ack, then move to new segment + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsC, Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], + false, Qi4), + Qi5 + end), + + %% b) partial pub+del, then move to new segment, then ack all in old segment + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + {Qi3, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], + false, Qi2), + Qi4 = rabbit_queue_index:ack(SeqIdsC, Qi3), + rabbit_queue_index:flush(Qi4) + end), + + %% c) just fill up several segments of all pubs, then +dels, then +acks + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsD, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsD, Qi2), + rabbit_queue_index:flush(Qi3) + end), + + %% d) get messages in all states to a segment, then flush, then do + %% the same again, don't flush and read. This will hit all + %% possibilities in combining the segment with the journal. + with_empty_test_queue( + fun (Qi0) -> + {Qi1, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], + false, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {[], Qi8} = rabbit_queue_index:read(0, 4, Qi7), + {ReadD, Qi9} = rabbit_queue_index:read(4, 7, Qi8), + ok = verify_read_with_published(true, false, ReadD, + [Four, Five, Six]), + {ReadE, Qi10} = rabbit_queue_index:read(7, 9, Qi9), + ok = verify_read_with_published(false, false, ReadE, + [Seven, Eight]), + Qi10 + end), + + %% e) as for (d), but use terminate instead of read, which will + %% exercise journal_minus_segment, not segment_plus_journal. + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], + true, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + {5, _Terms9, Qi4} = restart_test_queue(Qi3), + {Qi5, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {5, _Terms10, Qi8} = restart_test_queue(Qi7), + Qi8 + end), + + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([]), + + passed. + +variable_queue_publish(IsPersistent, Count, VQ) -> + lists:foldl( + fun (_N, VQN) -> + rabbit_variable_queue:publish( + rabbit_basic:message( + rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = case IsPersistent of + true -> 2; + false -> 1 + end}, <<>>), VQN) + end, VQ, lists:seq(1, Count)). + +variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> + lists:foldl(fun (N, {VQN, AckTagsAcc}) -> + Rem = Len - N, + {{#basic_message { is_persistent = IsPersistent }, + IsDelivered, AckTagN, Rem}, VQM} = + rabbit_variable_queue:fetch(true, VQN), + {VQM, [AckTagN | AckTagsAcc]} + end, {VQ, []}, lists:seq(1, Count)). + +assert_prop(List, Prop, Value) -> + Value = proplists:get_value(Prop, List). + +assert_props(List, PropVals) -> + [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals]. + +with_fresh_variable_queue(Fun) -> + ok = empty_test_queue(), + VQ = rabbit_variable_queue:init(test_queue(), true, false), + S0 = rabbit_variable_queue:status(VQ), + assert_props(S0, [{q1, 0}, {q2, 0}, + {delta, {delta, undefined, 0, undefined}}, + {q3, 0}, {q4, 0}, + {len, 0}]), + _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)), + passed. + +test_variable_queue() -> + [passed = with_fresh_variable_queue(F) || + F <- [fun test_variable_queue_dynamic_duration_change/1, + fun test_variable_queue_partial_segments_delta_thing/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1]], + passed. + +test_variable_queue_dynamic_duration_change(VQ0) -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + + %% start by sending in a couple of segments worth + Len = 2*SegmentSize, + VQ1 = variable_queue_publish(false, Len, VQ0), + + %% squeeze and relax queue + Churn = Len div 32, + VQ2 = publish_fetch_and_ack(Churn, Len, VQ1), + {Duration, VQ3} = rabbit_variable_queue:ram_duration(VQ2), + VQ7 = lists:foldl( + fun (Duration1, VQ4) -> + {_Duration, VQ5} = rabbit_variable_queue:ram_duration(VQ4), + io:format("~p:~n~p~n", + [Duration1, rabbit_variable_queue:status(VQ5)]), + VQ6 = rabbit_variable_queue:set_ram_duration_target( + Duration1, VQ5), + publish_fetch_and_ack(Churn, Len, VQ6) + end, VQ3, [Duration / 4, 0, Duration / 4, infinity]), + + %% drain + {VQ8, AckTags} = variable_queue_fetch(Len, false, false, Len, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags, VQ8), + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), + + VQ10. + +publish_fetch_and_ack(0, _Len, VQ0) -> + VQ0; +publish_fetch_and_ack(N, Len, VQ0) -> + VQ1 = variable_queue_publish(false, 1, VQ0), + {{_Msg, false, AckTag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1), + publish_fetch_and_ack(N-1, Len, rabbit_variable_queue:ack([AckTag], VQ2)). + +test_variable_queue_partial_segments_delta_thing(VQ0) -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + HalfSegment = SegmentSize div 2, + OneAndAHalfSegment = SegmentSize + HalfSegment, + VQ1 = variable_queue_publish(true, OneAndAHalfSegment, VQ0), + {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1), + VQ3 = check_variable_queue_status( + rabbit_variable_queue:set_ram_duration_target(0, VQ2), + %% one segment in q3 as betas, and half a segment in delta + [{delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment}]), + VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3), + VQ5 = check_variable_queue_status( + variable_queue_publish(true, 1, VQ4), + %% one alpha, but it's in the same segment as the deltas + [{q1, 1}, + {delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}}, + {q3, SegmentSize}, + {len, SegmentSize + HalfSegment + 1}]), + {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, + SegmentSize + HalfSegment + 1, VQ5), + VQ7 = check_variable_queue_status( + VQ6, + %% the half segment should now be in q3 as betas + [{q1, 1}, + {delta, {delta, undefined, 0, undefined}}, + {q3, HalfSegment}, + {len, HalfSegment + 1}]), + {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, + HalfSegment + 1, VQ7), + VQ9 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8), + %% should be empty now + {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9), + VQ10. + +check_variable_queue_status(VQ0, Props) -> + VQ1 = variable_queue_wait_for_shuffling_end(VQ0), + S = rabbit_variable_queue:status(VQ1), + io:format("~p~n", [S]), + assert_props(S, Props), + VQ1. + +variable_queue_wait_for_shuffling_end(VQ) -> + case rabbit_variable_queue:needs_idle_timeout(VQ) of + true -> variable_queue_wait_for_shuffling_end( + rabbit_variable_queue:idle_timeout(VQ)); + false -> VQ + end. + +test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) -> + Count = 2 * rabbit_queue_index:next_segment_boundary(0), + VQ1 = variable_queue_publish(true, Count, VQ0), + VQ2 = variable_queue_publish(false, Count, VQ1), + VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2), + {VQ4, _AckTags} = variable_queue_fetch(Count, true, false, + Count + Count, VQ3), + {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false, + Count, VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {{_Msg1, true, _AckTag1, Count1}, VQ8} = + rabbit_variable_queue:fetch(true, VQ7), + VQ9 = variable_queue_publish(false, 1, VQ8), + VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9), + {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10), + {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11), + VQ12. + +test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) -> + VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0), + VQ2 = variable_queue_publish(false, 4, VQ1), + {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2), + VQ4 = rabbit_variable_queue:requeue(AckTags, VQ3), + VQ5 = rabbit_variable_queue:idle_timeout(VQ4), + _VQ6 = rabbit_variable_queue:terminate(VQ5), + VQ7 = rabbit_variable_queue:init(test_queue(), true, true), + {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), + VQ8. + +test_queue_recover() -> + Count = 2 * rabbit_queue_index:next_segment_boundary(0), + TxID = rabbit_guid:guid(), + {new, #amqqueue { pid = QPid, name = QName }} = + rabbit_amqqueue:declare(test_queue(), true, false, [], none), + Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = 2}, <<>>), + Delivery = #delivery{mandatory = false, immediate = false, txn = TxID, + sender = self(), message = Msg}, + [true = rabbit_amqqueue:deliver(QPid, Delivery) || + _ <- lists:seq(1, Count)], + rabbit_amqqueue:commit_all([QPid], TxID, self()), + exit(QPid, kill), + MRef = erlang:monitor(process, QPid), + receive {'DOWN', MRef, process, QPid, _Info} -> ok + after 10000 -> exit(timeout_waiting_for_queue_death) + end, + rabbit_amqqueue:stop(), + ok = rabbit_amqqueue:start(), + rabbit_amqqueue:with_or_die( + QName, + fun (Q1 = #amqqueue { pid = QPid1 }) -> + CountMinusOne = Count - 1, + {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} = + rabbit_amqqueue:basic_get(Q1, self(), false), + exit(QPid1, shutdown), + VQ1 = rabbit_variable_queue:init(QName, true, true), + {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} = + rabbit_variable_queue:fetch(true, VQ1), + _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2), + rabbit_amqqueue:internal_delete(QName) + end), + passed. diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl new file mode 100644 index 00000000..3aaf1917 --- /dev/null +++ b/src/rabbit_types.erl @@ -0,0 +1,147 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_types). + +-include("rabbit.hrl"). + +-ifdef(use_specs). + +-export_type([txn/0, maybe/1, info/0, info_key/0, message/0, basic_message/0, + delivery/0, content/0, decoded_content/0, undecoded_content/0, + unencoded_content/0, encoded_content/0, vhost/0, ctag/0, + amqp_error/0, r/1, r2/2, r3/3, ssl_socket/0, listener/0, + binding/0, amqqueue/0, exchange/0, connection/0, protocol/0, + user/0, error/1, ok_or_error/1, ok_or_error2/2, ok/1]). + +-type(maybe(T) :: T | 'none'). +-type(vhost() :: binary()). +-type(ctag() :: binary()). + +%% TODO: make this more precise by tying specific class_ids to +%% specific properties +-type(undecoded_content() :: + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: 'none', + properties_bin :: binary(), + payload_fragments_rev :: [binary()]} | + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: rabbit_framing:amqp_property_record(), + properties_bin :: 'none', + payload_fragments_rev :: [binary()]}). +-type(unencoded_content() :: undecoded_content()). +-type(decoded_content() :: + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: rabbit_framing:amqp_property_record(), + properties_bin :: maybe(binary()), + payload_fragments_rev :: [binary()]}). +-type(encoded_content() :: + #content{class_id :: rabbit_framing:amqp_class_id(), + properties :: maybe(rabbit_framing:amqp_property_record()), + properties_bin :: binary(), + payload_fragments_rev :: [binary()]}). +-type(content() :: undecoded_content() | decoded_content()). +-type(basic_message() :: + #basic_message{exchange_name :: rabbit_exchange:name(), + routing_key :: rabbit_router:routing_key(), + content :: content(), + guid :: rabbit_guid:guid(), + is_persistent :: boolean()}). +-type(message() :: basic_message()). +-type(delivery() :: + #delivery{mandatory :: boolean(), + immediate :: boolean(), + txn :: maybe(txn()), + sender :: pid(), + message :: message()}). + +%% this is really an abstract type, but dialyzer does not support them +-type(txn() :: rabbit_guid:guid()). + +-type(info_key() :: atom()). +-type(info() :: {info_key(), any()}). + +-type(amqp_error() :: + #amqp_error{name :: rabbit_framing:amqp_exception(), + explanation :: string(), + method :: rabbit_framing:amqp_method_name()}). + +-type(r(Kind) :: + r2(vhost(), Kind)). +-type(r2(VirtualHost, Kind) :: + r3(VirtualHost, Kind, rabbit_misc:resource_name())). +-type(r3(VirtualHost, Kind, Name) :: + #resource{virtual_host :: VirtualHost, + kind :: Kind, + name :: Name}). + +-type(ssl_socket() :: #ssl_socket{}). + +-type(listener() :: + #listener{node :: node(), + protocol :: atom(), + host :: rabbit_networking:hostname(), + port :: rabbit_networking:ip_port()}). + +-type(binding() :: + #binding{exchange_name :: rabbit_exchange:name(), + queue_name :: rabbit_amqqueue:name(), + key :: rabbit_exchange:binding_key()}). + +-type(amqqueue() :: + #amqqueue{name :: rabbit_amqqueue:name(), + durable :: boolean(), + auto_delete :: boolean(), + exclusive_owner :: rabbit_types:maybe(pid()), + arguments :: rabbit_framing:amqp_table(), + pid :: rabbit_types:maybe(pid())}). + +-type(exchange() :: + #exchange{name :: rabbit_exchange:name(), + type :: rabbit_exchange:type(), + durable :: boolean(), + auto_delete :: boolean(), + arguments :: rabbit_framing:amqp_table()}). + +-type(connection() :: pid()). + +-type(protocol() :: atom()). + +-type(user() :: + #user{username :: rabbit_access_control:username(), + password :: rabbit_access_control:password()}). + +-type(ok(A) :: {'ok', A}). +-type(error(A) :: {'error', A}). +-type(ok_or_error(A) :: 'ok' | error(A)). +-type(ok_or_error2(A, B) :: ok(A) | error(B)). + +-endif. % use_specs diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl new file mode 100644 index 00000000..0f52eee8 --- /dev/null +++ b/src/rabbit_variable_queue.erl @@ -0,0 +1,1433 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_variable_queue). + +-export([init/3, terminate/1, delete_and_terminate/1, + purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, + tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, + requeue/2, len/1, is_empty/1, + set_ram_duration_target/2, ram_duration/1, + needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, + status/1]). + +-export([start/1, stop/0]). + +%% exported for testing only +-export([start_msg_store/2, stop_msg_store/0]). + +%%---------------------------------------------------------------------------- +%% Definitions: + +%% alpha: this is a message where both the message itself, and its +%% position within the queue are held in RAM +%% +%% beta: this is a message where the message itself is only held on +%% disk, but its position within the queue is held in RAM. +%% +%% gamma: this is a message where the message itself is only held on +%% disk, but its position is both in RAM and on disk. +%% +%% delta: this is a collection of messages, represented by a single +%% term, where the messages and their position are only held on +%% disk. +%% +%% Note that for persistent messages, the message and its position +%% within the queue are always held on disk, *in addition* to being in +%% one of the above classifications. +%% +%% Also note that within this code, the term gamma never +%% appears. Instead, gammas are defined by betas who have had their +%% queue position recorded on disk. +%% +%% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though +%% many of these steps are frequently skipped. q1 and q4 only hold +%% alphas, q2 and q3 hold both betas and gammas (as queues of queues, +%% using the bpqueue module where the block prefix determines whether +%% they're betas or gammas). When a message arrives, its +%% classification is determined. It is then added to the rightmost +%% appropriate queue. +%% +%% If a new message is determined to be a beta or gamma, q1 is +%% empty. If a new message is determined to be a delta, q1 and q2 are +%% empty (and actually q4 too). +%% +%% When removing messages from a queue, if q4 is empty then q3 is read +%% directly. If q3 becomes empty then the next segment's worth of +%% messages from delta are read into q3, reducing the size of +%% delta. If the queue is non empty, either q4 or q3 contain +%% entries. It is never permitted for delta to hold all the messages +%% in the queue. +%% +%% The duration indicated to us by the memory_monitor is used to +%% calculate, given our current ingress and egress rates, how many +%% messages we should hold in RAM. When we need to push alphas to +%% betas or betas to gammas, we favour writing out messages that are +%% further from the head of the queue. This minimises writes to disk, +%% as the messages closer to the tail of the queue stay in the queue +%% for longer, thus do not need to be replaced as quickly by sending +%% other messages to disk. +%% +%% Whilst messages are pushed to disk and forgotten from RAM as soon +%% as requested by a new setting of the queue RAM duration, the +%% inverse is not true: we only load messages back into RAM as +%% demanded as the queue is read from. Thus only publishes to the +%% queue will take up available spare capacity. +%% +%% When we report our duration to the memory monitor, we calculate +%% average ingress and egress rates over the last two samples, and +%% then calculate our duration based on the sum of the ingress and +%% egress rates. More than two samples could be used, but it's a +%% balance between responding quickly enough to changes in +%% producers/consumers versus ignoring temporary blips. The problem +%% with temporary blips is that with just a few queues, they can have +%% substantial impact on the calculation of the average duration and +%% hence cause unnecessary I/O. Another alternative is to increase the +%% amqqueue_process:RAM_DURATION_UPDATE_PERIOD to beyond 5 +%% seconds. However, that then runs the risk of being too slow to +%% inform the memory monitor of changes. Thus a 5 second interval, +%% plus a rolling average over the last two samples seems to work +%% well in practice. +%% +%% The sum of the ingress and egress rates is used because the egress +%% rate alone is not sufficient. Adding in the ingress rate means that +%% queues which are being flooded by messages are given more memory, +%% resulting in them being able to process the messages faster (by +%% doing less I/O, or at least deferring it) and thus helping keep +%% their mailboxes empty and thus the queue as a whole is more +%% responsive. If such a queue also has fast but previously idle +%% consumers, the consumer can then start to be driven as fast as it +%% can go, whereas if only egress rate was being used, the incoming +%% messages may have to be written to disk and then read back in, +%% resulting in the hard disk being a bottleneck in driving the +%% consumers. Generally, we want to give Rabbit every chance of +%% getting rid of messages as fast as possible and remaining +%% responsive, and using only the egress rate impacts that goal. +%% +%% If a queue is full of transient messages, then the transition from +%% betas to deltas will be potentially very expensive as millions of +%% entries must be written to disk by the queue_index module. This can +%% badly stall the queue. In order to avoid this, the proportion of +%% gammas / (betas+gammas) must not be lower than (betas+gammas) / +%% (alphas+betas+gammas). As the queue grows or available memory +%% shrinks, the latter ratio increases, requiring the conversion of +%% more gammas to betas in order to maintain the invariant. At the +%% point at which betas and gammas must be converted to deltas, there +%% should be very few betas remaining, thus the transition is fast (no +%% work needs to be done for the gamma -> delta transition). +%% +%% The conversion of betas to gammas is done in batches of exactly +%% ?IO_BATCH_SIZE. This value should not be too small, otherwise the +%% frequent operations on the queues of q2 and q3 will not be +%% effectively amortised (switching the direction of queue access +%% defeats amortisation), nor should it be too big, otherwise +%% converting a batch stalls the queue for too long. Therefore, it +%% must be just right. ram_index_count is used here and is the number +%% of betas. +%% +%% The conversion from alphas to betas is also chunked, but only to +%% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at +%% any one time. This further smooths the effects of changes to the +%% target_ram_msg_count and ensures the queue remains responsive +%% even when there is a large amount of IO work to do. The +%% idle_timeout callback is utilised to ensure that conversions are +%% done as promptly as possible whilst ensuring the queue remains +%% responsive. +%% +%% In the queue we keep track of both messages that are pending +%% delivery and messages that are pending acks. This ensures that +%% purging (deleting the former) and deletion (deleting the former and +%% the latter) are both cheap and do require any scanning through qi +%% segments. +%% +%% Notes on Clean Shutdown +%% (This documents behaviour in variable_queue, queue_index and +%% msg_store.) +%% +%% In order to try to achieve as fast a start-up as possible, if a +%% clean shutdown occurs, we try to save out state to disk to reduce +%% work on startup. In the msg_store this takes the form of the +%% index_module's state, plus the file_summary ets table, and client +%% refs. In the VQ, this takes the form of the count of persistent +%% messages in the queue and references into the msg_stores. The +%% queue_index adds to these terms the details of its segments and +%% stores the terms in the queue directory. +%% +%% Two message stores are used. One is created for persistent messages +%% to durable queues that must survive restarts, and the other is used +%% for all other messages that just happen to need to be written to +%% disk. On start up we can therefore nuke the transient message +%% store, and be sure that the messages in the persistent store are +%% all that we need. +%% +%% The references to the msg_stores are there so that the msg_store +%% knows to only trust its saved state if all of the queues it was +%% previously talking to come up cleanly. Likewise, the queues +%% themselves (esp queue_index) skips work in init if all the queues +%% and msg_store were shutdown cleanly. This gives both good speed +%% improvements and also robustness so that if anything possibly went +%% wrong in shutdown (or there was subsequent manual tampering), all +%% messages and queues that can be recovered are recovered, safely. +%% +%% To delete transient messages lazily, the variable_queue, on +%% startup, stores the next_seq_id reported by the queue_index as the +%% transient_threshold. From that point on, whenever it's reading a +%% message off disk via the queue_index, if the seq_id is below this +%% threshold and the message is transient then it drops the message +%% (the message itself won't exist on disk because it would have been +%% stored in the transient msg_store which would have had its saved +%% state nuked on startup). This avoids the expensive operation of +%% scanning the entire queue on startup in order to delete transient +%% messages that were only pushed to disk to save memory. +%% +%%---------------------------------------------------------------------------- + +-behaviour(rabbit_backing_queue). + +-record(vqstate, + { q1, + q2, + delta, + q3, + q4, + next_seq_id, + pending_ack, + index_state, + msg_store_clients, + on_sync, + durable, + transient_threshold, + + len, + persistent_count, + + duration_target, + target_ram_msg_count, + ram_msg_count, + ram_msg_count_prev, + ram_index_count, + out_counter, + in_counter, + rates + }). + +-record(rates, { egress, ingress, avg_egress, avg_ingress, timestamp }). + +-record(msg_status, + { seq_id, + guid, + msg, + is_persistent, + is_delivered, + msg_on_disk, + index_on_disk + }). + +-record(delta, + { start_seq_id, %% start_seq_id is inclusive + count, + end_seq_id %% end_seq_id is exclusive + }). + +-record(tx, { pending_messages, pending_acks }). + +-record(sync, { acks_persistent, acks_all, pubs, funs }). + +%% When we discover, on publish, that we should write some indices to +%% disk for some betas, the RAM_INDEX_BATCH_SIZE sets the number of +%% betas that we must be due to write indices for before we do any +%% work at all. This is both a minimum and a maximum - we don't write +%% fewer than RAM_INDEX_BATCH_SIZE indices out in one go, and we don't +%% write more - we can always come back on the next publish to do +%% more. +-define(IO_BATCH_SIZE, 64). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + +-include("rabbit.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}). +-type(seq_id() :: non_neg_integer()). +-type(ack() :: seq_id() | 'blank_ack'). + +-type(rates() :: #rates { egress :: {timestamp(), non_neg_integer()}, + ingress :: {timestamp(), non_neg_integer()}, + avg_egress :: float(), + avg_ingress :: float(), + timestamp :: timestamp() }). + +-type(delta() :: #delta { start_seq_id :: non_neg_integer(), + count :: non_neg_integer (), + end_seq_id :: non_neg_integer() }). + +-type(sync() :: #sync { acks_persistent :: [[seq_id()]], + acks_all :: [[seq_id()]], + pubs :: [[rabbit_guid:guid()]], + funs :: [fun (() -> any())] }). + +-type(state() :: #vqstate { + q1 :: queue(), + q2 :: bpqueue:bpqueue(), + delta :: delta(), + q3 :: bpqueue:bpqueue(), + q4 :: queue(), + next_seq_id :: seq_id(), + pending_ack :: dict:dictionary(), + index_state :: any(), + msg_store_clients :: 'undefined' | {{any(), binary()}, + {any(), binary()}}, + on_sync :: sync(), + durable :: boolean(), + + len :: non_neg_integer(), + persistent_count :: non_neg_integer(), + + transient_threshold :: non_neg_integer(), + duration_target :: number() | 'infinity', + target_ram_msg_count :: non_neg_integer() | 'infinity', + ram_msg_count :: non_neg_integer(), + ram_msg_count_prev :: non_neg_integer(), + ram_index_count :: non_neg_integer(), + out_counter :: non_neg_integer(), + in_counter :: non_neg_integer(), + rates :: rates() }). + +-include("rabbit_backing_queue_spec.hrl"). + +-endif. + +-define(BLANK_DELTA, #delta { start_seq_id = undefined, + count = 0, + end_seq_id = undefined }). +-define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, + count = 0, + end_seq_id = Z }). + +-define(BLANK_SYNC, #sync { acks_persistent = [], + acks_all = [], + pubs = [], + funs = [] }). + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +start(DurableQueues) -> + {AllTerms, StartFunState} = rabbit_queue_index:recover(DurableQueues), + start_msg_store( + [Ref || Terms <- AllTerms, + begin + Ref = proplists:get_value(persistent_ref, Terms), + Ref =/= undefined + end], + StartFunState). + +stop() -> stop_msg_store(). + +start_msg_store(Refs, StartFunState) -> + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, + [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), + undefined, {fun (ok) -> finished end, ok}]), + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + Refs, StartFunState]). + +stop_msg_store() -> + ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), + ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE). + +init(QueueName, IsDurable, Recover) -> + {DeltaCount, Terms, IndexState} = + rabbit_queue_index:init( + QueueName, Recover, + rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end), + {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState), + + {PRef, TRef, Terms1} = + case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of + [] -> {proplists:get_value(persistent_ref, Terms), + proplists:get_value(transient_ref, Terms), + Terms}; + _ -> {rabbit_guid:guid(), rabbit_guid:guid(), []} + end, + DeltaCount1 = proplists:get_value(persistent_count, Terms1, DeltaCount), + Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of + true -> ?BLANK_DELTA; + false -> #delta { start_seq_id = LowSeqId, + count = DeltaCount1, + end_seq_id = NextSeqId } + end, + Now = now(), + PersistentClient = + case IsDurable of + true -> rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, PRef); + false -> undefined + end, + TransientClient = rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef), + State = #vqstate { + q1 = queue:new(), + q2 = bpqueue:new(), + delta = Delta, + q3 = bpqueue:new(), + q4 = queue:new(), + next_seq_id = NextSeqId, + pending_ack = dict:new(), + index_state = IndexState1, + msg_store_clients = {{PersistentClient, PRef}, + {TransientClient, TRef}}, + on_sync = ?BLANK_SYNC, + durable = IsDurable, + transient_threshold = NextSeqId, + + len = DeltaCount1, + persistent_count = DeltaCount1, + + duration_target = infinity, + target_ram_msg_count = infinity, + ram_msg_count = 0, + ram_msg_count_prev = 0, + ram_index_count = 0, + out_counter = 0, + in_counter = 0, + rates = #rates { egress = {Now, 0}, + ingress = {Now, DeltaCount1}, + avg_egress = 0.0, + avg_ingress = 0.0, + timestamp = Now } }, + a(maybe_deltas_to_betas(State)). + +terminate(State) -> + State1 = #vqstate { persistent_count = PCount, + index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}} } = + remove_pending_ack(true, tx_commit_index(State)), + case MSCStateP of + undefined -> ok; + _ -> rabbit_msg_store:client_terminate(MSCStateP) + end, + rabbit_msg_store:client_terminate(MSCStateT), + Terms = [{persistent_ref, PRef}, + {transient_ref, TRef}, + {persistent_count, PCount}], + a(State1 #vqstate { index_state = rabbit_queue_index:terminate( + Terms, IndexState), + msg_store_clients = undefined }). + +%% the only difference between purge and delete is that delete also +%% needs to delete everything that's been delivered and not ack'd. +delete_and_terminate(State) -> + %% TODO: there is no need to interact with qi at all - which we do + %% as part of 'purge' and 'remove_pending_ack', other than + %% deleting it. + {_PurgeCount, State1} = purge(State), + State2 = #vqstate { index_state = IndexState, + msg_store_clients = {{MSCStateP, PRef}, + {MSCStateT, TRef}} } = + remove_pending_ack(false, State1), + IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState), + case MSCStateP of + undefined -> ok; + _ -> rabbit_msg_store:client_delete_and_terminate( + MSCStateP, ?PERSISTENT_MSG_STORE, PRef), + rabbit_msg_store:client_terminate(MSCStateP) + end, + rabbit_msg_store:client_delete_and_terminate( + MSCStateT, ?TRANSIENT_MSG_STORE, TRef), + a(State2 #vqstate { index_state = IndexState1, + msg_store_clients = undefined }). + +purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) -> + %% TODO: when there are no pending acks, which is a common case, + %% we could simply wipe the qi instead of issuing delivers and + %% acks for all the messages. + IndexState1 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4, + IndexState), + State1 = #vqstate { q1 = Q1, index_state = IndexState2 } = + purge_betas_and_deltas(State #vqstate { q4 = queue:new(), + index_state = IndexState1 }), + IndexState3 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q1, + IndexState2), + {Len, a(State1 #vqstate { q1 = queue:new(), + index_state = IndexState3, + len = 0, + ram_msg_count = 0, + ram_index_count = 0, + persistent_count = 0 })}. + +publish(Msg, State) -> + {_SeqId, State1} = publish(Msg, false, false, State), + a(reduce_memory_use(State1)). + +publish_delivered(false, _Msg, State = #vqstate { len = 0 }) -> + {blank_ack, a(State)}; +publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent }, + State = #vqstate { len = 0, + next_seq_id = SeqId, + out_counter = OutCount, + in_counter = InCount, + persistent_count = PCount, + pending_ack = PA, + durable = IsDurable }) -> + IsPersistent1 = IsDurable andalso IsPersistent, + MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) + #msg_status { is_delivered = true }, + {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), + PA1 = record_pending_ack(m(MsgStatus1), PA), + PCount1 = PCount + one_if(IsPersistent1), + {SeqId, a(State1 #vqstate { next_seq_id = SeqId + 1, + out_counter = OutCount + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + pending_ack = PA1 })}. + +fetch(AckRequired, State = #vqstate { q4 = Q4, + ram_msg_count = RamMsgCount, + out_counter = OutCount, + index_state = IndexState, + len = Len, + persistent_count = PCount, + pending_ack = PA }) -> + case queue:out(Q4) of + {empty, _Q4} -> + case fetch_from_q3_to_q4(State) of + {empty, State1} = Result -> a(State1), Result; + {loaded, State1} -> fetch(AckRequired, State1) + end; + {{value, MsgStatus = #msg_status { + msg = Msg, guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, is_delivered = IsDelivered, + msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }}, + Q4a} -> + + %% 1. Mark it delivered if necessary + IndexState1 = maybe_write_delivered( + IndexOnDisk andalso not IsDelivered, + SeqId, IndexState), + + %% 2. Remove from msg_store and queue index, if necessary + MsgStore = find_msg_store(IsPersistent), + Rem = fun () -> ok = rabbit_msg_store:remove(MsgStore, [Guid]) end, + Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end, + IndexState2 = + case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of + {false, true, false, _} -> Rem(), IndexState1; + {false, true, true, _} -> Rem(), Ack(); + { true, true, true, false} -> Ack(); + _ -> IndexState1 + end, + + %% 3. If an ack is required, add something sensible to PA + {AckTag, PA1} = case AckRequired of + true -> PA2 = record_pending_ack( + MsgStatus #msg_status { + is_delivered = true }, PA), + {SeqId, PA2}; + false -> {blank_ack, PA} + end, + + PCount1 = PCount - one_if(IsPersistent andalso not AckRequired), + Len1 = Len - 1, + {{Msg, IsDelivered, AckTag, Len1}, + a(State #vqstate { q4 = Q4a, + ram_msg_count = RamMsgCount - 1, + out_counter = OutCount + 1, + index_state = IndexState2, + len = Len1, + persistent_count = PCount1, + pending_ack = PA1 })} + end. + +ack(AckTags, State) -> + a(ack(fun rabbit_msg_store:remove/2, + fun (_AckEntry, State1) -> State1 end, + AckTags, State)). + +tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent }, + State = #vqstate { durable = IsDurable, + msg_store_clients = MSCState }) -> + Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }), + a(case IsPersistent andalso IsDurable of + true -> MsgStatus = msg_status(true, undefined, Msg), + {#msg_status { msg_on_disk = true }, MSCState1} = + maybe_write_msg_to_disk(false, MsgStatus, MSCState), + State #vqstate { msg_store_clients = MSCState1 }; + false -> State + end). + +tx_ack(Txn, AckTags, State) -> + Tx = #tx { pending_acks = Acks } = lookup_tx(Txn), + store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }), + State. + +tx_rollback(Txn, State = #vqstate { durable = IsDurable }) -> + #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), + erase_tx(Txn), + ok = case IsDurable of + true -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, + persistent_guids(Pubs)); + false -> ok + end, + {lists:append(AckTags), a(State)}. + +tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) -> + #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn), + erase_tx(Txn), + PubsOrdered = lists:reverse(Pubs), + AckTags1 = lists:append(AckTags), + PersistentGuids = persistent_guids(PubsOrdered), + HasPersistentPubs = PersistentGuids =/= [], + {AckTags1, + a(case IsDurable andalso HasPersistentPubs of + true -> ok = rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, PersistentGuids, + msg_store_callback(PersistentGuids, + PubsOrdered, AckTags1, Fun)), + State; + false -> tx_commit_post_msg_store( + HasPersistentPubs, PubsOrdered, AckTags1, Fun, State) + end)}. + +requeue(AckTags, State) -> + a(reduce_memory_use( + ack(fun rabbit_msg_store:release/2, + fun (#msg_status { msg = Msg }, State1) -> + {_SeqId, State2} = publish(Msg, true, false, State1), + State2; + ({IsPersistent, Guid}, State1) -> + #vqstate { msg_store_clients = MSCState } = State1, + {{ok, Msg = #basic_message{}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + State2 = State1 #vqstate { msg_store_clients = MSCState1 }, + {_SeqId, State3} = publish(Msg, true, true, State2), + State3 + end, + AckTags, State))). + +len(#vqstate { len = Len }) -> Len. + +is_empty(State) -> 0 == len(State). + +set_ram_duration_target(DurationTarget, + State = #vqstate { + rates = #rates { avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate }, + target_ram_msg_count = TargetRamMsgCount }) -> + Rate = AvgEgressRate + AvgIngressRate, + TargetRamMsgCount1 = + case DurationTarget of + infinity -> infinity; + _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec + end, + State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1, + duration_target = DurationTarget }, + a(case TargetRamMsgCount1 == infinity orelse + (TargetRamMsgCount =/= infinity andalso + TargetRamMsgCount1 >= TargetRamMsgCount) of + true -> State1; + false -> reduce_memory_use(State1) + end). + +ram_duration(State = #vqstate { + rates = #rates { egress = Egress, + ingress = Ingress, + timestamp = Timestamp } = Rates, + in_counter = InCount, + out_counter = OutCount, + ram_msg_count = RamMsgCount, + duration_target = DurationTarget, + ram_msg_count_prev = RamMsgCountPrev }) -> + Now = now(), + {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress), + {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress), + + Duration = %% msgs / (msgs/sec) == sec + case AvgEgressRate == 0 andalso AvgIngressRate == 0 of + true -> infinity; + false -> (RamMsgCountPrev + RamMsgCount) / + (2 * (AvgEgressRate + AvgIngressRate)) + end, + + {Duration, set_ram_duration_target( + DurationTarget, + State #vqstate { + rates = Rates #rates { + egress = Egress1, + ingress = Ingress1, + avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate, + timestamp = Now }, + in_counter = 0, + out_counter = 0, + ram_msg_count_prev = RamMsgCount })}. + +needs_idle_timeout(State = #vqstate { on_sync = ?BLANK_SYNC }) -> + {Res, _State} = reduce_memory_use(fun (_Quota, State1) -> State1 end, + fun (_Quota, State1) -> State1 end, + fun (State1) -> State1 end, + State), + Res; +needs_idle_timeout(_State) -> + true. + +idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))). + +handle_pre_hibernate(State = #vqstate { index_state = IndexState }) -> + State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }. + +status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + len = Len, + pending_ack = PA, + on_sync = #sync { funs = From }, + target_ram_msg_count = TargetRamMsgCount, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, + next_seq_id = NextSeqId, + persistent_count = PersistentCount, + rates = #rates { + avg_egress = AvgEgressRate, + avg_ingress = AvgIngressRate } }) -> + [ {q1 , queue:len(Q1)}, + {q2 , bpqueue:len(Q2)}, + {delta , Delta}, + {q3 , bpqueue:len(Q3)}, + {q4 , queue:len(Q4)}, + {len , Len}, + {pending_acks , dict:size(PA)}, + {outstanding_txns , length(From)}, + {target_ram_msg_count , TargetRamMsgCount}, + {ram_msg_count , RamMsgCount}, + {ram_index_count , RamIndexCount}, + {next_seq_id , NextSeqId}, + {persistent_count , PersistentCount}, + {avg_egress_rate , AvgEgressRate}, + {avg_ingress_rate , AvgIngressRate} ]. + +%%---------------------------------------------------------------------------- +%% Minor helpers +%%---------------------------------------------------------------------------- + +a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4, + len = Len, + persistent_count = PersistentCount, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount }) -> + E1 = queue:is_empty(Q1), + E2 = bpqueue:is_empty(Q2), + ED = Delta#delta.count == 0, + E3 = bpqueue:is_empty(Q3), + E4 = queue:is_empty(Q4), + LZ = Len == 0, + + true = E1 or not E3, + true = E2 or not ED, + true = ED or not E3, + true = LZ == (E3 and E4), + + true = Len >= 0, + true = PersistentCount >= 0, + true = RamMsgCount >= 0, + true = RamIndexCount >= 0, + + State. + +m(MsgStatus = #msg_status { msg = Msg, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk }) -> + true = (not IsPersistent) or IndexOnDisk, + true = (not IndexOnDisk) or MsgOnDisk, + true = (Msg =/= undefined) or MsgOnDisk, + + MsgStatus. + +one_if(true ) -> 1; +one_if(false) -> 0. + +cons_if(true, E, L) -> [E | L]; +cons_if(false, _E, L) -> L. + +msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) -> + #msg_status { seq_id = SeqId, guid = Guid, msg = Msg, + is_persistent = IsPersistent, is_delivered = false, + msg_on_disk = false, index_on_disk = false }. + +find_msg_store(true) -> ?PERSISTENT_MSG_STORE; +find_msg_store(false) -> ?TRANSIENT_MSG_STORE. + +with_msg_store_state({{MSCStateP, PRef}, MSCStateT}, true, Fun) -> + {Result, MSCStateP1} = Fun(?PERSISTENT_MSG_STORE, MSCStateP), + {Result, {{MSCStateP1, PRef}, MSCStateT}}; +with_msg_store_state({MSCStateP, {MSCStateT, TRef}}, false, Fun) -> + {Result, MSCStateT1} = Fun(?TRANSIENT_MSG_STORE, MSCStateT), + {Result, {MSCStateP, {MSCStateT1, TRef}}}. + +read_from_msg_store(MSCState, IsPersistent, Guid) -> + with_msg_store_state( + MSCState, IsPersistent, + fun (MsgStore, MSCState1) -> + rabbit_msg_store:read(MsgStore, Guid, MSCState1) + end). + +maybe_write_delivered(false, _SeqId, IndexState) -> + IndexState; +maybe_write_delivered(true, SeqId, IndexState) -> + rabbit_queue_index:deliver([SeqId], IndexState). + +lookup_tx(Txn) -> case get({txn, Txn}) of + undefined -> #tx { pending_messages = [], + pending_acks = [] }; + V -> V + end. + +store_tx(Txn, Tx) -> put({txn, Txn}, Tx). + +erase_tx(Txn) -> erase({txn, Txn}). + +persistent_guids(Pubs) -> + [Guid || #basic_message { guid = Guid, is_persistent = true } <- Pubs]. + +betas_from_index_entries(List, TransientThreshold, IndexState) -> + {Filtered, Delivers, Acks} = + lists:foldr( + fun ({Guid, SeqId, IsPersistent, IsDelivered}, + {Filtered1, Delivers1, Acks1}) -> + case SeqId < TransientThreshold andalso not IsPersistent of + true -> {Filtered1, + cons_if(not IsDelivered, SeqId, Delivers1), + [SeqId | Acks1]}; + false -> {[m(#msg_status { msg = undefined, + guid = Guid, + seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered, + msg_on_disk = true, + index_on_disk = true + }) | Filtered1], + Delivers1, + Acks1} + end + end, {[], [], []}, List), + {bpqueue:from_list([{true, Filtered}]), + rabbit_queue_index:ack(Acks, + rabbit_queue_index:deliver(Delivers, IndexState))}. + +%% the first arg is the older delta +combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) -> + ?BLANK_DELTA; +combine_deltas(?BLANK_DELTA_PATTERN(X), #delta { start_seq_id = Start, + count = Count, + end_seq_id = End } = B) -> + true = Start + Count =< End, %% ASSERTION + B; +combine_deltas(#delta { start_seq_id = Start, + count = Count, + end_seq_id = End } = A, ?BLANK_DELTA_PATTERN(Y)) -> + true = Start + Count =< End, %% ASSERTION + A; +combine_deltas(#delta { start_seq_id = StartLow, + count = CountLow, + end_seq_id = EndLow }, + #delta { start_seq_id = StartHigh, + count = CountHigh, + end_seq_id = EndHigh }) -> + Count = CountLow + CountHigh, + true = (StartLow =< StartHigh) %% ASSERTIONS + andalso ((StartLow + CountLow) =< EndLow) + andalso ((StartHigh + CountHigh) =< EndHigh) + andalso ((StartLow + Count) =< EndHigh), + #delta { start_seq_id = StartLow, count = Count, end_seq_id = EndHigh }. + +beta_fold(Fun, Init, Q) -> + bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q). + +update_rate(Now, Then, Count, {OThen, OCount}) -> + %% avg over the current period and the previous + {1000000.0 * (Count + OCount) / timer:now_diff(Now, OThen), {Then, Count}}. + +%%---------------------------------------------------------------------------- +%% Internal major helpers for Public API +%%---------------------------------------------------------------------------- + +msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) -> + Self = self(), + F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue( + Self, fun (StateN) -> tx_commit_post_msg_store( + true, Pubs, AckTags, Fun, StateN) + end) + end, + fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler( + fun () -> rabbit_msg_store:remove( + ?PERSISTENT_MSG_STORE, + PersistentGuids) + end, F) + end) + end. + +tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun, + State = #vqstate { + on_sync = OnSync = #sync { + acks_persistent = SPAcks, + acks_all = SAcks, + pubs = SPubs, + funs = SFuns }, + pending_ack = PA, + durable = IsDurable }) -> + PersistentAcks = + case IsDurable of + true -> [AckTag || AckTag <- AckTags, + case dict:fetch(AckTag, PA) of + #msg_status {} -> false; + {IsPersistent, _Guid} -> IsPersistent + end]; + false -> [] + end, + case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of + true -> State #vqstate { on_sync = #sync { + acks_persistent = [PersistentAcks | SPAcks], + acks_all = [AckTags | SAcks], + pubs = [Pubs | SPubs], + funs = [Fun | SFuns] }}; + false -> State1 = tx_commit_index( + State #vqstate { on_sync = #sync { + acks_persistent = [], + acks_all = [AckTags], + pubs = [Pubs], + funs = [Fun] } }), + State1 #vqstate { on_sync = OnSync } + end. + +tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) -> + State; +tx_commit_index(State = #vqstate { on_sync = #sync { + acks_persistent = SPAcks, + acks_all = SAcks, + pubs = SPubs, + funs = SFuns }, + durable = IsDurable }) -> + PAcks = lists:append(SPAcks), + Acks = lists:append(SAcks), + Pubs = lists:append(lists:reverse(SPubs)), + {SeqIds, State1 = #vqstate { index_state = IndexState }} = + lists:foldl( + fun (Msg = #basic_message { is_persistent = IsPersistent }, + {SeqIdsAcc, State2}) -> + IsPersistent1 = IsDurable andalso IsPersistent, + {SeqId, State3} = publish(Msg, false, IsPersistent1, State2), + {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3} + end, {PAcks, ack(Acks, State)}, Pubs), + IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState), + [ Fun() || Fun <- lists:reverse(SFuns) ], + reduce_memory_use( + State1 #vqstate { index_state = IndexState1, on_sync = ?BLANK_SYNC }). + +purge_betas_and_deltas(State = #vqstate { q3 = Q3, + index_state = IndexState }) -> + case bpqueue:is_empty(Q3) of + true -> State; + false -> IndexState1 = remove_queue_entries(fun beta_fold/3, Q3, + IndexState), + purge_betas_and_deltas( + maybe_deltas_to_betas( + State #vqstate { q3 = bpqueue:new(), + index_state = IndexState1 })) + end. + +remove_queue_entries(Fold, Q, IndexState) -> + {GuidsByStore, Delivers, Acks} = + Fold(fun remove_queue_entries1/2, {orddict:new(), [], []}, Q), + ok = orddict:fold(fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + rabbit_queue_index:ack(Acks, + rabbit_queue_index:deliver(Delivers, IndexState)). + +remove_queue_entries1( + #msg_status { guid = Guid, seq_id = SeqId, + is_delivered = IsDelivered, msg_on_disk = MsgOnDisk, + index_on_disk = IndexOnDisk, is_persistent = IsPersistent }, + {GuidsByStore, Delivers, Acks}) -> + {case MsgOnDisk of + true -> rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, + GuidsByStore); + false -> GuidsByStore + end, + cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers), + cons_if(IndexOnDisk, SeqId, Acks)}. + +%%---------------------------------------------------------------------------- +%% Internal gubbins for publishing +%%---------------------------------------------------------------------------- + +publish(Msg = #basic_message { is_persistent = IsPersistent }, + IsDelivered, MsgOnDisk, + State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4, + next_seq_id = SeqId, + len = Len, + in_counter = InCount, + persistent_count = PCount, + durable = IsDurable, + ram_msg_count = RamMsgCount }) -> + IsPersistent1 = IsDurable andalso IsPersistent, + MsgStatus = (msg_status(IsPersistent1, SeqId, Msg)) + #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk }, + {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State), + State2 = case bpqueue:is_empty(Q3) of + false -> State1 #vqstate { q1 = queue:in(m(MsgStatus1), Q1) }; + true -> State1 #vqstate { q4 = queue:in(m(MsgStatus1), Q4) } + end, + PCount1 = PCount + one_if(IsPersistent1), + {SeqId, State2 #vqstate { next_seq_id = SeqId + 1, + len = Len + 1, + in_counter = InCount + 1, + persistent_count = PCount1, + ram_msg_count = RamMsgCount + 1}}. + +maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status { + msg_on_disk = true }, MSCState) -> + {MsgStatus, MSCState}; +maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { + msg = Msg, guid = Guid, + is_persistent = IsPersistent }, MSCState) + when Force orelse IsPersistent -> + {ok, MSCState1} = + with_msg_store_state( + MSCState, IsPersistent, + fun (MsgStore, MSCState2) -> + Msg1 = Msg #basic_message { + %% don't persist any recoverable decoded properties + content = rabbit_binary_parser:clear_decoded_content( + Msg #basic_message.content)}, + rabbit_msg_store:write(MsgStore, Guid, Msg1, MSCState2) + end), + {MsgStatus #msg_status { msg_on_disk = true }, MSCState1}; +maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) -> + {MsgStatus, MSCState}. + +maybe_write_index_to_disk(_Force, MsgStatus = #msg_status { + index_on_disk = true }, IndexState) -> + true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION + {MsgStatus, IndexState}; +maybe_write_index_to_disk(Force, MsgStatus = #msg_status { + guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, + is_delivered = IsDelivered }, IndexState) + when Force orelse IsPersistent -> + true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION + IndexState1 = rabbit_queue_index:publish(Guid, SeqId, IsPersistent, + IndexState), + {MsgStatus #msg_status { index_on_disk = true }, + maybe_write_delivered(IsDelivered, SeqId, IndexState1)}; +maybe_write_index_to_disk(_Force, MsgStatus, IndexState) -> + {MsgStatus, IndexState}. + +maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, + State = #vqstate { index_state = IndexState, + msg_store_clients = MSCState }) -> + {MsgStatus1, MSCState1} = maybe_write_msg_to_disk( + ForceMsg, MsgStatus, MSCState), + {MsgStatus2, IndexState1} = maybe_write_index_to_disk( + ForceIndex, MsgStatus1, IndexState), + {MsgStatus2, State #vqstate { index_state = IndexState1, + msg_store_clients = MSCState1 }}. + +%%---------------------------------------------------------------------------- +%% Internal gubbins for acks +%%---------------------------------------------------------------------------- + +record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId, + is_persistent = IsPersistent, + msg_on_disk = MsgOnDisk } = MsgStatus, PA) -> + AckEntry = case MsgOnDisk of + true -> {IsPersistent, Guid}; + false -> MsgStatus + end, + dict:store(SeqId, AckEntry, PA). + +remove_pending_ack(KeepPersistent, + State = #vqstate { pending_ack = PA, + index_state = IndexState }) -> + {SeqIds, GuidsByStore} = dict:fold(fun accumulate_ack/3, + {[], orddict:new()}, PA), + State1 = State #vqstate { pending_ack = dict:new() }, + case KeepPersistent of + true -> case orddict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of + error -> State1; + {ok, Guids} -> ok = rabbit_msg_store:remove( + ?TRANSIENT_MSG_STORE, Guids), + State1 + end; + false -> IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = orddict:fold( + fun (MsgStore, Guids, ok) -> + rabbit_msg_store:remove(MsgStore, Guids) + end, ok, GuidsByStore), + State1 #vqstate { index_state = IndexState1 } + end. + +ack(_MsgStoreFun, _Fun, [], State) -> + State; +ack(MsgStoreFun, Fun, AckTags, State) -> + {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState, + persistent_count = PCount }} = + lists:foldl( + fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) -> + {ok, AckEntry} = dict:find(SeqId, PA), + {accumulate_ack(SeqId, AckEntry, Acc), + Fun(AckEntry, State2 #vqstate { + pending_ack = dict:erase(SeqId, PA) })} + end, {{[], orddict:new()}, State}, AckTags), + IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState), + ok = orddict:fold(fun (MsgStore, Guids, ok) -> + MsgStoreFun(MsgStore, Guids) + end, ok, GuidsByStore), + PCount1 = PCount - case orddict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of + error -> 0; + {ok, Guids} -> length(Guids) + end, + State1 #vqstate { index_state = IndexState1, + persistent_count = PCount1 }. + +accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS + msg_on_disk = false, + index_on_disk = false }, Acc) -> + Acc; +accumulate_ack(SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) -> + {cons_if(IsPersistent, SeqId, SeqIdsAcc), + rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, Dict)}. + +%%---------------------------------------------------------------------------- +%% Phase changes +%%---------------------------------------------------------------------------- + +%% Determine whether a reduction in memory use is necessary, and call +%% functions to perform the required phase changes. The function can +%% also be used to just do the former, by passing in dummy phase +%% change functions. +%% +%% The function does not report on any needed beta->delta conversions, +%% though the conversion function for that is called as necessary. The +%% reason is twofold. Firstly, this is safe because the conversion is +%% only ever necessary just after a transition to a +%% target_ram_msg_count of zero or after an incremental alpha->beta +%% conversion. In the former case the conversion is performed straight +%% away (i.e. any betas present at the time are converted to deltas), +%% and in the latter case the need for a conversion is flagged up +%% anyway. Secondly, this is necessary because we do not have a +%% precise and cheap predicate for determining whether a beta->delta +%% conversion is necessary - due to the complexities of retaining up +%% one segment's worth of messages in q3 - and thus would risk +%% perpetually reporting the need for a conversion when no such +%% conversion is needed. That in turn could cause an infinite loop. +reduce_memory_use(AlphaBetaFun, BetaGammaFun, BetaDeltaFun, State) -> + {Reduce, State1} = case chunk_size(State #vqstate.ram_msg_count, + State #vqstate.target_ram_msg_count) of + 0 -> {false, State}; + S1 -> {true, AlphaBetaFun(S1, State)} + end, + case State1 #vqstate.target_ram_msg_count of + infinity -> {Reduce, State1}; + 0 -> {Reduce, BetaDeltaFun(State1)}; + _ -> case chunk_size(State1 #vqstate.ram_index_count, + permitted_ram_index_count(State1)) of + ?IO_BATCH_SIZE = S2 -> {true, BetaGammaFun(S2, State1)}; + _ -> {Reduce, State1} + end + end. + +reduce_memory_use(State) -> + {_, State1} = reduce_memory_use(fun push_alphas_to_betas/2, + fun limit_ram_index/2, + fun push_betas_to_deltas/1, + State), + State1. + +limit_ram_index(Quota, State = #vqstate { q2 = Q2, q3 = Q3, + index_state = IndexState, + ram_index_count = RamIndexCount }) -> + {Q2a, {Quota1, IndexState1}} = limit_ram_index( + fun bpqueue:map_fold_filter_r/4, + Q2, {Quota, IndexState}), + %% TODO: we shouldn't be writing index entries for messages that + %% can never end up in delta due them residing in the only segment + %% held by q3. + {Q3a, {Quota2, IndexState2}} = limit_ram_index( + fun bpqueue:map_fold_filter_r/4, + Q3, {Quota1, IndexState1}), + State #vqstate { q2 = Q2a, q3 = Q3a, + index_state = IndexState2, + ram_index_count = RamIndexCount - (Quota - Quota2) }. + +limit_ram_index(_MapFoldFilterFun, Q, {0, IndexState}) -> + {Q, {0, IndexState}}; +limit_ram_index(MapFoldFilterFun, Q, {Quota, IndexState}) -> + MapFoldFilterFun( + fun erlang:'not'/1, + fun (MsgStatus, {0, _IndexStateN}) -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + stop; + (MsgStatus, {N, IndexStateN}) when N > 0 -> + false = MsgStatus #msg_status.index_on_disk, %% ASSERTION + {MsgStatus1, IndexStateN1} = + maybe_write_index_to_disk(true, MsgStatus, IndexStateN), + {true, m(MsgStatus1), {N-1, IndexStateN1}} + end, {Quota, IndexState}, Q). + +permitted_ram_index_count(#vqstate { len = 0 }) -> + infinity; +permitted_ram_index_count(#vqstate { len = Len, + q2 = Q2, + q3 = Q3, + delta = #delta { count = DeltaCount } }) -> + BetaLen = bpqueue:len(Q2) + bpqueue:len(Q3), + BetaLen - trunc(BetaLen * BetaLen / (Len - DeltaCount)). + +chunk_size(Current, Permitted) + when Permitted =:= infinity orelse Permitted >= Current -> + 0; +chunk_size(Current, Permitted) -> + lists:min([Current - Permitted, ?IO_BATCH_SIZE]). + +fetch_from_q3_to_q4(State = #vqstate { + q1 = Q1, + q2 = Q2, + delta = #delta { count = DeltaCount }, + q3 = Q3, + q4 = Q4, + ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount, + msg_store_clients = MSCState }) -> + case bpqueue:out(Q3) of + {empty, _Q3} -> + {empty, State}; + {{value, IndexOnDisk, MsgStatus = #msg_status { + msg = undefined, guid = Guid, + is_persistent = IsPersistent }}, Q3a} -> + {{ok, Msg = #basic_message {}}, MSCState1} = + read_from_msg_store(MSCState, IsPersistent, Guid), + Q4a = queue:in(m(MsgStatus #msg_status { msg = Msg }), Q4), + RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk), + true = RamIndexCount1 >= 0, %% ASSERTION + State1 = State #vqstate { q3 = Q3a, + q4 = Q4a, + ram_msg_count = RamMsgCount + 1, + ram_index_count = RamIndexCount1, + msg_store_clients = MSCState1 }, + State2 = + case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of + {true, true} -> + %% q3 is now empty, it wasn't before; delta is + %% still empty. So q2 must be empty, and q1 + %% can now be joined onto q4 + true = bpqueue:is_empty(Q2), %% ASSERTION + State1 #vqstate { q1 = queue:new(), + q4 = queue:join(Q4a, Q1) }; + {true, false} -> + maybe_deltas_to_betas(State1); + {false, _} -> + %% q3 still isn't empty, we've not touched + %% delta, so the invariants between q1, q2, + %% delta and q3 are maintained + State1 + end, + {loaded, State2} + end. + +maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) -> + State; +maybe_deltas_to_betas(State = #vqstate { + q2 = Q2, + delta = Delta, + q3 = Q3, + index_state = IndexState, + target_ram_msg_count = TargetRamMsgCount, + transient_threshold = TransientThreshold }) -> + case bpqueue:is_empty(Q3) orelse (TargetRamMsgCount /= 0) of + false -> + State; + true -> + #delta { start_seq_id = DeltaSeqId, + count = DeltaCount, + end_seq_id = DeltaSeqIdEnd } = Delta, + DeltaSeqId1 = + lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId), + DeltaSeqIdEnd]), + {List, IndexState1} = + rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, IndexState), + {Q3a, IndexState2} = betas_from_index_entries( + List, TransientThreshold, IndexState1), + State1 = State #vqstate { index_state = IndexState2 }, + case bpqueue:len(Q3a) of + 0 -> + %% we ignored every message in the segment due to + %% it being transient and below the threshold + maybe_deltas_to_betas( + State #vqstate { + delta = Delta #delta { start_seq_id = DeltaSeqId1 }}); + Q3aLen -> + Q3b = bpqueue:join(Q3, Q3a), + case DeltaCount - Q3aLen of + 0 -> + %% delta is now empty, but it wasn't + %% before, so can now join q2 onto q3 + State1 #vqstate { q2 = bpqueue:new(), + delta = ?BLANK_DELTA, + q3 = bpqueue:join(Q3b, Q2) }; + N when N > 0 -> + Delta1 = #delta { start_seq_id = DeltaSeqId1, + count = N, + end_seq_id = DeltaSeqIdEnd }, + State1 #vqstate { delta = Delta1, + q3 = Q3b } + end + end + end. + +push_alphas_to_betas(Quota, State) -> + { Quota1, State1} = maybe_push_q1_to_betas(Quota, State), + {_Quota2, State2} = maybe_push_q4_to_betas(Quota1, State1), + State2. + +maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) -> + maybe_push_alphas_to_betas( + fun queue:out/1, + fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q3 = Q3, delta = #delta { count = 0 } }) -> + State1 #vqstate { q1 = Q1a, + q3 = bpqueue:in(IndexOnDisk, MsgStatus, Q3) }; + (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q1a, State1 = #vqstate { q2 = Q2 }) -> + State1 #vqstate { q1 = Q1a, + q2 = bpqueue:in(IndexOnDisk, MsgStatus, Q2) } + end, Quota, Q1, State). + +maybe_push_q4_to_betas(Quota, State = #vqstate { q4 = Q4 }) -> + maybe_push_alphas_to_betas( + fun queue:out_r/1, + fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk }, + Q4a, State1 = #vqstate { q3 = Q3 }) -> + State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus, Q3), + q4 = Q4a } + end, Quota, Q4, State). + +maybe_push_alphas_to_betas(_Generator, _Consumer, Quota, _Q, + State = #vqstate { + ram_msg_count = RamMsgCount, + target_ram_msg_count = TargetRamMsgCount }) + when Quota =:= 0 orelse + TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount -> + {Quota, State}; +maybe_push_alphas_to_betas(Generator, Consumer, Quota, Q, State) -> + case Generator(Q) of + {empty, _Q} -> + {Quota, State}; + {{value, MsgStatus}, Qa} -> + {MsgStatus1 = #msg_status { msg_on_disk = true, + index_on_disk = IndexOnDisk }, + State1 = #vqstate { ram_msg_count = RamMsgCount, + ram_index_count = RamIndexCount }} = + maybe_write_to_disk(true, false, MsgStatus, State), + MsgStatus2 = m(MsgStatus1 #msg_status { msg = undefined }), + RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk), + State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1, + ram_index_count = RamIndexCount1 }, + maybe_push_alphas_to_betas(Generator, Consumer, Quota - 1, Qa, + Consumer(MsgStatus2, Qa, State2)) + end. + +push_betas_to_deltas(State = #vqstate { q2 = Q2, + delta = Delta, + q3 = Q3, + index_state = IndexState, + ram_index_count = RamIndexCount }) -> + {Delta2, Q2a, RamIndexCount2, IndexState2} = + push_betas_to_deltas(fun (Q2MinSeqId) -> Q2MinSeqId end, + fun bpqueue:out/1, Q2, + RamIndexCount, IndexState), + {Delta3, Q3a, RamIndexCount3, IndexState3} = + push_betas_to_deltas(fun rabbit_queue_index:next_segment_boundary/1, + fun bpqueue:out_r/1, Q3, + RamIndexCount2, IndexState2), + Delta4 = combine_deltas(Delta3, combine_deltas(Delta, Delta2)), + State #vqstate { q2 = Q2a, + delta = Delta4, + q3 = Q3a, + index_state = IndexState3, + ram_index_count = RamIndexCount3 }. + +push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) -> + case bpqueue:out(Q) of + {empty, _Q} -> + {?BLANK_DELTA, Q, RamIndexCount, IndexState}; + {{value, _IndexOnDisk1, #msg_status { seq_id = MinSeqId }}, _Qa} -> + {{value, _IndexOnDisk2, #msg_status { seq_id = MaxSeqId }}, _Qb} = + bpqueue:out_r(Q), + Limit = LimitFun(MinSeqId), + case MaxSeqId < Limit of + true -> {?BLANK_DELTA, Q, RamIndexCount, IndexState}; + false -> {Len, Qc, RamIndexCount1, IndexState1} = + push_betas_to_deltas(Generator, Limit, Q, 0, + RamIndexCount, IndexState), + {#delta { start_seq_id = Limit, + count = Len, + end_seq_id = MaxSeqId + 1 }, + Qc, RamIndexCount1, IndexState1} + end + end. + +push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) -> + case Generator(Q) of + {empty, _Q} -> + {Count, Q, RamIndexCount, IndexState}; + {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa} + when SeqId < Limit -> + {Count, Q, RamIndexCount, IndexState}; + {{value, IndexOnDisk, MsgStatus}, Qa} -> + {RamIndexCount1, IndexState1} = + case IndexOnDisk of + true -> {RamIndexCount, IndexState}; + false -> {#msg_status { index_on_disk = true }, + IndexState2} = + maybe_write_index_to_disk(true, MsgStatus, + IndexState), + {RamIndexCount - 1, IndexState2} + end, + push_betas_to_deltas( + Generator, Limit, Qa, Count + 1, RamIndexCount1, IndexState1) + end. diff --git a/src/rabbit_writer.erl b/src/rabbit_writer.erl index 54c60f5b..f90ee734 100644 --- a/src/rabbit_writer.erl +++ b/src/rabbit_writer.erl @@ -33,14 +33,14 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([start/3, start_link/3, shutdown/1, mainloop/1]). +-export([start/4, start_link/4, shutdown/1, mainloop/1]). -export([send_command/2, send_command/3, send_command_and_signal_back/3, send_command_and_signal_back/4, send_command_and_notify/5]). --export([internal_send_command/3, internal_send_command/5]). +-export([internal_send_command/4, internal_send_command/6]). -import(gen_tcp). --record(wstate, {sock, channel, frame_max}). +-record(wstate, {sock, channel, frame_max, protocol}). -define(HIBERNATE_AFTER, 5000). @@ -48,34 +48,53 @@ -ifdef(use_specs). --spec(start/3 :: (socket(), channel_number(), non_neg_integer()) -> pid()). --spec(start_link/3 :: (socket(), channel_number(), non_neg_integer()) -> pid()). --spec(send_command/2 :: (pid(), amqp_method()) -> 'ok'). --spec(send_command/3 :: (pid(), amqp_method(), content()) -> 'ok'). --spec(send_command_and_signal_back/3 :: (pid(), amqp_method(), pid()) -> 'ok'). +-spec(start/4 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), rabbit_types:protocol()) + -> rabbit_types:ok(pid())). +-spec(start_link/4 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), rabbit_types:protocol()) + -> rabbit_types:ok(pid())). +-spec(send_command/2 :: + (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). +-spec(send_command/3 :: + (pid(), rabbit_framing:amqp_method_record(), rabbit_types:content()) + -> 'ok'). +-spec(send_command_and_signal_back/3 :: + (pid(), rabbit_framing:amqp_method(), pid()) -> 'ok'). -spec(send_command_and_signal_back/4 :: - (pid(), amqp_method(), content(), pid()) -> 'ok'). + (pid(), rabbit_framing:amqp_method(), rabbit_types:content(), pid()) + -> 'ok'). -spec(send_command_and_notify/5 :: - (pid(), pid(), pid(), amqp_method(), content()) -> 'ok'). --spec(internal_send_command/3 :: - (socket(), channel_number(), amqp_method()) -> 'ok'). --spec(internal_send_command/5 :: - (socket(), channel_number(), amqp_method(), - content(), non_neg_integer()) -> 'ok'). + (pid(), pid(), pid(), rabbit_framing:amqp_method_record(), + rabbit_types:content()) + -> 'ok'). +-spec(internal_send_command/4 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + rabbit_framing:amqp_method_record(), rabbit_types:protocol()) + -> 'ok'). +-spec(internal_send_command/6 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + rabbit_framing:amqp_method_record(), rabbit_types:content(), + non_neg_integer(), rabbit_types:protocol()) + -> 'ok'). -endif. %%---------------------------------------------------------------------------- -start(Sock, Channel, FrameMax) -> - spawn(?MODULE, mainloop, [#wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}]). - -start_link(Sock, Channel, FrameMax) -> - spawn_link(?MODULE, mainloop, [#wstate{sock = Sock, +start(Sock, Channel, FrameMax, Protocol) -> + {ok, spawn(?MODULE, mainloop, [#wstate{sock = Sock, channel = Channel, - frame_max = FrameMax}]). + frame_max = FrameMax, + protocol = Protocol}])}. + +start_link(Sock, Channel, FrameMax, Protocol) -> + {ok, spawn_link(?MODULE, mainloop, [#wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol}])}. mainloop(State) -> receive @@ -85,35 +104,40 @@ mainloop(State) -> end. handle_message({send_command, MethodRecord}, - State = #wstate{sock = Sock, channel = Channel}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord), + State = #wstate{sock = Sock, channel = Channel, + protocol = Protocol}) -> + ok = internal_send_command_async(Sock, Channel, MethodRecord, Protocol), State; handle_message({send_command, MethodRecord, Content}, State = #wstate{sock = Sock, channel = Channel, - frame_max = FrameMax}) -> + frame_max = FrameMax, + protocol = Protocol}) -> ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), + Content, FrameMax, Protocol), State; handle_message({send_command_and_signal_back, MethodRecord, Parent}, - State = #wstate{sock = Sock, channel = Channel}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord), + State = #wstate{sock = Sock, channel = Channel, + protocol = Protocol}) -> + ok = internal_send_command_async(Sock, Channel, MethodRecord, Protocol), Parent ! rabbit_writer_send_command_signal, State; handle_message({send_command_and_signal_back, MethodRecord, Content, Parent}, State = #wstate{sock = Sock, channel = Channel, - frame_max = FrameMax}) -> + frame_max = FrameMax, + protocol = Protocol}) -> ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), + Content, FrameMax, Protocol), Parent ! rabbit_writer_send_command_signal, State; handle_message({send_command_and_notify, QPid, ChPid, MethodRecord, Content}, State = #wstate{sock = Sock, channel = Channel, - frame_max = FrameMax}) -> + frame_max = FrameMax, + protocol = Protocol}) -> ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), + Content, FrameMax, Protocol), rabbit_amqqueue:notify_sent(QPid, ChPid), State; handle_message({inet_reply, _, ok}, State) -> @@ -149,34 +173,37 @@ send_command_and_notify(W, Q, ChPid, MethodRecord, Content) -> shutdown(W) -> W ! shutdown, + rabbit_misc:unlink_and_capture_exit(W), ok. %--------------------------------------------------------------------------- -assemble_frames(Channel, MethodRecord) -> +assemble_frames(Channel, MethodRecord, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, none), - rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord). + rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord, + Protocol). -assemble_frames(Channel, MethodRecord, Content, FrameMax) -> +assemble_frames(Channel, MethodRecord, Content, FrameMax, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, Content), MethodName = rabbit_misc:method_record_type(MethodRecord), - true = rabbit_framing:method_has_content(MethodName), % assertion + true = Protocol:method_has_content(MethodName), % assertion MethodFrame = rabbit_binary_generator:build_simple_method_frame( - Channel, MethodRecord), + Channel, MethodRecord, Protocol), ContentFrames = rabbit_binary_generator:build_simple_content_frames( - Channel, Content, FrameMax), + Channel, Content, FrameMax, Protocol), [MethodFrame | ContentFrames]. tcp_send(Sock, Data) -> rabbit_misc:throw_on_error(inet_error, fun () -> rabbit_net:send(Sock, Data) end). -internal_send_command(Sock, Channel, MethodRecord) -> - ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord)). +internal_send_command(Sock, Channel, MethodRecord, Protocol) -> + ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, Protocol)). -internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) -> +internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax, + Protocol) -> ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, - Content, FrameMax)). + Content, FrameMax, Protocol)). %% gen_tcp:send/2 does a selective receive of {inet_reply, Sock, %% Status} to obtain the result. That is bad when it is called from @@ -196,13 +223,14 @@ internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) -> %% Also note that the port has bounded buffers and port_command blocks %% when these are full. So the fact that we process the result %% asynchronously does not impact flow control. -internal_send_command_async(Sock, Channel, MethodRecord) -> - true = port_cmd(Sock, assemble_frames(Channel, MethodRecord)), +internal_send_command_async(Sock, Channel, MethodRecord, Protocol) -> + true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, Protocol)), ok. -internal_send_command_async(Sock, Channel, MethodRecord, Content, FrameMax) -> +internal_send_command_async(Sock, Channel, MethodRecord, Content, FrameMax, + Protocol) -> true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, - Content, FrameMax)), + Content, FrameMax, Protocol)), ok. port_cmd(Sock, Data) -> diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 55753512..fb4c9b02 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -4,27 +4,50 @@ %% 1) the module name is supervisor2 %% %% 2) there is a new strategy called -%% simple_one_for_one_terminate. This is exactly the same as for -%% simple_one_for_one, except that children *are* explicitly -%% terminated as per the shutdown component of the child_spec. +%% simple_one_for_one_terminate. This is exactly the same as for +%% simple_one_for_one, except that children *are* explicitly +%% terminated as per the shutdown component of the child_spec. %% -%% All modifications are (C) 2010 LShift Ltd. +%% 3) child specifications can contain, as the restart type, a tuple +%% {permanent, Delay} | {transient, Delay} where Delay >= 0. The +%% delay, in seconds, indicates what should happen if a child, upon +%% being restarted, exceeds the MaxT and MaxR parameters. Thus, if +%% a child exits, it is restarted as normal. If it exits +%% sufficiently quickly and often to exceed the boundaries set by +%% the MaxT and MaxR parameters, and a Delay is specified, then +%% rather than stopping the supervisor, the supervisor instead +%% continues and tries to start up the child again, Delay seconds +%% later. +%% +%% Note that you can never restart more frequently than the MaxT +%% and MaxR parameters allow: i.e. you must wait until *both* the +%% Delay has passed *and* the MaxT and MaxR parameters allow the +%% child to be restarted. +%% +%% Also note that the Delay is a *minimum*. There is no guarantee +%% that the child will be restarted within that time, especially if +%% other processes are dying and being restarted at the same time - +%% essentially we have to wait for the delay to have passed and for +%% the MaxT and MaxR parameters to permit the child to be +%% restarted. This may require waiting for longer than Delay. +%% +%% All modifications are (C) 2010 Rabbit Technologies Ltd. %% %% %CopyrightBegin% -%% +%% %% Copyright Ericsson AB 1996-2009. All Rights Reserved. -%% +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. -%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% %CopyrightEnd% %% -module(supervisor2). @@ -43,6 +66,7 @@ %% Internal exports -export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). -export([handle_cast/2]). +-export([delayed_restart/2]). -define(DICT, dict). @@ -119,6 +143,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> end; check_childspecs(X) -> {error, {badarg, X}}. +delayed_restart(Supervisor, RestartDetails) -> + gen_server:cast(Supervisor, {delayed_restart, RestartDetails}). + %%% --------------------------------------------------- %%% %%% Initialize the supervisor. @@ -301,13 +328,13 @@ handle_call({terminate_child, Name}, _From, State) -> handle_call(which_children, _From, State) when ?is_simple(State) -> [#child{child_type = CT, modules = Mods}] = State#state.children, - Reply = lists:map(fun({Pid, _}) -> {undefined, Pid, CT, Mods} end, + Reply = lists:map(fun ({Pid, _}) -> {undefined, Pid, CT, Mods} end, ?DICT:to_list(State#state.dynamics)), {reply, Reply, State}; handle_call(which_children, _From, State) -> Resp = - lists:map(fun(#child{pid = Pid, name = Name, + lists:map(fun (#child{pid = Pid, name = Name, child_type = ChildType, modules = Mods}) -> {Name, Pid, ChildType, Mods} end, @@ -315,6 +342,20 @@ handle_call(which_children, _From, State) -> {reply, Resp, State}. +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when ?is_simple(State) -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when not (?is_simple(State)) -> + case get_child(Child#child.name, State) of + {value, Child} -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; + _ -> + {noreply, State} + end; + %%% Hopefully cause a function-clause as there is no API function %%% that utilizes cast. handle_cast(null, State) -> @@ -415,7 +456,7 @@ update_childspec1([], Children, KeepOld) -> lists:reverse(Children ++ KeepOld). update_chsp(OldCh, Children) -> - case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name -> + case lists:map(fun (Ch) when OldCh#child.name =:= Ch#child.name -> Ch#child{pid = OldCh#child.pid}; (Ch) -> Ch @@ -480,6 +521,16 @@ restart_child(Pid, Reason, State) -> {ok, State} end. +do_restart({RestartType, Delay}, Reason, Child, State) -> + case restart1(Child, State) of + {ok, NState} -> + {ok, NState}; + {terminate, NState} -> + {ok, _TRef} = timer:apply_after( + trunc(Delay*1000), ?MODULE, delayed_restart, + [self(), {{RestartType, Delay}, Reason, Child}]), + {ok, NState} + end; do_restart(permanent, Reason, Child, State) -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); @@ -500,14 +551,27 @@ do_restart(temporary, Reason, Child, State) -> restart(Child, State) -> case add_restart(State) of {ok, NState} -> - restart(NState#state.strategy, Child, NState); + restart(NState#state.strategy, Child, NState, fun restart/2); {terminate, NState} -> report_error(shutdown, reached_max_restart_intensity, Child, State#state.name), {shutdown, remove_child(Child, NState)} end. -restart(Strategy, Child, State) +restart1(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState, fun restart1/2); + {terminate, _NState} -> + %% we've reached the max restart intensity, but the + %% add_restart will have added to the restarts + %% field. Given we don't want to die here, we need to go + %% back to the old restarts field otherwise we'll never + %% attempt to restart later. + {terminate, State} + end. + +restart(Strategy, Child, State, Restart) when Strategy =:= simple_one_for_one orelse Strategy =:= simple_one_for_one_terminate -> #child{mfa = {M, F, A}} = Child, @@ -521,9 +585,9 @@ restart(Strategy, Child, State) {ok, NState}; {error, Error} -> report_error(start_error, Error, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(one_for_one, Child, State) -> +restart(one_for_one, Child, State, Restart) -> case do_start_child(State#state.name, Child) of {ok, Pid} -> NState = replace_child(Child#child{pid = Pid}, State), @@ -533,25 +597,25 @@ restart(one_for_one, Child, State) -> {ok, NState}; {error, Reason} -> report_error(start_error, Reason, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(rest_for_one, Child, State) -> +restart(rest_for_one, Child, State, Restart) -> {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), ChAfter2 = terminate_children(ChAfter, State#state.name), case start_children(ChAfter2, State#state.name) of {ok, ChAfter3} -> {ok, State#state{children = ChAfter3 ++ ChBefore}}; {error, ChAfter3} -> - restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + Restart(Child, State#state{children = ChAfter3 ++ ChBefore}) end; -restart(one_for_all, Child, State) -> +restart(one_for_all, Child, State, Restart) -> Children1 = del_child(Child#child.pid, State#state.children), Children2 = terminate_children(Children1, State#state.name), case start_children(Children2, State#state.name) of {ok, NChs} -> {ok, State#state{children = NChs}}; {error, NChs} -> - restart(Child, State#state{children = NChs}) + Restart(Child, State#state{children = NChs}) end. %%----------------------------------------------------------------- @@ -769,7 +833,9 @@ supname(N,_) -> N. %%% {Name, Func, RestartType, Shutdown, ChildType, Modules} %%% where Name is an atom %%% Func is {Mod, Fun, Args} == {atom, atom, list} -%%% RestartType is permanent | temporary | transient +%%% RestartType is permanent | temporary | transient | +%%% {permanent, Delay} | +%%% {transient, Delay} where Delay >= 0 %%% Shutdown = integer() | infinity | brutal_kill %%% ChildType = supervisor | worker %%% Modules = [atom()] | dynamic @@ -815,10 +881,17 @@ validFunc({M, F, A}) when is_atom(M), is_list(A) -> true; validFunc(Func) -> throw({invalid_mfa, Func}). -validRestartType(permanent) -> true; -validRestartType(temporary) -> true; -validRestartType(transient) -> true; -validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}). +validRestartType(permanent) -> true; +validRestartType(temporary) -> true; +validRestartType(transient) -> true; +validRestartType({permanent, Delay}) -> validDelay(Delay); +validRestartType({transient, Delay}) -> validDelay(Delay); +validRestartType(RestartType) -> throw({invalid_restart_type, + RestartType}). + +validDelay(Delay) when is_number(Delay), + Delay >= 0 -> true; +validDelay(What) -> throw({invalid_delay, What}). validShutdown(Shutdown, _) when is_integer(Shutdown), Shutdown > 0 -> true; @@ -828,7 +901,7 @@ validShutdown(Shutdown, _) -> throw({invalid_shutdown, Shutdown}). validMods(dynamic) -> true; validMods(Mods) when is_list(Mods) -> - lists:foreach(fun(Mod) -> + lists:foreach(fun (Mod) -> if is_atom(Mod) -> ok; true -> throw({invalid_module, Mod}) diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl index 3b23daa5..cc4982c9 100644 --- a/src/tcp_acceptor.erl +++ b/src/tcp_acceptor.erl @@ -75,6 +75,13 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}}, error_logger:info_msg("accepted TCP connection on ~s:~p from ~s:~p~n", [inet_parse:ntoa(Address), Port, inet_parse:ntoa(PeerAddress), PeerPort]), + %% In the event that somebody floods us with connections we can spew + %% the above message at error_logger faster than it can keep up. + %% So error_logger's mailbox grows unbounded until we eat all the + %% memory available and crash. So here's a meaningless synchronous call + %% to the underlying gen_event mechanism - when it returns the mailbox + %% is drained. + gen_event:which_handlers(error_logger), %% handle file_handle_cache:release_on_death(apply(M, F, A ++ [Sock])) catch {inet_error, Reason} -> diff --git a/src/test_sup.erl b/src/test_sup.erl new file mode 100644 index 00000000..f41793bc --- /dev/null +++ b/src/test_sup.erl @@ -0,0 +1,94 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(test_sup). + +-behaviour(supervisor2). + +-export([test_supervisor_delayed_restart/0, + init/1, start_child/0]). + +test_supervisor_delayed_restart() -> + passed = with_sup(simple_one_for_one_terminate, + fun (SupPid) -> + {ok, _ChildPid} = + supervisor2:start_child(SupPid, []), + test_supervisor_delayed_restart(SupPid) + end), + passed = with_sup(one_for_one, fun test_supervisor_delayed_restart/1). + +test_supervisor_delayed_restart(SupPid) -> + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + timeout = ping_child(SupPid), + timer:sleep(1010), + ok = ping_child(SupPid), + passed. + +with_sup(RestartStrategy, Fun) -> + {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]), + Res = Fun(SupPid), + exit(SupPid, shutdown), + rabbit_misc:unlink_and_capture_exit(SupPid), + Res. + +init([RestartStrategy]) -> + {ok, {{RestartStrategy, 1, 1}, + [{test, {test_sup, start_child, []}, {permanent, 1}, + 16#ffffffff, worker, [test_sup]}]}}. + +start_child() -> + {ok, proc_lib:spawn_link(fun run_child/0)}. + +ping_child(SupPid) -> + Ref = make_ref(), + get_child_pid(SupPid) ! {ping, Ref, self()}, + receive {pong, Ref} -> ok + after 1000 -> timeout + end. + +exit_child(SupPid) -> + true = exit(get_child_pid(SupPid), abnormal), + ok. + +get_child_pid(SupPid) -> + [{_Id, ChildPid, worker, [test_sup]}] = + supervisor2:which_children(SupPid), + ChildPid. + +run_child() -> + receive {ping, Ref, Pid} -> Pid ! {pong, Ref}, + run_child() + end. diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index cd03fcc6..bbc3a8c0 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -72,8 +72,10 @@ -ifdef(use_specs). --spec(start_link/1 :: (float()) -> - ('ignore' | {'error', any()} | {'ok', pid()})). +-spec(start_link/1 :: + (float()) -> 'ignore' | + rabbit_types:error(any()) | + rabbit_types:ok(pid())). -spec(update/0 :: () -> 'ok'). -spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')). -spec(get_vm_limit/0 :: () -> (non_neg_integer() | 'unknown')). diff --git a/src/worker_pool.erl b/src/worker_pool.erl index 97e07545..01ce3535 100644 --- a/src/worker_pool.erl +++ b/src/worker_pool.erl @@ -52,7 +52,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(submit/1 :: (fun (() -> A) | {atom(), atom(), [any()]}) -> A). -spec(submit_async/1 :: (fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok'). diff --git a/src/worker_pool_sup.erl b/src/worker_pool_sup.erl index 4ded63a8..afa21164 100644 --- a/src/worker_pool_sup.erl +++ b/src/worker_pool_sup.erl @@ -41,9 +41,9 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). --spec(start_link/1 :: - (non_neg_integer()) -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(start_link/1 :: (non_neg_integer()) -> + 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -endif. diff --git a/src/worker_pool_worker.erl b/src/worker_pool_worker.erl index 57901fd5..a61e4cc3 100644 --- a/src/worker_pool_worker.erl +++ b/src/worker_pool_worker.erl @@ -44,7 +44,8 @@ -ifdef(use_specs). --spec(start_link/1 :: (any()) -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/1 :: + (any()) -> {'ok', pid()} | 'ignore' | rabbit_types:error(any())). -spec(submit/2 :: (pid(), fun (() -> A) | {atom(), atom(), [any()]}) -> A). -spec(submit_async/2 :: (pid(), fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok'). |