diff options
79 files changed, 7628 insertions, 1150 deletions
@@ -11,7 +11,7 @@ syntax: regexp ^dist/ ^include/rabbit_framing\.hrl$ ^include/rabbit_framing_spec\.hrl$ -^src/rabbit_framing\.erl$ +^src/rabbit_framing_amqp.*\.erl$ ^src/.*\_usage.erl$ ^rabbit\.plt$ ^basic.plt$ @@ -11,11 +11,11 @@ SOURCE_DIR=src EBIN_DIR=ebin INCLUDE_DIR=include DOCS_DIR=docs -INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit_framing_spec.hrl -SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing.erl $(USAGES_ERL) +INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) $(INCLUDE_DIR)/rabbit_framing.hrl +SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing_amqp_0_9_1.erl $(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl $(USAGES_ERL) BEAM_TARGETS=$(patsubst $(SOURCE_DIR)/%.erl, $(EBIN_DIR)/%.beam, $(SOURCES)) -TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit_framing_spec.hrl $(BEAM_TARGETS) -WEB_URL=http://stage.rabbitmq.com/ +TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS) +WEB_URL=http://www.rabbitmq.com/ MANPAGES=$(patsubst %.xml, %.gz, $(wildcard $(DOCS_DIR)/*.[0-9].xml)) WEB_MANPAGES=$(patsubst %.xml, %.man.xml, $(wildcard $(DOCS_DIR)/*.[0-9].xml) $(DOCS_DIR)/rabbitmq-service.xml) USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml $(DOCS_DIR)/rabbitmq-multi.1.xml @@ -41,10 +41,10 @@ RABBIT_PLT=rabbit.plt ifndef USE_SPECS # our type specs rely on features and bug fixes in dialyzer that are -# only available in R13B01 upwards (R13B01 is eshell 5.7.2) +# only available in R14A upwards (R13B04 is erts 5.7.5) # # NB: the test assumes that version number will only contain single digits -USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.1" ]; then echo "true"; else echo "false"; fi) +USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.5" ]; then echo "true"; else echo "false"; fi) endif #other args: +native +"{hipe,[o3,verbose]}" 
-Ddebug=true +debug_info +no_strict_record_tests @@ -56,7 +56,8 @@ TARGET_SRC_DIR=dist/$(TARBALL_NAME) SIBLING_CODEGEN_DIR=../rabbitmq-codegen/ AMQP_CODEGEN_DIR=$(shell [ -d $(SIBLING_CODEGEN_DIR) ] && echo $(SIBLING_CODEGEN_DIR) || echo codegen) -AMQP_SPEC_JSON_FILES=$(AMQP_CODEGEN_DIR)/amqp-0.9.1.json +AMQP_SPEC_JSON_FILES_0_9_1=$(AMQP_CODEGEN_DIR)/amqp-rabbitmq-0.9.1.json +AMQP_SPEC_JSON_FILES_0_8=$(AMQP_CODEGEN_DIR)/amqp-rabbitmq-0.8.json ERL_CALL=erl_call -sname $(RABBITMQ_NODENAME) -e @@ -76,6 +77,18 @@ SCRIPTS_REL_PATH=$(shell ./calculate-relative $(TARGET_DIR)/sbin $(SBIN_DIR)) endif endif +# Versions prior to this are not supported +NEED_MAKE := 3.80 +ifneq "$(NEED_MAKE)" "$(firstword $(sort $(NEED_MAKE) $(MAKE_VERSION)))" +$(error Versions of make prior to $(NEED_MAKE) are not supported) +endif + +# .DEFAULT_GOAL introduced in 3.81 +DEFAULT_GOAL_MAKE := 3.81 +ifneq "$(DEFAULT_GOAL_MAKE)" "$(firstword $(sort $(DEFAULT_GOAL_MAKE) $(MAKE_VERSION)))" +.DEFAULT_GOAL=all +endif + all: $(TARGETS) $(DEPS_FILE): $(SOURCES) $(INCLUDES) @@ -87,14 +100,14 @@ $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app $(EBIN_DIR)/%.beam: erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $< -$(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES) - $(PYTHON) codegen.py header $(AMQP_SPEC_JSON_FILES) $@ +$(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_9_1) $(AMQP_SPEC_JSON_FILES_0_8) + $(PYTHON) codegen.py --ignore-conflicts header $(AMQP_SPEC_JSON_FILES_0_9_1) $(AMQP_SPEC_JSON_FILES_0_8) $@ -$(INCLUDE_DIR)/rabbit_framing_spec.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES) - $(PYTHON) codegen.py spec $(AMQP_SPEC_JSON_FILES) $@ +$(SOURCE_DIR)/rabbit_framing_amqp_0_9_1.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_9_1) + $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES_0_9_1) $@ 
-$(SOURCE_DIR)/rabbit_framing.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES) - $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES) $@ +$(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_8) + $(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES_0_8) $@ dialyze: $(BEAM_TARGETS) $(BASIC_PLT) $(ERL_EBIN) -eval \ @@ -119,7 +132,7 @@ $(BASIC_PLT): $(BEAM_TARGETS) clean: rm -f $(EBIN_DIR)/*.beam rm -f $(EBIN_DIR)/rabbit.app $(EBIN_DIR)/rabbit.boot $(EBIN_DIR)/rabbit.script $(EBIN_DIR)/rabbit.rel - rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(INCLUDE_DIR)/rabbit_framing_spec.hrl $(SOURCE_DIR)/rabbit_framing.erl codegen.pyc + rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(SOURCE_DIR)/rabbit_framing_amqp_*.erl codegen.pyc rm -f $(DOCS_DIR)/*.[0-9].gz $(DOCS_DIR)/*.man.xml $(DOCS_DIR)/*.erl $(USAGES_ERL) rm -f $(RABBIT_PLT) rm -f $(DEPS_FILE) @@ -193,7 +206,7 @@ srcdist: distclean >> $(TARGET_SRC_DIR)/INSTALL cp README.in $(TARGET_SRC_DIR)/README elinks -dump -no-references -no-numbering $(WEB_URL)build-server.html \ - >> $(TARGET_SRC_DIR)/BUILD + >> $(TARGET_SRC_DIR)/README sed -i.save 's/%%VSN%%/$(VERSION)/' $(TARGET_SRC_DIR)/ebin/rabbit_app.in && rm -f $(TARGET_SRC_DIR)/ebin/rabbit_app.in.save cp -r $(AMQP_CODEGEN_DIR)/* $(TARGET_SRC_DIR)/codegen/ @@ -214,9 +227,10 @@ distclean: clean # xmlto can not read from standard input, so we mess with a tmp file. 
%.gz: %.xml $(DOCS_DIR)/examples-to-end.xsl - xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \ - xmlto man -o $(DOCS_DIR) --stringparam man.indent.verbatims=0 $<.tmp && \ - gzip -f $(DOCS_DIR)/`basename $< .xml` + xmlto --version | grep -E '^xmlto version 0\.0\.([0-9]|1[1-8])$$' >/dev/null || opt='--stringparam man.indent.verbatims=0' ; \ + xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \ + xmlto man -o $(DOCS_DIR) $$opt $<.tmp && \ + gzip -f $(DOCS_DIR)/`basename $< .xml` rm -f $<.tmp # Use tmp files rather than a pipeline so that we get meaningful errors @@ -268,7 +282,7 @@ install_dirs: mkdir -p $(SBIN_DIR) mkdir -p $(MAN_DIR) -$(foreach XML, $(USAGES_XML), $(eval $(call usage_dep, $(XML)))) +$(foreach XML,$(USAGES_XML),$(eval $(call usage_dep, $(XML)))) # Note that all targets which depend on clean must have clean in their # name. Also any target that doesn't depend on clean should not have @@ -315,11 +315,16 @@ def genErl(spec): methods = spec.allMethods() printFileHeader() - print """-module(rabbit_framing). --include("rabbit_framing.hrl"). - + module = "rabbit_framing_amqp_%d_%d" % (spec.major, spec.minor) + if spec.revision != 0: + module = "%s_%d" % (module, spec.revision) + if module == "rabbit_framing_amqp_8_0": + module = "rabbit_framing_amqp_0_8" + print "-module(%s)." % module + print """-include("rabbit_framing.hrl"). + +-export([version/0]). -export([lookup_method_name/1]). - -export([method_id/1]). -export([method_has_content/1]). -export([is_method_synchronous/1]). @@ -332,12 +337,70 @@ def genErl(spec): -export([lookup_amqp_exception/1]). -export([amqp_exception/1]). -bitvalue(true) -> 1; -bitvalue(false) -> 0; -bitvalue(undefined) -> 0. +""" + print "%% Various types" + print "-ifdef(use_specs)." + + print """-export_type([amqp_table/0, amqp_property_type/0, amqp_method_record/0, + amqp_method_name/0, amqp_method/0, amqp_class_id/0, + amqp_value/0, amqp_array/0, amqp_exception/0, amqp_property_record/0]). 
+ +-type(amqp_field_type() :: + 'longstr' | 'signedint' | 'decimal' | 'timestamp' | + 'table' | 'byte' | 'double' | 'float' | 'long' | + 'short' | 'bool' | 'binary' | 'void'). +-type(amqp_property_type() :: + 'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' | + 'longlongint' | 'timestamp' | 'bit' | 'table'). + +-type(amqp_table() :: [{binary(), amqp_field_type(), amqp_value()}]). +-type(amqp_array() :: [{amqp_field_type(), amqp_value()}]). +-type(amqp_value() :: binary() | % longstr + integer() | % signedint + {non_neg_integer(), non_neg_integer()} | % decimal + amqp_table() | + amqp_array() | + byte() | % byte + float() | % double + integer() | % long + integer() | % short + boolean() | % bool + binary() | % binary + 'undefined' | % void + non_neg_integer() % timestamp + ). +""" + + print prettyType("amqp_method_name()", + [m.erlangName() for m in methods]) + print prettyType("amqp_method()", + ["{%s, %s}" % (m.klass.index, m.index) for m in methods], + 6) + print prettyType("amqp_method_record()", + ["#%s{}" % (m.erlangName()) for m in methods]) + fieldNames = set() + for m in methods: + fieldNames.update(m.arguments) + fieldNames = [erlangize(f.name) for f in fieldNames] + print prettyType("amqp_method_field_name()", + fieldNames) + print prettyType("amqp_property_record()", + ["#'P_%s'{}" % erlangize(c.name) for c in spec.allClasses()]) + print prettyType("amqp_exception()", + ["'%s'" % erlangConstantName(c).lower() for (c, v, cls) in spec.constants]) + print prettyType("amqp_exception_code()", + ["%i" % v for (c, v, cls) in spec.constants]) + classIds = set() + for m in spec.allMethods(): + classIds.add(m.klass.index) + print prettyType("amqp_class_id()", + ["%i" % ci for ci in classIds]) + print "-endif. % use_specs" + print """ %% Method signatures -ifdef(use_specs). +-spec(version/0 :: () -> {non_neg_integer(), non_neg_integer(), non_neg_integer()}). -spec(lookup_method_name/1 :: (amqp_method()) -> amqp_method_name()). 
-spec(method_id/1 :: (amqp_method_name()) -> amqp_method()). -spec(method_has_content/1 :: (amqp_method_name()) -> boolean()). @@ -351,7 +414,15 @@ bitvalue(undefined) -> 0. -spec(lookup_amqp_exception/1 :: (amqp_exception()) -> {boolean(), amqp_exception_code(), binary()}). -spec(amqp_exception/1 :: (amqp_exception_code()) -> amqp_exception()). -endif. % use_specs + +bitvalue(true) -> 1; +bitvalue(false) -> 0; +bitvalue(undefined) -> 0. """ + version = "{%d, %d, %d}" % (spec.major, spec.minor, spec.revision) + if version == '{8, 0, 0}': version = '{0, 8, 0}' + print "version() -> %s." % (version) + for m in methods: genLookupMethodName(m) print "lookup_method_name({_ClassId, _MethodId} = Id) -> exit({unknown_method_id, Id})." @@ -410,9 +481,6 @@ def genHrl(spec): methods = spec.allMethods() printFileHeader() - print "-define(PROTOCOL_VERSION_MAJOR, %d)." % (spec.major) - print "-define(PROTOCOL_VERSION_MINOR, %d)." % (spec.minor) - print "-define(PROTOCOL_VERSION_REVISION, %d)." % (spec.revision) print "-define(PROTOCOL_PORT, %d)." % (spec.port) for (c,v,cls) in spec.constants: @@ -426,63 +494,6 @@ def genHrl(spec): for c in spec.allClasses(): print "-record('P_%s', {%s})." % (erlangize(c.name), fieldNameList(c.fields)) - print "-ifdef(use_specs)." 
- print "%% Various types" - print prettyType("amqp_method_name()", - [m.erlangName() for m in methods]) - print prettyType("amqp_method()", - ["{%s, %s}" % (m.klass.index, m.index) for m in methods], - 6) - print prettyType("amqp_method_record()", - ["#%s{}" % (m.erlangName()) for m in methods]) - fieldNames = set() - for m in methods: - fieldNames.update(m.arguments) - fieldNames = [erlangize(f.name) for f in fieldNames] - print prettyType("amqp_method_field_name()", - fieldNames) - print prettyType("amqp_property_record()", - ["#'P_%s'{}" % erlangize(c.name) for c in spec.allClasses()]) - print prettyType("amqp_exception()", - ["'%s'" % erlangConstantName(c).lower() for (c, v, cls) in spec.constants]) - print prettyType("amqp_exception_code()", - ["%i" % v for (c, v, cls) in spec.constants]) - print "-endif. % use_specs" - -def genSpec(spec): - methods = spec.allMethods() - - printFileHeader() - print """% Hard-coded types --type(amqp_field_type() :: - 'longstr' | 'signedint' | 'decimal' | 'timestamp' | - 'table' | 'byte' | 'double' | 'float' | 'long' | - 'short' | 'bool' | 'binary' | 'void'). --type(amqp_property_type() :: - 'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' | - 'longlongint' | 'timestamp' | 'bit' | 'table'). -%% we could make this more precise but ultimately are limited by -%% dialyzer's lack of support for recursive types --type(amqp_table() :: [{binary(), amqp_field_type(), any()}]). -%% TODO: make this more precise --type(amqp_properties() :: tuple()). - --type(channel_number() :: non_neg_integer()). --type(resource_name() :: binary()). --type(routing_key() :: binary()). --type(username() :: binary()). --type(password() :: binary()). --type(vhost() :: binary()). --type(ctag() :: binary()). --type(exchange_type() :: atom()). --type(binding_key() :: binary()). 
-""" - print "% Auto-generated types" - classIds = set() - for m in spec.allMethods(): - classIds.add(m.klass.index) - print prettyType("amqp_class_id()", - ["%i" % ci for ci in classIds]) def generateErl(specPath): genErl(AmqpSpec(specPath)) @@ -490,11 +501,7 @@ def generateErl(specPath): def generateHrl(specPath): genHrl(AmqpSpec(specPath)) -def generateSpec(specPath): - genSpec(AmqpSpec(specPath)) - if __name__ == "__main__": do_main_dict({"header": generateHrl, - "spec": generateSpec, "body": generateErl}) diff --git a/docs/html-to-website-xml.xsl b/docs/html-to-website-xml.xsl index f2117e26..662dbea0 100644 --- a/docs/html-to-website-xml.xsl +++ b/docs/html-to-website-xml.xsl @@ -58,13 +58,13 @@ <!-- Specific instructions to revert the DocBook HTML to be more like our ad-hoc XML schema --> <xsl:template match="div[@class='refsect1'] | div[@class='refnamediv'] | div[@class='refsynopsisdiv']"> - <doc:section name="{@title}"> + <doc:section name="{h2}"> <xsl:apply-templates select="node()"/> </doc:section> </xsl:template> <xsl:template match="div[@class='refsect2']"> - <doc:subsection name="{@title}"> + <doc:subsection name="{h3}"> <xsl:apply-templates select="node()"/> </doc:subsection> </xsl:template> diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml index a2038cf0..a7d064f1 100644 --- a/docs/rabbitmqctl.1.xml +++ b/docs/rabbitmqctl.1.xml @@ -270,8 +270,8 @@ <title>Cluster management</title> <variablelist> - <varlistentry> - <term><cmdsynopsis><command>cluster</command> <arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> + <varlistentry id="cluster"> + <term><cmdsynopsis><command>cluster</command><arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> <listitem> <variablelist> <varlistentry> @@ -281,7 +281,8 @@ </variablelist> <para> Instruct the node to become member of a cluster with the - specified nodes. + specified nodes. 
To cluster with currently offline nodes, + use <link linkend="force_cluster"><command>force_cluster</command></link>. </para> <para> Cluster nodes can be of two types: disk or ram. Disk nodes @@ -334,6 +335,29 @@ </para> </listitem> </varlistentry> + <varlistentry id="force_cluster"> + <term><cmdsynopsis><command>force_cluster</command><arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term> + <listitem> + <variablelist> + <varlistentry> + <term>clusternode</term> + <listitem><para>Subset of the nodes of the cluster to which this node should be connected.</para></listitem> + </varlistentry> + </variablelist> + <para> + Instruct the node to become member of a cluster with the + specified nodes. This will succeed even if the specified nodes + are offline. For a more detailed description, see + <link linkend="cluster"><command>cluster</command>.</link> + </para> + <para> + Note that this variant of the cluster command just + ignores the current status of the specified nodes. + Clustering may still fail for a variety of other + reasons. + </para> + </listitem> + </varlistentry> </variablelist> </refsect2> @@ -603,10 +627,12 @@ <para role="example-prefix">For example:</para> <screen role="example">rabbitmqctl list_permissions -p /myvhost</screen> <para role="example"> - This command instructs the RabbitMQ broker to list all the - users which have been granted access to the virtual host - called <command>/myvhost</command>, and the permissions they - have for operations on resources in that virtual host. + This command instructs the RabbitMQ broker to list all + the users which have been granted access to the virtual + host called <command>/myvhost</command>, and the + permissions they have for operations on resources in + that virtual host. Note that an empty string means no + permissions granted. 
</para> </listitem> </varlistentry> @@ -862,6 +888,10 @@ <listitem><para>Number of channels using the connection.</para></listitem> </varlistentry> <varlistentry> + <term>protocol</term> + <listitem><para>Version of the AMQP protocol in use (currently one of <command>{0,9,1}</command> or <command>{0,8,0}</command>). Note that if a client requests an AMQP 0-9 connection, we treat it as AMQP 0-9-1.</para></listitem> + </varlistentry> + <varlistentry> <term>user</term> <listitem><para>Username associated with the connection.</para></listitem> </varlistentry> diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in index bdf407eb..2cd28abb 100644 --- a/ebin/rabbit_app.in +++ b/ebin/rabbit_app.in @@ -11,16 +11,19 @@ rabbit_sup, rabbit_tcp_client_sup]}, {applications, [kernel, stdlib, sasl, mnesia, os_mon]}, -%% we also depend on ssl but it shouldn't be in here as we don't -%% actually want to start it +%% we also depend on crypto, public_key and ssl but they shouldn't be +%% in here as we don't actually want to start it {mod, {rabbit, []}}, {env, [{tcp_listeners, [{"0.0.0.0", 5672}]}, {ssl_listeners, []}, {ssl_options, []}, {vm_memory_high_watermark, 0.4}, - {backing_queue_module, rabbit_invariable_queue}, + {msg_store_index_module, rabbit_msg_store_ets_index}, + {backing_queue_module, rabbit_variable_queue}, {persister_max_wrap_entries, 500}, {persister_hibernate_after, 10000}, + {msg_store_file_size_limit, 16777216}, + {queue_index_max_journal_entries, 262144}, {default_user, <<"guest">>}, {default_pass, <<"guest">>}, {default_vhost, <<"/">>}, diff --git a/include/rabbit.hrl b/include/rabbit.hrl index d4327980..6364d60f 100644 --- a/include/rabbit.hrl +++ b/include/rabbit.hrl @@ -36,7 +36,8 @@ -record(vhost, {virtual_host, dummy}). --record(connection, {user, timeout_sec, frame_max, vhost, client_properties}). +-record(connection, {protocol, user, timeout_sec, frame_max, vhost, + client_properties}). 
-record(content, {class_id, @@ -44,12 +45,13 @@ properties_bin, %% either 'none', or an encoded properties binary %% Note: at most one of properties and properties_bin can be %% 'none' at once. + protocol, %% The protocol under which properties_bin was encoded payload_fragments_rev %% list of binaries, in reverse order (!) }). -record(resource, {virtual_host, kind, name}). --record(exchange, {name, type, durable, arguments}). +-record(exchange, {name, type, durable, auto_delete, arguments}). -record(amqqueue, {name, durable, auto_delete, exclusive_owner = none, arguments, pid}). @@ -68,115 +70,13 @@ -record(ssl_socket, {tcp, ssl}). -record(delivery, {mandatory, immediate, txn, sender, message}). - -record(amqp_error, {name, explanation, method = none}). %%---------------------------------------------------------------------------- --ifdef(use_specs). - --include("rabbit_framing_spec.hrl"). - --type(maybe(T) :: T | 'none'). --type(erlang_node() :: atom()). --type(ssl_socket() :: #ssl_socket{}). --type(socket() :: port() | ssl_socket()). --type(thunk(T) :: fun(() -> T)). --type(info_key() :: atom()). --type(info() :: {info_key(), any()}). --type(regexp() :: binary()). --type(file_path() :: string()). - -%% this is really an abstract type, but dialyzer does not support them --type(guid() :: binary()). --type(txn() :: guid()). --type(pkey() :: guid()). --type(r(Kind) :: - #resource{virtual_host :: vhost(), - kind :: Kind, - name :: resource_name()}). --type(queue_name() :: r('queue')). --type(exchange_name() :: r('exchange')). --type(user() :: - #user{username :: username(), - password :: password()}). --type(permission() :: - #permission{configure :: regexp(), - write :: regexp(), - read :: regexp()}). --type(amqqueue() :: - #amqqueue{name :: queue_name(), - durable :: boolean(), - auto_delete :: boolean(), - exclusive_owner :: maybe(pid()), - arguments :: amqp_table(), - pid :: maybe(pid())}). 
--type(exchange() :: - #exchange{name :: exchange_name(), - type :: exchange_type(), - durable :: boolean(), - arguments :: amqp_table()}). --type(binding() :: - #binding{exchange_name :: exchange_name(), - queue_name :: queue_name(), - key :: binding_key()}). -%% TODO: make this more precise by tying specific class_ids to -%% specific properties --type(undecoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: 'none', - properties_bin :: binary(), - payload_fragments_rev :: [binary()]} | - #content{class_id :: amqp_class_id(), - properties :: amqp_properties(), - properties_bin :: 'none', - payload_fragments_rev :: [binary()]}). --type(unencoded_content() :: undecoded_content()). --type(decoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: amqp_properties(), - properties_bin :: maybe(binary()), - payload_fragments_rev :: [binary()]}). --type(encoded_content() :: - #content{class_id :: amqp_class_id(), - properties :: maybe(amqp_properties()), - properties_bin :: binary(), - payload_fragments_rev :: [binary()]}). --type(content() :: undecoded_content() | decoded_content()). --type(basic_message() :: - #basic_message{exchange_name :: exchange_name(), - routing_key :: routing_key(), - content :: content(), - guid :: guid(), - is_persistent :: boolean()}). --type(message() :: basic_message()). --type(delivery() :: - #delivery{mandatory :: boolean(), - immediate :: boolean(), - txn :: maybe(txn()), - sender :: pid(), - message :: message()}). -%% this really should be an abstract type --type(msg_id() :: non_neg_integer()). --type(qmsg() :: {queue_name(), pid(), msg_id(), boolean(), message()}). --type(listener() :: - #listener{node :: erlang_node(), - protocol :: atom(), - host :: string() | atom(), - port :: non_neg_integer()}). --type(not_found() :: {'error', 'not_found'}). --type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). 
--type(amqp_error() :: - #amqp_error{name :: atom(), - explanation :: string(), - method :: atom()}). - --endif. - -%%---------------------------------------------------------------------------- - -define(COPYRIGHT_MESSAGE, "Copyright (C) 2007-2010 LShift Ltd., Cohesive Financial Technologies LLC., and Rabbit Technologies Ltd."). -define(INFORMATION_MESSAGE, "Licensed under the MPL. See http://www.rabbitmq.com/"). +-define(PROTOCOL_VERSION, "AMQP 0-9-1 / 0-9 / 0-8"). -define(ERTS_MINIMUM, "5.6.3"). -define(MAX_WAIT, 16#ffffffff). diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl index 55cd126e..005994f0 100644 --- a/include/rabbit_backing_queue_spec.hrl +++ b/include/rabbit_backing_queue_spec.hrl @@ -31,33 +31,34 @@ -type(fetch_result() :: %% Message, IsDelivered, AckTag, Remaining_Len - ('empty'|{basic_message(), boolean(), ack(), non_neg_integer()})). + ('empty'|{rabbit_types:basic_message(), boolean(), ack(), non_neg_integer()})). -type(is_durable() :: boolean()). -type(attempt_recovery() :: boolean()). -type(purged_msg_count() :: non_neg_integer()). -type(ack_required() :: boolean()). --spec(start/1 :: ([queue_name()]) -> 'ok'). --spec(init/3 :: (queue_name(), is_durable(), attempt_recovery()) -> state()). +-spec(start/1 :: ([rabbit_amqqueue:name()]) -> 'ok'). +-spec(stop/0 :: () -> 'ok'). +-spec(init/3 :: (rabbit_amqqueue:name(), is_durable(), attempt_recovery()) -> state()). -spec(terminate/1 :: (state()) -> state()). -spec(delete_and_terminate/1 :: (state()) -> state()). -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}). --spec(publish/2 :: (basic_message(), state()) -> state()). +-spec(publish/2 :: (rabbit_types:basic_message(), state()) -> state()). -spec(publish_delivered/3 :: - (ack_required(), basic_message(), state()) -> {ack(), state()}). + (ack_required(), rabbit_types:basic_message(), state()) -> {ack(), state()}). -spec(fetch/2 :: (ack_required(), state()) -> {fetch_result(), state()}). 
-spec(ack/2 :: ([ack()], state()) -> state()). --spec(tx_publish/3 :: (txn(), basic_message(), state()) -> state()). --spec(tx_ack/3 :: (txn(), [ack()], state()) -> state()). --spec(tx_rollback/2 :: (txn(), state()) -> {[ack()], state()}). --spec(tx_commit/3 :: (txn(), fun (() -> any()), state()) -> {[ack()], state()}). +-spec(tx_publish/3 :: (rabbit_types:txn(), rabbit_types:basic_message(), state()) -> state()). +-spec(tx_ack/3 :: (rabbit_types:txn(), [ack()], state()) -> state()). +-spec(tx_rollback/2 :: (rabbit_types:txn(), state()) -> {[ack()], state()}). +-spec(tx_commit/3 :: (rabbit_types:txn(), fun (() -> any()), state()) -> {[ack()], state()}). -spec(requeue/2 :: ([ack()], state()) -> state()). -spec(len/1 :: (state()) -> non_neg_integer()). -spec(is_empty/1 :: (state()) -> boolean()). -spec(set_ram_duration_target/2 :: (('undefined' | 'infinity' | number()), state()) -> state()). -spec(ram_duration/1 :: (state()) -> {number(), state()}). --spec(needs_sync/1 :: (state()) -> boolean()). --spec(sync/1 :: (state()) -> state()). +-spec(needs_idle_timeout/1 :: (state()) -> boolean()). +-spec(idle_timeout/1 :: (state()) -> state()). -spec(handle_pre_hibernate/1 :: (state()) -> state()). -spec(status/1 :: (state()) -> [{atom(), any()}]). diff --git a/include/rabbit_exchange_type_spec.hrl b/include/rabbit_exchange_type_spec.hrl index cb564365..f05bcb84 100644 --- a/include/rabbit_exchange_type_spec.hrl +++ b/include/rabbit_exchange_type_spec.hrl @@ -31,13 +31,19 @@ -ifdef(use_specs). -spec(description/0 :: () -> [{atom(), any()}]). --spec(publish/2 :: (exchange(), delivery()) -> {routing_result(), [pid()]}). --spec(validate/1 :: (exchange()) -> 'ok'). --spec(create/1 :: (exchange()) -> 'ok'). --spec(recover/2 :: (exchange(), list(binding())) -> 'ok'). --spec(delete/2 :: (exchange(), list(binding())) -> 'ok'). --spec(add_binding/2 :: (exchange(), binding()) -> 'ok'). --spec(remove_bindings/2 :: (exchange(), list(binding())) -> 'ok'). 
--spec(assert_args_equivalence/2 :: (exchange(), amqp_table()) -> 'ok'). +-spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) + -> {rabbit_router:routing_result(), [pid()]}). +-spec(validate/1 :: (rabbit_types:exchange()) -> 'ok'). +-spec(create/1 :: (rabbit_types:exchange()) -> 'ok'). +-spec(recover/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(delete/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(add_binding/2 :: (rabbit_types:exchange(), + rabbit_types:binding()) -> 'ok'). +-spec(remove_bindings/2 :: (rabbit_types:exchange(), + [rabbit_types:binding()]) -> 'ok'). +-spec(assert_args_equivalence/2 :: (rabbit_types:exchange(), + rabbit_framing:amqp_table()) -> 'ok'). -endif. diff --git a/include/rabbit_msg_store.hrl b/include/rabbit_msg_store.hrl new file mode 100644 index 00000000..d96fa758 --- /dev/null +++ b/include/rabbit_msg_store.hrl @@ -0,0 +1,41 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. 
Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-include("rabbit.hrl"). + +-ifdef(use_specs). + +-type(msg() :: any()). + +-endif. + +-record(msg_location, + {guid, ref_count, file, offset, total_size}). diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl new file mode 100644 index 00000000..fba0b7cd --- /dev/null +++ b/include/rabbit_msg_store_index.hrl @@ -0,0 +1,59 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. 
+%% +%% Contributor(s): ______________________________________. +%% + +-include("rabbit_msg_store.hrl"). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(dir() :: any()). +-type(index_state() :: any()). +-type(keyvalue() :: any()). +-type(fieldpos() :: non_neg_integer()). +-type(fieldvalue() :: any()). + +-spec(new/1 :: (dir()) -> index_state()). +-spec(recover/1 :: (dir()) -> rabbit_types:ok_or_error2(index_state(), any())). +-spec(lookup/2 :: + (rabbit_guid:guid(), index_state()) -> ('not_found' | keyvalue())). +-spec(insert/2 :: (keyvalue(), index_state()) -> 'ok'). +-spec(update/2 :: (keyvalue(), index_state()) -> 'ok'). +-spec(update_fields/3 :: (rabbit_guid:guid(), ({fieldpos(), fieldvalue()} | + [{fieldpos(), fieldvalue()}]), + index_state()) -> 'ok'). +-spec(delete/2 :: (rabbit_guid:guid(), index_state()) -> 'ok'). +-spec(delete_by_file/2 :: (fieldvalue(), index_state()) -> 'ok'). +-spec(terminate/1 :: (index_state()) -> any()). + +-endif. 
+ +%%---------------------------------------------------------------------------- diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec index 00066a15..86675e1e 100644 --- a/packaging/RPMS/Fedora/rabbitmq-server.spec +++ b/packaging/RPMS/Fedora/rabbitmq-server.spec @@ -128,6 +128,12 @@ done rm -rf %{buildroot} %changelog +* Wed Jul 14 2010 Emile Joubert <emile@rabbitmq.com> 1.8.1-1 +- New Upstream Release + +* Tue Jun 15 2010 Matthew Sackman <matthew@rabbitmq.com> 1.8.0-1 +- New Upstream Release + * Mon Feb 15 2010 Matthew Sackman <matthew@lshift.net> 1.7.2-1 - New Upstream Release diff --git a/packaging/common/rabbitmq-server.ocf b/packaging/common/rabbitmq-server.ocf index db0ed70b..b969535a 100755 --- a/packaging/common/rabbitmq-server.ocf +++ b/packaging/common/rabbitmq-server.ocf @@ -40,7 +40,6 @@ ## OCF_RESKEY_nodename ## OCF_RESKEY_ip ## OCF_RESKEY_port -## OCF_RESKEY_cluster_config_file ## OCF_RESKEY_config_file ## OCF_RESKEY_log_base ## OCF_RESKEY_mnesia_base @@ -117,14 +116,6 @@ The IP Port for rabbitmq-server to listen on <content type="integer" default="" /> </parameter> -<parameter name="cluster_config_file" unique="0" required="0"> -<longdesc lang="en"> -Location of the cluster config file -</longdesc> -<shortdesc lang="en">Cluster config file path</shortdesc> -<content type="string" default="" /> -</parameter> - <parameter name="config_file" unique="0" required="0"> <longdesc lang="en"> Location of the config file @@ -184,7 +175,6 @@ RABBITMQ_CTL=$OCF_RESKEY_ctl RABBITMQ_NODENAME=$OCF_RESKEY_nodename RABBITMQ_NODE_IP_ADDRESS=$OCF_RESKEY_ip RABBITMQ_NODE_PORT=$OCF_RESKEY_port -RABBITMQ_CLUSTER_CONFIG_FILE=$OCF_RESKEY_cluster_config_file RABBITMQ_CONFIG_FILE=$OCF_RESKEY_config_file RABBITMQ_LOG_BASE=$OCF_RESKEY_log_base RABBITMQ_MNESIA_BASE=$OCF_RESKEY_mnesia_base @@ -195,7 +185,6 @@ RABBITMQ_SERVER_START_ARGS=$OCF_RESKEY_server_start_args export_vars() { [ ! 
-z $RABBITMQ_NODE_IP_ADDRESS ] && export RABBITMQ_NODE_IP_ADDRESS [ ! -z $RABBITMQ_NODE_PORT ] && export RABBITMQ_NODE_PORT - [ ! -z $RABBITMQ_CLUSTER_CONFIG_FILE ] && export RABBITMQ_CLUSTER_CONFIG_FILE [ ! -z $RABBITMQ_CONFIG_FILE ] && export RABBITMQ_CONFIG_FILE [ ! -z $RABBITMQ_LOG_BASE ] && export RABBITMQ_LOG_BASE [ ! -z $RABBITMQ_MNESIA_BASE ] && export RABBITMQ_MNESIA_BASE @@ -215,11 +204,6 @@ rabbit_validate_partial() { } rabbit_validate_full() { - if [ ! -z $RABBITMQ_CLUSTER_CONFIG_FILE ] && [ ! -e $RABBITMQ_CLUSTER_CONFIG_FILE ]; then - ocf_log err "rabbitmq-server cluster_config_file $RABBITMQ_CLUSTER_CONFIG_FILE does not exist or is not a file"; - exit $OCF_ERR_INSTALLED; - fi - if [ ! -z $RABBITMQ_CONFIG_FILE ] && [ ! -e $RABBITMQ_CONFIG_FILE ]; then ocf_log err "rabbitmq-server config_file $RABBITMQ_CONFIG_FILE does not exist or is not a file"; exit $OCF_ERR_INSTALLED; diff --git a/packaging/debs/Debian/debian/changelog b/packaging/debs/Debian/debian/changelog index 63b50749..0dccf938 100644 --- a/packaging/debs/Debian/debian/changelog +++ b/packaging/debs/Debian/debian/changelog @@ -1,3 +1,15 @@ +rabbitmq-server (1.8.1-1) lucid; urgency=low + + * New Upstream Release + + -- Emile Joubert <emile@rabbitmq.com> Wed, 14 Jul 2010 15:05:24 +0100 + +rabbitmq-server (1.8.0-1) intrepid; urgency=low + + * New Upstream Release + + -- Matthew Sackman <matthew@rabbitmq.com> Tue, 15 Jun 2010 12:48:48 +0100 + rabbitmq-server (1.7.2-1) intrepid; urgency=low * New Upstream Release diff --git a/packaging/macports/Makefile b/packaging/macports/Makefile index 4ad4c30b..3a22eef0 100644 --- a/packaging/macports/Makefile +++ b/packaging/macports/Makefile @@ -31,11 +31,18 @@ $(DEST)/Portfile: Portfile.in -f checksums.sed <$^ >$@ rm checksums.sed +# The purpose of the intricate substitution below is to set up similar +# environment vars to the ones that su will on Linux. 
On OS X, we +# have to use the -m option to su in order to be able to set the shell +# (which for the rabbitmq user would otherwise be /dev/null). But the +# -m option means that *all* environment vars get preserved. Erlang +# needs vars such as HOME to be set. So we have to set them +# explicitly. macports: dirs $(DEST)/Portfile for f in rabbitmq-asroot-script-wrapper rabbitmq-script-wrapper ; do \ cp $(COMMON_DIR)/$$f $(DEST)/files ; \ done - sed -i -e 's|@SU_RABBITMQ_SH_C@|SHELL=/bin/sh su -m rabbitmq -c|' \ + sed -i -e 's|@SU_RABBITMQ_SH_C@|SHELL=/bin/sh HOME=/var/lib/rabbitmq USER=rabbitmq LOGNAME=rabbitmq PATH="$$(eval `PATH=MACPORTS_PREFIX/bin /usr/libexec/path_helper -s`; echo $$PATH)" su -m rabbitmq -c|' \ $(DEST)/files/rabbitmq-script-wrapper cp patch-org.macports.rabbitmq-server.plist.diff $(DEST)/files if [ -n "$(MACPORTS_USERHOST)" ] ; then \ @@ -52,4 +59,4 @@ macports: dirs $(DEST)/Portfile fi clean: - rm -rf $(DEST) checksums.sed + rm -rf $(MACPORTS_DIR) checksums.sed diff --git a/packaging/macports/Portfile.in b/packaging/macports/Portfile.in index 153727be..082d712f 100644 --- a/packaging/macports/Portfile.in +++ b/packaging/macports/Portfile.in @@ -4,9 +4,8 @@ PortSystem 1.0 name rabbitmq-server version @VERSION@ -revision 1 categories net -maintainers rabbitmq.com:tonyg +maintainers paperplanes.de:meyer rabbitmq.com:tonyg openmaintainer platforms darwin description The RabbitMQ AMQP Server long_description \ @@ -23,8 +22,8 @@ checksums \ sha1 @sha1@ \ rmd160 @rmd160@ -depends_build port:erlang port:xmlto port:libxslt -depends_run port:erlang +depends_lib port:erlang +depends_build port:xmlto port:libxslt platform darwin 7 { depends_build-append port:py25-simplejson @@ -76,28 +75,20 @@ post-destroot { reinplace -E "s:(/etc/rabbitmq/rabbitmq.conf):${prefix}\\1:g" \ ${realsbin}/rabbitmq-env - reinplace -E "s:(CLUSTER_CONFIG_FILE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace 
-E "s:(LOG_BASE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(MNESIA_BASE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl - reinplace -E "s:(PIDS_FILE)=/:\\1=${prefix}/:" \ - ${realsbin}/rabbitmq-multi \ - ${realsbin}/rabbitmq-server \ - ${realsbin}/rabbitmqctl + foreach var {CONFIG_FILE CLUSTER_CONFIG_FILE LOG_BASE MNESIA_BASE PIDS_FILE} { + reinplace -E "s:^($var)=/:\\1=${prefix}/:" \ + ${realsbin}/rabbitmq-multi \ + ${realsbin}/rabbitmq-server \ + ${realsbin}/rabbitmqctl + } xinstall -m 555 ${filespath}/rabbitmq-script-wrapper \ ${wrappersbin}/rabbitmq-multi xinstall -m 555 ${filespath}/rabbitmq-asroot-script-wrapper \ ${wrappersbin}/rabbitmq-activate-plugins + reinplace -E "s:MACPORTS_PREFIX/bin:${prefix}/bin:" \ + ${wrappersbin}/rabbitmq-multi reinplace -E "s:/usr/lib/rabbitmq/bin/:${prefix}/lib/rabbitmq/bin/:" \ ${wrappersbin}/rabbitmq-multi reinplace -E "s:/var/lib/rabbitmq:${prefix}/var/lib/rabbitmq:" \ diff --git a/packaging/macports/make-port-diff.sh b/packaging/macports/make-port-diff.sh new file mode 100755 index 00000000..3eb1b9f5 --- /dev/null +++ b/packaging/macports/make-port-diff.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# This script grabs the latest rabbitmq-server bits from the main +# macports subversion repo, and from the rabbitmq.com macports repo, +# and produces a diff from the former to the latter for submission +# through the macports trac. 
+ +set -e + +dir=/tmp/$(basename $0).$$ +mkdir -p $dir/macports $dir/rabbitmq + +# Get the files from the macports subversion repo +cd $dir/macports +svn checkout http://svn.macports.org/repository/macports/trunk/dports/net/rabbitmq-server/ 2>&1 >/dev/null + +# Clear out the svn $id tag +sed -i -e 's|^# \$.*$|# $Id$|' rabbitmq-server/Portfile + +# Get the files from the rabbitmq.com macports repo +cd ../rabbitmq +curl -s http://www.rabbitmq.com/releases/macports/net/rabbitmq-server.tgz | tar xzf - + +cd .. +diff -Naur --exclude=.svn macports rabbitmq +cd / +rm -rf $dir diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 2261b56e..7283f0e8 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -79,12 +79,6 @@ fi [ -f "${RABBITMQ_LOGS}" ] && cat "${RABBITMQ_LOGS}" >> "${RABBITMQ_LOGS}${RABBITMQ_BACKUP_EXTENSION}" [ -f "${RABBITMQ_SASL_LOGS}" ] && cat "${RABBITMQ_SASL_LOGS}" >> "${RABBITMQ_SASL_LOGS}${RABBITMQ_BACKUP_EXTENSION}" -if [ -f "$RABBITMQ_CLUSTER_CONFIG_FILE" ]; then - RABBITMQ_CLUSTER_CONFIG_OPTION="-rabbit cluster_config \"$RABBITMQ_CLUSTER_CONFIG_FILE\"" -else - RABBITMQ_CLUSTER_CONFIG_OPTION="" -fi - RABBITMQ_START_RABBIT= [ "x" = "x$RABBITMQ_ALLOW_INPUT" ] && RABBITMQ_START_RABBIT='-noinput' @@ -124,6 +118,5 @@ exec erl \ -os_mon start_disksup false \ -os_mon start_memsup false \ -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \ - ${RABBITMQ_CLUSTER_CONFIG_OPTION} \ ${RABBITMQ_SERVER_START_ARGS} \ "$@" diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat index a290f935..cebd7d1d 100644 --- a/scripts/rabbitmq-server.bat +++ b/scripts/rabbitmq-server.bat @@ -103,14 +103,6 @@ if exist "!SASL_LOGS!" ( rem End of log management
-if "!RABBITMQ_CLUSTER_CONFIG_FILE!"=="" (
- set RABBITMQ_CLUSTER_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq_cluster.config
-)
-set CLUSTER_CONFIG=
-if not exist "!RABBITMQ_CLUSTER_CONFIG_FILE!" GOTO L1
-set CLUSTER_CONFIG=-rabbit cluster_config \""!RABBITMQ_CLUSTER_CONFIG_FILE:\=/!"\"
-:L1
-
if "!RABBITMQ_MNESIA_DIR!"=="" (
set RABBITMQ_MNESIA_DIR=!RABBITMQ_MNESIA_BASE!/!RABBITMQ_NODENAME!-mnesia
)
diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat index bd117b83..96248f6a 100644 --- a/scripts/rabbitmq-service.bat +++ b/scripts/rabbitmq-service.bat @@ -136,14 +136,6 @@ if exist "!SASL_LOGS!" ( rem End of log management
-if "!RABBITMQ_CLUSTER_CONFIG_FILE!"=="" (
- set RABBITMQ_CLUSTER_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq_cluster.config
-)
-set CLUSTER_CONFIG=
-if not exist "!RABBITMQ_CLUSTER_CONFIG_FILE!" GOTO L1
-set CLUSTER_CONFIG=-rabbit cluster_config \""!RABBITMQ_CLUSTER_CONFIG_FILE:\=/!"\"
-:L1
-
if "!RABBITMQ_MNESIA_DIR!"=="" (
set RABBITMQ_MNESIA_DIR=!RABBITMQ_MNESIA_BASE!/!RABBITMQ_NODENAME!-mnesia
)
diff --git a/src/bpqueue.erl b/src/bpqueue.erl new file mode 100644 index 00000000..49874aa6 --- /dev/null +++ b/src/bpqueue.erl @@ -0,0 +1,286 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(bpqueue). + +%% Block-prefixed queue. From the perspective of the queue interface +%% the datastructure acts like a regular queue where each value is +%% paired with the prefix. +%% +%% This is implemented as a queue of queues, which is more space and +%% time efficient, whilst supporting the normal queue interface. Each +%% inner queue has a prefix, which does not need to be unique, and it +%% is guaranteed that no two consecutive blocks have the same +%% prefix. 
len/1 returns the flattened length of the queue and is +%% O(1). + +-export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2, + foldl/3, foldr/3, from_list/1, to_list/1, map_fold_filter_l/4, + map_fold_filter_r/4]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([bpqueue/0]). + +-type(bpqueue() :: {non_neg_integer(), queue()}). +-type(prefix() :: any()). +-type(value() :: any()). +-type(result() :: ({'empty', bpqueue()} | + {{'value', prefix(), value()}, bpqueue()})). + +-spec(new/0 :: () -> bpqueue()). +-spec(is_empty/1 :: (bpqueue()) -> boolean()). +-spec(len/1 :: (bpqueue()) -> non_neg_integer()). +-spec(in/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(in_r/3 :: (prefix(), value(), bpqueue()) -> bpqueue()). +-spec(out/1 :: (bpqueue()) -> result()). +-spec(out_r/1 :: (bpqueue()) -> result()). +-spec(join/2 :: (bpqueue(), bpqueue()) -> bpqueue()). +-spec(foldl/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(foldr/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B). +-spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()). +-spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]). +-spec(map_fold_filter_l/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). +-spec(map_fold_filter_r/4 :: ((fun ((prefix()) -> boolean())), + (fun ((value(), B) -> + ({prefix(), value(), B} | 'stop'))), + B, + bpqueue()) -> + {bpqueue(), B}). + +-endif. + +%%---------------------------------------------------------------------------- + +new() -> {0, queue:new()}. + +is_empty({0, _Q}) -> true; +is_empty(_BPQ) -> false. + +len({N, _Q}) -> N. + +in(Prefix, Value, {0, Q}) -> + {1, queue:in({Prefix, queue:from_list([Value])}, Q)}; +in(Prefix, Value, BPQ) -> + in1({fun queue:in/2, fun queue:out_r/1}, Prefix, Value, BPQ). 
+ +in_r(Prefix, Value, BPQ = {0, _Q}) -> + in(Prefix, Value, BPQ); +in_r(Prefix, Value, BPQ) -> + in1({fun queue:in_r/2, fun queue:out/1}, Prefix, Value, BPQ). + +in1({In, Out}, Prefix, Value, {N, Q}) -> + {N+1, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, In(Value, InnerQ)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, queue:in(Value, queue:new())}, Q) + end}. + +in_q(Prefix, Queue, BPQ = {0, Q}) -> + case queue:len(Queue) of + 0 -> BPQ; + N -> {N, queue:in({Prefix, Queue}, Q)} + end; +in_q(Prefix, Queue, BPQ) -> + in_q1({fun queue:in/2, fun queue:out_r/1, + fun queue:join/2}, + Prefix, Queue, BPQ). + +in_q_r(Prefix, Queue, BPQ = {0, _Q}) -> + in_q(Prefix, Queue, BPQ); +in_q_r(Prefix, Queue, BPQ) -> + in_q1({fun queue:in_r/2, fun queue:out/1, + fun (T, H) -> queue:join(H, T) end}, + Prefix, Queue, BPQ). + +in_q1({In, Out, Join}, Prefix, Queue, BPQ = {N, Q}) -> + case queue:len(Queue) of + 0 -> BPQ; + M -> {N + M, case Out(Q) of + {{value, {Prefix, InnerQ}}, Q1} -> + In({Prefix, Join(InnerQ, Queue)}, Q1); + {{value, {_Prefix, _InnerQ}}, _Q1} -> + In({Prefix, Queue}, Q) + end} + end. + +out({0, _Q} = BPQ) -> {empty, BPQ}; +out(BPQ) -> out1({fun queue:in_r/2, fun queue:out/1}, BPQ). + +out_r({0, _Q} = BPQ) -> {empty, BPQ}; +out_r(BPQ) -> out1({fun queue:in/2, fun queue:out_r/1}, BPQ). + +out1({In, Out}, {N, Q}) -> + {{value, {Prefix, InnerQ}}, Q1} = Out(Q), + {{value, Value}, InnerQ1} = Out(InnerQ), + Q2 = case queue:is_empty(InnerQ1) of + true -> Q1; + false -> In({Prefix, InnerQ1}, Q1) + end, + {{value, Prefix, Value}, {N-1, Q2}}. 
+ +join({0, _Q}, BPQ) -> + BPQ; +join(BPQ, {0, _Q}) -> + BPQ; +join({NHead, QHead}, {NTail, QTail}) -> + {{value, {Prefix, InnerQHead}}, QHead1} = queue:out_r(QHead), + {NHead + NTail, + case queue:out(QTail) of + {{value, {Prefix, InnerQTail}}, QTail1} -> + queue:join( + queue:in({Prefix, queue:join(InnerQHead, InnerQTail)}, QHead1), + QTail1); + {{value, {_Prefix, _InnerQTail}}, _QTail1} -> + queue:join(QHead, QTail) + end}. + +foldl(_Fun, Init, {0, _Q}) -> Init; +foldl( Fun, Init, {_N, Q}) -> fold1(fun queue:out/1, Fun, Init, Q). + +foldr(_Fun, Init, {0, _Q}) -> Init; +foldr( Fun, Init, {_N, Q}) -> fold1(fun queue:out_r/1, Fun, Init, Q). + +fold1(Out, Fun, Init, Q) -> + case Out(Q) of + {empty, _Q} -> + Init; + {{value, {Prefix, InnerQ}}, Q1} -> + fold1(Out, Fun, fold1(Out, Fun, Prefix, Init, InnerQ), Q1) + end. + +fold1(Out, Fun, Prefix, Init, InnerQ) -> + case Out(InnerQ) of + {empty, _Q} -> + Init; + {{value, Value}, InnerQ1} -> + fold1(Out, Fun, Prefix, Fun(Prefix, Value, Init), InnerQ1) + end. + +from_list(List) -> + {FinalPrefix, FinalInnerQ, ListOfPQs1, Len} = + lists:foldl( + fun ({_Prefix, []}, Acc) -> + Acc; + ({Prefix, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix, queue:join(InnerQ, queue:from_list(InnerList)), + ListOfPQs, LenAcc + length(InnerList)}; + ({Prefix1, InnerList}, {Prefix, InnerQ, ListOfPQs, LenAcc}) -> + {Prefix1, queue:from_list(InnerList), + [{Prefix, InnerQ} | ListOfPQs], LenAcc + length(InnerList)} + end, {undefined, queue:new(), [], 0}, List), + ListOfPQs2 = [{FinalPrefix, FinalInnerQ} | ListOfPQs1], + [{undefined, InnerQ1} | Rest] = All = lists:reverse(ListOfPQs2), + {Len, queue:from_list(case queue:is_empty(InnerQ1) of + true -> Rest; + false -> All + end)}. + +to_list({0, _Q}) -> []; +to_list({_N, Q}) -> [{Prefix, queue:to_list(InnerQ)} || + {Prefix, InnerQ} <- queue:to_list(Q)]. 
+ +%% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init} +%% where FilterFun(Prefix) -> boolean() +%% Fun(Value, Init) -> {Prefix, Value, Init} | stop +%% +%% The filter fun allows you to skip very quickly over blocks that +%% you're not interested in. Such blocks appear in the resulting bpq +%% without modification. The Fun is then used both to map the value, +%% which also allows you to change the prefix (and thus block) of the +%% value, and also to modify the Init/Acc (just like a fold). If the +%% Fun returns 'stop' then it is not applied to any further items. +map_fold_filter_l(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_l(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter1({fun queue:out/1, fun queue:in/2, + fun in_q/3, fun join/2}, + N, PFilter, Fun, Init, Q, new()). + +map_fold_filter_r(_PFilter, _Fun, Init, BPQ = {0, _Q}) -> + {BPQ, Init}; +map_fold_filter_r(PFilter, Fun, Init, {N, Q}) -> + map_fold_filter1({fun queue:out_r/1, fun queue:in_r/2, + fun in_q_r/3, fun (T, H) -> join(H, T) end}, + N, PFilter, Fun, Init, Q, new()). + +map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun, + Init, Q, QNew) -> + case Out(Q) of + {empty, _Q} -> + {QNew, Init}; + {{value, {Prefix, InnerQ}}, Q1} -> + case PFilter(Prefix) of + true -> + {Init1, QNew1, Cont} = + map_fold_filter2(Funs, Fun, Prefix, Prefix, + Init, InnerQ, QNew, queue:new()), + case Cont of + false -> {Join(QNew1, {Len - len(QNew1), Q1}), Init1}; + true -> map_fold_filter1(Funs, Len, PFilter, Fun, + Init1, Q1, QNew1) + end; + false -> + map_fold_filter1(Funs, Len, PFilter, Fun, + Init, Q1, InQ(Prefix, InnerQ, QNew)) + end + end. 
+ +map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix, + Init, InnerQ, QNew, InnerQNew) -> + case Out(InnerQ) of + {empty, _Q} -> + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), true}; + {{value, Value}, InnerQ1} -> + case Fun(Value, Init) of + stop -> + {Init, InQ(OrigPrefix, InnerQ, + InQ(Prefix, InnerQNew, QNew)), false}; + {Prefix1, Value1, Init1} -> + {Prefix2, QNew1, InnerQNew1} = + case Prefix1 =:= Prefix of + true -> {Prefix, QNew, In(Value1, InnerQNew)}; + false -> {Prefix1, InQ(Prefix, InnerQNew, QNew), + In(Value1, queue:new())} + end, + map_fold_filter2(Funs, Fun, OrigPrefix, Prefix2, + Init1, InnerQ1, QNew1, InnerQNew1) + end + end. diff --git a/src/delegate.erl b/src/delegate.erl index 8af28127..3f57953b 100644 --- a/src/delegate.erl +++ b/src/delegate.erl @@ -44,8 +44,9 @@ -ifdef(use_specs). --spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()}). --spec(invoke_no_result/2 :: (pid() | [pid()], fun ((pid()) -> any())) -> 'ok'). +-spec(start_link/1 :: (non_neg_integer()) -> rabbit_types:ok(pid())). +-spec(invoke_no_result/2 :: + (pid() | [pid()], fun ((pid()) -> any())) -> 'ok'). -spec(invoke/2 :: (pid() | [pid()], fun ((pid()) -> A)) -> A). -spec(process_count/0 :: () -> non_neg_integer()). diff --git a/src/delegate_sup.erl b/src/delegate_sup.erl index 1c1d62a9..39ef3f85 100644 --- a/src/delegate_sup.erl +++ b/src/delegate_sup.erl @@ -43,7 +43,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), any()) | 'ignore'). -endif. diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl index 0f648dcd..e209ee6b 100644 --- a/src/file_handle_cache.erl +++ b/src/file_handle_cache.erl @@ -182,18 +182,18 @@ -ifdef(use_specs). -type(ref() :: any()). --type(error() :: {'error', any()}). --type(ok_or_error() :: ('ok' | error())). --type(val_or_error(T) :: ({'ok', T} | error())). 
+-type(ok_or_error() :: rabbit_types:ok_or_error(any())). +-type(val_or_error(T) :: rabbit_types:ok_or_error2(T, any())). -type(position() :: ('bof' | 'eof' | non_neg_integer() | - {('bof' |'eof'), non_neg_integer()} | {'cur', integer()})). + {('bof' |'eof'), non_neg_integer()} | + {'cur', integer()})). -type(offset() :: non_neg_integer()). -spec(register_callback/3 :: (atom(), atom(), [any()]) -> 'ok'). -spec(open/3 :: - (string(), [any()], - [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) -> - val_or_error(ref())). + (string(), [any()], + [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) + -> val_or_error(ref())). -spec(close/1 :: (ref()) -> ok_or_error()). -spec(read/2 :: (ref(), non_neg_integer()) -> val_or_error([char()] | binary()) | 'eof'). diff --git a/src/gatherer.erl b/src/gatherer.erl new file mode 100644 index 00000000..31dda16e --- /dev/null +++ b/src/gatherer.erl @@ -0,0 +1,145 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. 
Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(gatherer). + +-behaviour(gen_server2). + +-export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(stop/1 :: (pid()) -> 'ok'). +-spec(fork/1 :: (pid()) -> 'ok'). +-spec(finish/1 :: (pid()) -> 'ok'). +-spec(in/2 :: (pid(), any()) -> 'ok'). +-spec(out/1 :: (pid()) -> {'value', any()} | 'empty'). + +-endif. + +%%---------------------------------------------------------------------------- + +-define(HIBERNATE_AFTER_MIN, 1000). +-define(DESIRED_HIBERNATE, 10000). + +%%---------------------------------------------------------------------------- + +-record(gstate, { forks, values, blocked }). + +%%---------------------------------------------------------------------------- + +start_link() -> + gen_server2:start_link(?MODULE, [], [{timeout, infinity}]). + +stop(Pid) -> + gen_server2:call(Pid, stop, infinity). + +fork(Pid) -> + gen_server2:call(Pid, fork, infinity). + +finish(Pid) -> + gen_server2:cast(Pid, finish). + +in(Pid, Value) -> + gen_server2:cast(Pid, {in, Value}). + +out(Pid) -> + gen_server2:call(Pid, out, infinity). + +%%---------------------------------------------------------------------------- + +init([]) -> + {ok, #gstate { forks = 0, values = queue:new(), blocked = queue:new() }, + hibernate, + {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. 
+ +handle_call(stop, _From, State) -> + {stop, normal, ok, State}; + +handle_call(fork, _From, State = #gstate { forks = Forks }) -> + {reply, ok, State #gstate { forks = Forks + 1 }, hibernate}; + +handle_call(out, From, State = #gstate { forks = Forks, + values = Values, + blocked = Blocked }) -> + case queue:out(Values) of + {empty, _} -> + case Forks of + 0 -> {reply, empty, State, hibernate}; + _ -> {noreply, + State #gstate { blocked = queue:in(From, Blocked) }, + hibernate} + end; + {{value, _Value} = V, NewValues} -> + {reply, V, State #gstate { values = NewValues }, hibernate} + end; + +handle_call(Msg, _From, State) -> + {stop, {unexpected_call, Msg}, State}. + +handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) -> + NewForks = Forks - 1, + NewBlocked = case NewForks of + 0 -> [gen_server2:reply(From, empty) || + From <- queue:to_list(Blocked)], + queue:new(); + _ -> Blocked + end, + {noreply, State #gstate { forks = NewForks, blocked = NewBlocked }, + hibernate}; + +handle_cast({in, Value}, State = #gstate { values = Values, + blocked = Blocked }) -> + {noreply, case queue:out(Blocked) of + {empty, _} -> + State #gstate { values = queue:in(Value, Values) }; + {{value, From}, NewBlocked} -> + gen_server2:reply(From, {value, Value}), + State #gstate { blocked = NewBlocked } + end, hibernate}; + +handle_cast(Msg, State) -> + {stop, {unexpected_cast, Msg}, State}. + +handle_info(Msg, State) -> + {stop, {unexpected_info, Msg}, State}. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +terminate(_Reason, State) -> + State. diff --git a/src/gen_server2.erl b/src/gen_server2.erl index 547f0a42..49ae63c1 100644 --- a/src/gen_server2.erl +++ b/src/gen_server2.erl @@ -186,7 +186,7 @@ -ifdef(use_specs). -spec(handle_common_termination/6 :: - (any(), any(), any(), atom(), any(), any()) -> no_return()). + (any(), any(), any(), atom(), any(), any()) -> no_return()). 
-spec(hibernate/7 :: (pid(), any(), any(), atom(), any(), queue(), any()) -> no_return()). diff --git a/src/pg_local.erl b/src/pg_local.erl index 1501331d..f5ded123 100644 --- a/src/pg_local.erl +++ b/src/pg_local.erl @@ -36,8 +36,8 @@ -export([join/2, leave/2, get_members/1]). -export([sync/0]). %% intended for testing only; not part of official API --export([start/0,start_link/0,init/1,handle_call/3,handle_cast/2,handle_info/2, - terminate/2]). +-export([start/0, start_link/0, init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2]). %%---------------------------------------------------------------------------- @@ -45,8 +45,8 @@ -type(name() :: term()). --spec(start_link/0 :: () -> {'ok', pid()} | {'error', term()}). --spec(start/0 :: () -> {'ok', pid()} | {'error', term()}). +-spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), term())). +-spec(start/0 :: () -> rabbit_types:ok_or_error2(pid(), term())). -spec(join/2 :: (name(), pid()) -> 'ok'). -spec(leave/2 :: (name(), pid()) -> 'ok'). -spec(get_members/1 :: (name()) -> [pid()]). diff --git a/src/rabbit.erl b/src/rabbit.erl index 09a19014..ada2c38e 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -33,7 +33,8 @@ -behaviour(application). --export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, rotate_logs/1]). +-export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, + rotate_logs/1]). -export([start/2, stop/1]). @@ -183,18 +184,19 @@ -ifdef(use_specs). --type(log_location() :: 'tty' | 'undefined' | string()). -type(file_suffix() :: binary()). +%% this really should be an abstract type +-type(log_location() :: 'tty' | 'undefined' | file:filename()). -spec(prepare/0 :: () -> 'ok'). -spec(start/0 :: () -> 'ok'). -spec(stop/0 :: () -> 'ok'). -spec(stop_and_halt/0 :: () -> 'ok'). --spec(rotate_logs/1 :: (file_suffix()) -> 'ok' | {'error', any()}). 
--spec(status/0 :: () -> - [{running_applications, [{atom(), string(), string()}]} | - {nodes, [erlang_node()]} | - {running_nodes, [erlang_node()]}]). +-spec(rotate_logs/1 :: (file_suffix()) -> rabbit_types:ok_or_error(any())). +-spec(status/0 :: + () -> [{running_applications, [{atom(), string(), string()}]} | + {nodes, [{rabbit_mnesia:node_type(), [node()]}]} | + {running_nodes, [node()]}]). -spec(log_location/1 :: ('sasl' | 'kernel') -> log_location()). -endif. @@ -424,10 +426,9 @@ print_banner() -> "| ~s +---+ |~n" "| |~n" "+-------------------+~n" - "AMQP ~p-~p-~p~n~s~n~s~n~n", + "~s~n~s~n~s~n~n", [Product, string:right([$v|Version], ProductLen), - ?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR, - ?PROTOCOL_VERSION_REVISION, + ?PROTOCOL_VERSION, ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]), Settings = [{"node", node()}, {"app descriptor", app_location()}, diff --git a/src/rabbit_access_control.erl b/src/rabbit_access_control.erl index 23b84afb..3aaf5928 100644 --- a/src/rabbit_access_control.erl +++ b/src/rabbit_access_control.erl @@ -45,28 +45,38 @@ -ifdef(use_specs). +-export_type([username/0, password/0]). + -type(permission_atom() :: 'configure' | 'read' | 'write'). +-type(username() :: binary()). +-type(password() :: binary()). +-type(regexp() :: binary()). --spec(check_login/2 :: (binary(), binary()) -> user()). --spec(user_pass_login/2 :: (username(), password()) -> user()). --spec(check_vhost_access/2 :: (user(), vhost()) -> 'ok'). +-spec(check_login/2 :: (binary(), binary()) -> rabbit_types:user()). +-spec(user_pass_login/2 :: (username(), password()) -> rabbit_types:user()). +-spec(check_vhost_access/2 :: + (rabbit_types:user(), rabbit_types:vhost()) -> 'ok'). -spec(check_resource_access/3 :: - (username(), r(atom()), permission_atom()) -> 'ok'). + (username(), rabbit_types:r(atom()), permission_atom()) -> 'ok'). -spec(add_user/2 :: (username(), password()) -> 'ok'). -spec(delete_user/1 :: (username()) -> 'ok'). 
-spec(change_password/2 :: (username(), password()) -> 'ok'). -spec(list_users/0 :: () -> [username()]). --spec(lookup_user/1 :: (username()) -> {'ok', user()} | not_found()). --spec(add_vhost/1 :: (vhost()) -> 'ok'). --spec(delete_vhost/1 :: (vhost()) -> 'ok'). --spec(list_vhosts/0 :: () -> [vhost()]). --spec(set_permissions/5 :: - (username(), vhost(), regexp(), regexp(), regexp()) -> 'ok'). --spec(clear_permissions/2 :: (username(), vhost()) -> 'ok'). +-spec(lookup_user/1 :: + (username()) -> rabbit_types:ok(rabbit_types:user()) + | rabbit_types:error('not_found')). +-spec(add_vhost/1 :: (rabbit_types:vhost()) -> 'ok'). +-spec(delete_vhost/1 :: (rabbit_types:vhost()) -> 'ok'). +-spec(list_vhosts/0 :: () -> [rabbit_types:vhost()]). +-spec(set_permissions/5 ::(username(), rabbit_types:vhost(), regexp(), + regexp(), regexp()) -> 'ok'). +-spec(clear_permissions/2 :: (username(), rabbit_types:vhost()) -> 'ok'). -spec(list_vhost_permissions/1 :: - (vhost()) -> [{username(), regexp(), regexp(), regexp()}]). + (rabbit_types:vhost()) + -> [{username(), regexp(), regexp(), regexp()}]). -spec(list_user_permissions/1 :: - (username()) -> [{vhost(), regexp(), regexp(), regexp()}]). + (username()) + -> [{rabbit_types:vhost(), regexp(), regexp(), regexp()}]). -endif. 
@@ -162,11 +172,14 @@ check_resource_access(Username, [] -> false; [#user_permission{permission = P}] -> - case regexp:match( - binary_to_list(Name), - binary_to_list(element(permission_index(Permission), P))) of - {match, _, _} -> true; - nomatch -> false + PermRegexp = case element(permission_index(Permission), P) of + %% <<"^$">> breaks Emacs' erlang mode + <<"">> -> <<$^, $$>>; + RE -> RE + end, + case re:run(Name, PermRegexp, [{capture, none}]) of + match -> true; + nomatch -> false end end, if Res -> ok; @@ -240,7 +253,7 @@ add_vhost(VHostPath) -> write), [rabbit_exchange:declare( rabbit_misc:r(VHostPath, exchange, Name), - Type, true, []) || + Type, true, false, []) || {Name,Type} <- [{<<"">>, direct}, {<<"amq.direct">>, direct}, @@ -291,7 +304,7 @@ list_vhosts() -> validate_regexp(RegexpBin) -> Regexp = binary_to_list(RegexpBin), - case regexp:parse(Regexp) of + case re:compile(Regexp) of {ok, _} -> ok; {error, Reason} -> throw({error, {invalid_regexp, Regexp, Reason}}) end. diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl index 3c9c41bd..6bf2f6db 100644 --- a/src/rabbit_amqqueue.erl +++ b/src/rabbit_amqqueue.erl @@ -31,14 +31,15 @@ -module(rabbit_amqqueue). --export([start/0, declare/5, delete/3, purge/1]). +-export([start/0, stop/0, declare/5, delete/3, purge/1]). -export([internal_declare/2, internal_delete/1, maybe_run_queue_via_backing_queue/2, update_ram_duration/1, set_ram_duration_target/2, - set_maximum_since_use/2]). + set_maximum_since_use/2, maybe_expire/1]). -export([pseudo_queue/2]). --export([lookup/1, with/2, with_or_die/2, - stat/1, stat_all/0, deliver/2, requeue/3, ack/4]). +-export([lookup/1, with/2, with_or_die/2, assert_equivalence/5, + check_exclusive_access/2, with_exclusive_access_or_die/3, + stat/1, deliver/2, requeue/3, ack/4]). -export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]). -export([consumers/1, consumers_all/1]). -export([basic_get/3, basic_consume/7, basic_cancel/4]). 
@@ -54,66 +55,102 @@ -include("rabbit.hrl"). -include_lib("stdlib/include/qlc.hrl"). +-define(EXPIRES_TYPE, long). + %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(qstats() :: {'ok', queue_name(), non_neg_integer(), non_neg_integer()}). --type(qlen() :: {'ok', non_neg_integer()}). --type(qfun(A) :: fun ((amqqueue()) -> A)). +-export_type([name/0, qmsg/0]). + +-type(name() :: rabbit_types:r('queue')). + +-type(qlen() :: rabbit_types:ok(non_neg_integer())). +-type(qfun(A) :: fun ((rabbit_types:amqqueue()) -> A)). +-type(qmsg() :: {name(), pid(), msg_id(), boolean(), rabbit_types:message()}). +-type(msg_id() :: non_neg_integer()). -type(ok_or_errors() :: 'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}). -spec(start/0 :: () -> 'ok'). --spec(declare/5 :: (queue_name(), boolean(), boolean(), amqp_table(), - maybe(pid())) -> amqqueue()). --spec(lookup/1 :: (queue_name()) -> {'ok', amqqueue()} | not_found()). --spec(with/2 :: (queue_name(), qfun(A)) -> A | not_found()). --spec(with_or_die/2 :: (queue_name(), qfun(A)) -> A). --spec(list/1 :: (vhost()) -> [amqqueue()]). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (amqqueue()) -> [info()]). --spec(info/2 :: (amqqueue(), [info_key()]) -> [info()]). --spec(info_all/1 :: (vhost()) -> [[info()]]). --spec(info_all/2 :: (vhost(), [info_key()]) -> [[info()]]). --spec(consumers/1 :: (amqqueue()) -> [{pid(), ctag(), boolean()}]). +-spec(stop/0 :: () -> 'ok'). +-spec(declare/5 :: + (name(), boolean(), boolean(), + rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) + -> {'new' | 'existing', rabbit_types:amqqueue()}). +-spec(lookup/1 :: + (name()) -> rabbit_types:ok(rabbit_types:amqqueue()) | + rabbit_types:error('not_found')). +-spec(with/2 :: (name(), qfun(A)) -> A | rabbit_types:error('not_found')). +-spec(with_or_die/2 :: (name(), qfun(A)) -> A). 
+-spec(assert_equivalence/5 :: + (rabbit_types:amqqueue(), boolean(), boolean(), + rabbit_framing:amqp_table(), rabbit_types:maybe(pid())) + -> 'ok' | no_return()). +-spec(check_exclusive_access/2 :: (rabbit_types:amqqueue(), pid()) -> 'ok'). +-spec(with_exclusive_access_or_die/3 :: (name(), pid(), qfun(A)) -> A). +-spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:amqqueue()]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (rabbit_types:amqqueue()) -> [rabbit_types:info()]). +-spec(info/2 :: + (rabbit_types:amqqueue(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). +-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]). +-spec(info_all/2 :: (rabbit_types:vhost(), [rabbit_types:info_key()]) + -> [[rabbit_types:info()]]). +-spec(consumers/1 :: + (rabbit_types:amqqueue()) + -> [{pid(), rabbit_types:ctag(), boolean()}]). -spec(consumers_all/1 :: - (vhost()) -> [{queue_name(), pid(), ctag(), boolean()}]). --spec(stat/1 :: (amqqueue()) -> qstats()). --spec(stat_all/0 :: () -> [qstats()]). + (rabbit_types:vhost()) + -> [{name(), pid(), rabbit_types:ctag(), boolean()}]). +-spec(stat/1 :: + (rabbit_types:amqqueue()) + -> {'ok', non_neg_integer(), non_neg_integer()}). -spec(delete/3 :: - (amqqueue(), 'false', 'false') -> qlen(); - (amqqueue(), 'true' , 'false') -> qlen() | {'error', 'in_use'}; - (amqqueue(), 'false', 'true' ) -> qlen() | {'error', 'not_empty'}; - (amqqueue(), 'true' , 'true' ) -> qlen() | - {'error', 'in_use'} | - {'error', 'not_empty'}). --spec(purge/1 :: (amqqueue()) -> qlen()). --spec(deliver/2 :: (pid(), delivery()) -> boolean()). + (rabbit_types:amqqueue(), 'false', 'false') + -> qlen(); + (rabbit_types:amqqueue(), 'true' , 'false') + -> qlen() | rabbit_types:error('in_use'); + (rabbit_types:amqqueue(), 'false', 'true' ) + -> qlen() | rabbit_types:error('not_empty'); + (rabbit_types:amqqueue(), 'true' , 'true' ) + -> qlen() | + rabbit_types:error('in_use') | + rabbit_types:error('not_empty')). 
+-spec(purge/1 :: (rabbit_types:amqqueue()) -> qlen()). +-spec(deliver/2 :: (pid(), rabbit_types:delivery()) -> boolean()). -spec(requeue/3 :: (pid(), [msg_id()], pid()) -> 'ok'). --spec(ack/4 :: (pid(), maybe(txn()), [msg_id()], pid()) -> 'ok'). --spec(commit_all/3 :: ([pid()], txn(), pid()) -> ok_or_errors()). --spec(rollback_all/3 :: ([pid()], txn(), pid()) -> 'ok'). +-spec(ack/4 :: + (pid(), rabbit_types:maybe(rabbit_types:txn()), [msg_id()], pid()) + -> 'ok'). +-spec(commit_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> ok_or_errors()). +-spec(rollback_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> 'ok'). -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()). -spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()). --spec(basic_get/3 :: (amqqueue(), pid(), boolean()) -> +-spec(basic_get/3 :: (rabbit_types:amqqueue(), pid(), boolean()) -> {'ok', non_neg_integer(), qmsg()} | 'empty'). -spec(basic_consume/7 :: - (amqqueue(), boolean(), pid(), pid() | 'undefined', ctag(), - boolean(), any()) -> - 'ok' | {'error', 'exclusive_consume_unavailable'}). --spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok'). + (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined', + rabbit_types:ctag(), boolean(), any()) + -> rabbit_types:ok_or_error('exclusive_consume_unavailable')). +-spec(basic_cancel/4 :: + (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), any()) -> 'ok'). -spec(notify_sent/2 :: (pid(), pid()) -> 'ok'). -spec(unblock/2 :: (pid(), pid()) -> 'ok'). -spec(flush_all/2 :: ([pid()], pid()) -> 'ok'). --spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue() | 'not_found'). --spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()). --spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok'). +-spec(internal_declare/2 :: + (rabbit_types:amqqueue(), boolean()) + -> rabbit_types:amqqueue() | 'not_found'). 
+-spec(internal_delete/1 :: (name()) -> rabbit_types:ok_or_error('not_found')). +-spec(maybe_run_queue_via_backing_queue/2 :: + (pid(), (fun ((A) -> A))) -> 'ok'). -spec(update_ram_duration/1 :: (pid()) -> 'ok'). --spec(set_ram_duration_target/2 :: (pid(), number()) -> 'ok'). +-spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok'). -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok'). --spec(on_node_down/1 :: (erlang_node()) -> 'ok'). --spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()). +-spec(maybe_expire/1 :: (pid()) -> 'ok'). +-spec(on_node_down/1 :: (node()) -> 'ok'). +-spec(pseudo_queue/2 :: (binary(), pid()) -> rabbit_types:amqqueue()). -endif. @@ -121,7 +158,7 @@ start() -> DurableQueues = find_durable_queues(), - {ok, BQ} = application:get_env(backing_queue_module), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), ok = BQ:start([QName || #amqqueue{name = QName} <- DurableQueues]), {ok,_} = supervisor:start_child( rabbit_sup, @@ -131,6 +168,12 @@ start() -> _RealDurableQueues = recover_durable_queues(DurableQueues), ok. +stop() -> + ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup), + ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup), + {ok, BQ} = application:get_env(rabbit, backing_queue_module), + ok = BQ:stop(). + find_durable_queues() -> Node = node(), %% TODO: use dirty ops instead @@ -146,6 +189,7 @@ recover_durable_queues(DurableQueues) -> [Q || Q <- Qs, gen_server2:call(Q#amqqueue.pid, {init, true}) == Q]. declare(QueueName, Durable, AutoDelete, Args, Owner) -> + ok = check_declare_arguments(QueueName, Args), Q = start_queue_process(#amqqueue{name = QueueName, durable = Durable, auto_delete = AutoDelete, @@ -213,6 +257,59 @@ with(Name, F) -> with_or_die(Name, F) -> with(Name, F, fun () -> rabbit_misc:not_found(Name) end). 
+assert_equivalence(#amqqueue{durable = Durable, + auto_delete = AutoDelete} = Q, + Durable, AutoDelete, RequiredArgs, Owner) -> + assert_args_equivalence(Q, RequiredArgs), + check_exclusive_access(Q, Owner, strict); +assert_equivalence(#amqqueue{name = QueueName}, + _Durable, _AutoDelete, _RequiredArgs, _Owner) -> + rabbit_misc:protocol_error( + not_allowed, "parameters for ~s not equivalent", + [rabbit_misc:rs(QueueName)]). + +check_exclusive_access(Q, Owner) -> check_exclusive_access(Q, Owner, lax). + +check_exclusive_access(#amqqueue{exclusive_owner = Owner}, Owner, _MatchType) -> + ok; +check_exclusive_access(#amqqueue{exclusive_owner = none}, _ReaderPid, lax) -> + ok; +check_exclusive_access(#amqqueue{name = QueueName}, _ReaderPid, _MatchType) -> + rabbit_misc:protocol_error( + resource_locked, + "cannot obtain exclusive access to locked ~s", + [rabbit_misc:rs(QueueName)]). + +with_exclusive_access_or_die(Name, ReaderPid, F) -> + with_or_die(Name, + fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end). + +assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args}, + RequiredArgs) -> + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName, + [<<"x-expires">>]). + +check_declare_arguments(QueueName, Args) -> + [case Fun(rabbit_misc:table_lookup(Args, Key)) of + ok -> ok; + {error, Error} -> rabbit_misc:protocol_error( + precondition_failed, + "Invalid arguments in declaration of queue ~s: " + "~w (on argument: ~w)", + [rabbit_misc:rs(QueueName), Error, Key]) + end || {Key, Fun} <- [{<<"x-expires">>, fun check_expires_argument/1}]], + ok. + +check_expires_argument(undefined) -> + ok; +check_expires_argument({?EXPIRES_TYPE, Expires}) + when is_integer(Expires) andalso Expires > 0 -> + ok; +check_expires_argument({?EXPIRES_TYPE, _Expires}) -> + {error, expires_zero_or_less}; +check_expires_argument(_) -> + {error, expires_not_of_type_long}. 
+ list(VHostPath) -> mnesia:dirty_match_object( rabbit_queue, @@ -247,9 +344,6 @@ consumers_all(VHostPath) -> stat(#amqqueue{pid = QPid}) -> delegate_call(QPid, stat, infinity). -stat_all() -> - lists:map(fun stat/1, rabbit_misc:dirty_read_all(rabbit_queue)). - delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) -> delegate_call(QPid, {delete, IfUnused, IfEmpty}, infinity). @@ -356,6 +450,9 @@ set_ram_duration_target(QPid, Duration) -> set_maximum_since_use(QPid, Age) -> gen_server2:pcast(QPid, 8, {set_maximum_since_use, Age}). +maybe_expire(QPid) -> + gen_server2:pcast(QPid, 8, maybe_expire). + on_node_down(Node) -> [Hook() || Hook <- rabbit_misc:execute_mnesia_transaction( @@ -395,7 +492,7 @@ delegate_call(Pid, Msg, Timeout) -> delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, Timeout) end). delegate_pcall(Pid, Pri, Msg, Timeout) -> - delegate:invoke(Pid, + delegate:invoke(Pid, fun (P) -> gen_server2:pcall(P, Pri, Msg, Timeout) end). delegate_pcast(Pid, Pri, Msg) -> diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl index 5fdf0ffa..67f0fcf5 100644 --- a/src/rabbit_amqqueue_process.erl +++ b/src/rabbit_amqqueue_process.erl @@ -35,7 +35,7 @@ -behaviour(gen_server2). --define(UNSENT_MESSAGE_LIMIT, 100). +-define(UNSENT_MESSAGE_LIMIT, 100). -define(SYNC_INTERVAL, 5). %% milliseconds -define(RAM_DURATION_UPDATE_INTERVAL, 5000). @@ -56,8 +56,10 @@ backing_queue_state, active_consumers, blocked_consumers, + expires, sync_timer_ref, - rate_timer_ref + rate_timer_ref, + expiry_timer_ref }). -record(consumer, {tag, ack_required}). 
@@ -102,15 +104,17 @@ init(Q) -> process_flag(trap_exit, true), {ok, BQ} = application:get_env(backing_queue_module), - {ok, #q{q = Q#amqqueue{pid = self()}, - exclusive_consumer = none, - has_had_consumers = false, - backing_queue = BQ, + {ok, #q{q = Q#amqqueue{pid = self()}, + exclusive_consumer = none, + has_had_consumers = false, + backing_queue = BQ, backing_queue_state = undefined, - active_consumers = queue:new(), - blocked_consumers = queue:new(), - sync_timer_ref = undefined, - rate_timer_ref = undefined}, hibernate, + active_consumers = queue:new(), + blocked_consumers = queue:new(), + expires = undefined, + sync_timer_ref = undefined, + rate_timer_ref = undefined, + expiry_timer_ref = undefined}, hibernate, {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. terminate(shutdown, State = #q{backing_queue = BQ}) -> @@ -132,12 +136,18 @@ code_change(_OldVsn, State, _Extra) -> %%---------------------------------------------------------------------------- +init_expires(State = #q{q = #amqqueue{arguments = Arguments}}) -> + case rabbit_misc:table_lookup(Arguments, <<"x-expires">>) of + {long, Expires} -> ensure_expiry_timer(State#q{expires = Expires}); + undefined -> State + end. + declare(Recover, From, State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable}, backing_queue = BQ, backing_queue_state = undefined}) -> case rabbit_amqqueue:internal_declare(Q, Recover) of not_found -> {stop, normal, not_found, State}; - Q -> gen_server2:reply(From, Q), + Q -> gen_server2:reply(From, {new, Q}), ok = file_handle_cache:register_callback( rabbit_amqqueue, set_maximum_since_use, [self()]), @@ -145,8 +155,8 @@ declare(Recover, From, self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), BQS = BQ:init(QName, IsDurable, Recover), - noreply(State#q{backing_queue_state = BQS}); - Q1 -> {stop, normal, Q1, State} + noreply(init_expires(State#q{backing_queue_state = BQS})); + Q1 -> {stop, normal, {existing, Q1}, State} end. 
terminate_shutdown(Fun, State) -> @@ -179,7 +189,7 @@ noreply(NewState) -> next_state(State) -> State1 = #q{backing_queue = BQ, backing_queue_state = BQS} = ensure_rate_timer(State), - case BQ:needs_sync(BQS)of + case BQ:needs_idle_timeout(BQS)of true -> {ensure_sync_timer(State1), 0}; false -> {stop_sync_timer(State1), hibernate} end. @@ -188,7 +198,7 @@ ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) -> {ok, TRef} = timer:apply_after( ?SYNC_INTERVAL, rabbit_amqqueue, maybe_run_queue_via_backing_queue, - [self(), fun (BQS) -> BQ:sync(BQS) end]), + [self(), fun (BQS) -> BQ:idle_timeout(BQS) end]), State#q{sync_timer_ref = TRef}; ensure_sync_timer(State) -> State. @@ -218,6 +228,27 @@ stop_rate_timer(State = #q{rate_timer_ref = TRef}) -> {ok, cancel} = timer:cancel(TRef), State#q{rate_timer_ref = undefined}. +stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) -> + State; +stop_expiry_timer(State = #q{expiry_timer_ref = TRef}) -> + {ok, cancel} = timer:cancel(TRef), + State#q{expiry_timer_ref = undefined}. + +%% We only wish to expire where there are no consumers *and* when +%% basic.get hasn't been called for the configured period. +ensure_expiry_timer(State = #q{expires = undefined}) -> + State; +ensure_expiry_timer(State = #q{expires = Expires}) -> + case is_unused(State) of + true -> + NewState = stop_expiry_timer(State), + {ok, TRef} = timer:apply_after( + Expires, rabbit_amqqueue, maybe_expire, [self()]), + NewState#q{expiry_timer_ref = TRef}; + false -> + State + end. + assert_invariant(#q{active_consumers = AC, backing_queue = BQ, backing_queue_state = BQS}) -> true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)). @@ -439,7 +470,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) -> _ -> rollback_transaction(Txn, ChPid, State1) end, - {ok, requeue_and_run(sets:to_list(ChAckTags), State2)} + {ok, requeue_and_run(sets:to_list(ChAckTags), + ensure_expiry_timer(State2))} end end. 
@@ -610,8 +642,9 @@ handle_call({basic_get, ChPid, NoAck}, _From, State = #q{q = #amqqueue{name = QName}, backing_queue_state = BQS, backing_queue = BQ}) -> AckRequired = not NoAck, + State1 = ensure_expiry_timer(State), case BQ:fetch(AckRequired, BQS) of - {empty, BQS1} -> reply(empty, State#q{backing_queue_state = BQS1}); + {empty, BQS1} -> reply(empty, State1#q{backing_queue_state = BQS1}); {{Message, IsDelivered, AckTag, Remaining}, BQS1} -> case AckRequired of true -> C = #cr{acktags = ChAckTags} = ch_record(ChPid), @@ -620,7 +653,7 @@ handle_call({basic_get, ChPid, NoAck}, _From, false -> ok end, Msg = {QName, self(), AckTag, IsDelivered, Message}, - reply({ok, Remaining, Msg}, State#q{backing_queue_state = BQS1}) + reply({ok, Remaining, Msg}, State1#q{backing_queue_state = BQS1}) end; handle_call({basic_consume, NoAck, ChPid, LimiterPid, @@ -687,16 +720,15 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From, ChPid, ConsumerTag, State#q.blocked_consumers)}, case should_auto_delete(NewState) of - false -> reply(ok, NewState); + false -> reply(ok, ensure_expiry_timer(NewState)); true -> {stop, normal, ok, NewState} end end; -handle_call(stat, _From, State = #q{q = #amqqueue{name = Name}, - backing_queue = BQ, +handle_call(stat, _From, State = #q{backing_queue = BQ, backing_queue_state = BQS, active_consumers = ActiveConsumers}) -> - reply({ok, Name, BQ:len(BQS), queue:len(ActiveConsumers)}, State); + reply({ok, BQ:len(BQS), queue:len(ActiveConsumers)}, State); handle_call({delete, IfUnused, IfEmpty}, _From, State = #q{backing_queue_state = BQS, backing_queue = BQ}) -> @@ -720,8 +752,6 @@ handle_call({requeue, AckTags, ChPid}, From, State) -> gen_server2:reply(From, ok), case lookup_ch(ChPid) of not_found -> - rabbit_log:warning("Ignoring requeue from unknown ch: ~p~n", - [ChPid]), noreply(State); C = #cr{acktags = ChAckTags} -> ChAckTags1 = subtract_acks(ChAckTags, AckTags), @@ -750,7 +780,7 @@ handle_cast({ack, Txn, AckTags, ChPid}, _ -> {C#cr{txn 
= Txn}, BQ:tx_ack(Txn, AckTags, BQS)} end, store_ch_record(C1), - noreply(State #q { backing_queue_state = BQS1 }) + noreply(State#q{backing_queue_state = BQS1}) end; handle_cast({rollback, Txn, ChPid}, State) -> @@ -804,7 +834,14 @@ handle_cast({set_ram_duration_target, Duration}, handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), - noreply(State). + noreply(State); + +handle_cast(maybe_expire, State) -> + case is_unused(State) of + true -> ?LOGDEBUG("Queue lease expired for ~p~n", [State#q.q]), + {stop, normal, State}; + false -> noreply(ensure_expiry_timer(State)) + end. handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State = #q{q = #amqqueue{exclusive_owner = DownPid}}) -> @@ -823,7 +860,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) -> handle_info(timeout, State = #q{backing_queue = BQ}) -> noreply(maybe_run_queue_via_backing_queue( - fun (BQS) -> BQ:sync(BQS) end, State)); + fun (BQS) -> BQ:idle_timeout(BQS) end, State)); handle_info({'EXIT', _Pid, Reason}, State) -> {stop, Reason, State}; diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl index 432d6290..2230c507 100644 --- a/src/rabbit_backing_queue.erl +++ b/src/rabbit_backing_queue.erl @@ -42,6 +42,11 @@ behaviour_info(callbacks) -> %% shared resources. {start, 1}, + %% Called to tear down any state/resources. NB: Implementations + %% should not depend on this function being called on shutdown + %% and instead should hook into the rabbit supervision hierarchy. + {stop, 0}, + %% Initialise the backing queue and its state. {init, 3}, @@ -113,14 +118,15 @@ behaviour_info(callbacks) -> %% queue. {ram_duration, 1}, - %% Should 'sync' be called as soon as the queue process can - %% manage (either on an empty mailbox, or when a timer fires)? - {needs_sync, 1}, + %% Should 'idle_timeout' be called as soon as the queue process + %% can manage (either on an empty mailbox, or when a timer + %% fires)? 
+ {needs_idle_timeout, 1}, - %% Called (eventually) after needs_sync returns 'true'. Note this - %% may be called more than once for each 'true' returned from - %% needs_sync. - {sync, 1}, + %% Called (eventually) after needs_idle_timeout returns + %% 'true'. Note this may be called more than once for each 'true' + %% returned from needs_idle_timeout. + {idle_timeout, 1}, %% Called immediately before the queue hibernates. {handle_pre_hibernate, 1}, diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl index 4ab7a2a0..c76c01ac 100644 --- a/src/rabbit_basic.erl +++ b/src/rabbit_basic.erl @@ -42,24 +42,41 @@ -ifdef(use_specs). --type(properties_input() :: (amqp_properties() | [{atom(), any()}])). --type(publish_result() :: ({ok, routing_result(), [pid()]} | not_found())). - --spec(publish/1 :: (delivery()) -> publish_result()). --spec(delivery/4 :: (boolean(), boolean(), maybe(txn()), message()) -> - delivery()). --spec(message/4 :: (exchange_name(), routing_key(), properties_input(), - binary()) -> (message() | {'error', any()})). --spec(properties/1 :: (properties_input()) -> amqp_properties()). --spec(publish/4 :: (exchange_name(), routing_key(), properties_input(), - binary()) -> publish_result()). --spec(publish/7 :: (exchange_name(), routing_key(), boolean(), boolean(), - maybe(txn()), properties_input(), binary()) -> - publish_result()). --spec(build_content/2 :: (amqp_properties(), binary()) -> content()). --spec(from_content/1 :: (content()) -> {amqp_properties(), binary()}). +-type(properties_input() :: + (rabbit_framing:amqp_property_record() | [{atom(), any()}])). +-type(publish_result() :: + ({ok, rabbit_router:routing_result(), [pid()]} + | rabbit_types:error('not_found'))). + +-spec(publish/1 :: (rabbit_types:delivery()) -> publish_result()). +-spec(delivery/4 :: + (boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), + rabbit_types:message()) + -> rabbit_types:delivery()). 
+-spec(message/4 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + properties_input(), binary()) + -> (rabbit_types:message() | rabbit_types:error(any()))). +-spec(properties/1 :: + (properties_input()) -> rabbit_framing:amqp_property_record()). +-spec(publish/4 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + properties_input(), binary()) + -> publish_result()). +-spec(publish/7 :: + (rabbit_exchange:name(), rabbit_router:routing_key(), + boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()), + properties_input(), binary()) + -> publish_result()). +-spec(build_content/2 :: + (rabbit_framing:amqp_property_record(), binary()) + -> rabbit_types:content()). +-spec(from_content/1 :: + (rabbit_types:content()) + -> {rabbit_framing:amqp_property_record(), binary()}). -spec(is_message_persistent/1 :: - (decoded_content()) -> (boolean() | {'invalid', non_neg_integer()})). + (rabbit_types:decoded_content()) + -> (boolean() | {'invalid', non_neg_integer()})). -endif. @@ -80,18 +97,24 @@ delivery(Mandatory, Immediate, Txn, Message) -> sender = self(), message = Message}. build_content(Properties, BodyBin) -> - {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), #content{class_id = ClassId, properties = Properties, properties_bin = none, + protocol = none, payload_fragments_rev = [BodyBin]}. 
from_content(Content) -> #content{class_id = ClassId, properties = Props, payload_fragments_rev = FragmentsRev} = - rabbit_binary_parser:ensure_content_decoded(Content), - {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'), + %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1 + rabbit_binary_parser:ensure_content_decoded(Content, + rabbit_framing_amqp_0_9_1), + {ClassId, _MethodId} = + rabbit_framing_amqp_0_9_1:method_id('basic.publish'), {Props, list_to_binary(lists:reverse(FragmentsRev))}. message(ExchangeName, RoutingKeyBin, RawProperties, BodyBin) -> diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl index 81cf3cee..f0ec6180 100644 --- a/src/rabbit_binary_generator.erl +++ b/src/rabbit_binary_generator.erl @@ -41,12 +41,12 @@ % See definition of check_empty_content_body_frame_size/0, an assertion called at startup. -define(EMPTY_CONTENT_BODY_FRAME_SIZE, 8). --export([build_simple_method_frame/2, - build_simple_content_frames/3, +-export([build_simple_method_frame/3, + build_simple_content_frames/4, build_heartbeat_frame/0]). -export([generate_table/1, encode_properties/2]). -export([check_empty_content_body_frame_size/0]). --export([ensure_content_encoded/1, clear_encoded_content/1]). +-export([ensure_content_encoded/2, clear_encoded_content/1]). -import(lists). @@ -56,45 +56,47 @@ -type(frame() :: [binary()]). --spec(build_simple_method_frame/2 :: - (channel_number(), amqp_method_record()) -> frame()). --spec(build_simple_content_frames/3 :: - (channel_number(), content(), non_neg_integer()) -> [frame()]). +-spec(build_simple_method_frame/3 :: + (rabbit_channel:channel_number(), rabbit_framing:amqp_method_record(), + rabbit_types:protocol()) + -> frame()). +-spec(build_simple_content_frames/4 :: + (rabbit_channel:channel_number(), rabbit_types:content(), + non_neg_integer(), rabbit_types:protocol()) + -> [frame()]). -spec(build_heartbeat_frame/0 :: () -> frame()). 
--spec(generate_table/1 :: (amqp_table()) -> binary()). --spec(encode_properties/2 :: ([amqp_property_type()], [any()]) -> binary()). +-spec(generate_table/1 :: (rabbit_framing:amqp_table()) -> binary()). +-spec(encode_properties/2 :: + ([rabbit_framing:amqp_property_type()], [any()]) -> binary()). -spec(check_empty_content_body_frame_size/0 :: () -> 'ok'). --spec(ensure_content_encoded/1 :: (content()) -> encoded_content()). --spec(clear_encoded_content/1 :: (content()) -> unencoded_content()). +-spec(ensure_content_encoded/2 :: + (rabbit_types:content(), rabbit_types:protocol()) -> + rabbit_types:encoded_content()). +-spec(clear_encoded_content/1 :: + (rabbit_types:content()) -> rabbit_types:unencoded_content()). -endif. %%---------------------------------------------------------------------------- -build_simple_method_frame(ChannelInt, MethodRecord) -> - MethodFields = rabbit_framing:encode_method_fields(MethodRecord), +build_simple_method_frame(ChannelInt, MethodRecord, Protocol) -> + MethodFields = Protocol:encode_method_fields(MethodRecord), MethodName = rabbit_misc:method_record_type(MethodRecord), - {ClassId, MethodId} = rabbit_framing:method_id(MethodName), + {ClassId, MethodId} = Protocol:method_id(MethodName), create_frame(1, ChannelInt, [<<ClassId:16, MethodId:16>>, MethodFields]). 
-build_simple_content_frames(ChannelInt, - #content{class_id = ClassId, - properties = ContentProperties, - properties_bin = ContentPropertiesBin, - payload_fragments_rev = PayloadFragmentsRev}, - FrameMax) -> - {BodySize, ContentFrames} = build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt), +build_simple_content_frames(ChannelInt, Content, FrameMax, Protocol) -> + #content{class_id = ClassId, + properties_bin = ContentPropertiesBin, + payload_fragments_rev = PayloadFragmentsRev} = + ensure_content_encoded(Content, Protocol), + {BodySize, ContentFrames} = + build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt), HeaderFrame = create_frame(2, ChannelInt, [<<ClassId:16, 0:16, BodySize:64>>, - maybe_encode_properties(ContentProperties, ContentPropertiesBin)]), + ContentPropertiesBin]), [HeaderFrame | ContentFrames]. -maybe_encode_properties(_ContentProperties, ContentPropertiesBin) - when is_binary(ContentPropertiesBin) -> - ContentPropertiesBin; -maybe_encode_properties(ContentProperties, none) -> - rabbit_framing:encode_properties(ContentProperties). - build_content_frames(FragsRev, FrameMax, ChannelInt) -> BodyPayloadMax = if FrameMax == 0 -> iolist_size(FragsRev); @@ -277,13 +279,16 @@ check_empty_content_body_frame_size() -> ComputedSize, ?EMPTY_CONTENT_BODY_FRAME_SIZE}) end. -ensure_content_encoded(Content = #content{properties_bin = PropsBin}) +ensure_content_encoded(Content = #content{properties_bin = PropsBin, + protocol = Protocol}, Protocol) when PropsBin =/= 'none' -> Content; -ensure_content_encoded(Content = #content{properties = Props}) -> - Content #content{properties_bin = rabbit_framing:encode_properties(Props)}. +ensure_content_encoded(Content = #content{properties = Props}, Protocol) -> + Content#content{properties_bin = Protocol:encode_properties(Props), + protocol = Protocol}. 
-clear_encoded_content(Content = #content{properties_bin = none}) -> +clear_encoded_content(Content = #content{properties_bin = none, + protocol = none}) -> Content; clear_encoded_content(Content = #content{properties = none}) -> %% Only clear when we can rebuild the properties_bin later in @@ -291,4 +296,4 @@ clear_encoded_content(Content = #content{properties = none}) -> %% one of properties and properties_bin can be 'none' Content; clear_encoded_content(Content = #content{}) -> - Content#content{properties_bin = none}. + Content#content{properties_bin = none, protocol = none}. diff --git a/src/rabbit_binary_parser.erl b/src/rabbit_binary_parser.erl index e022a1fa..1d0a62af 100644 --- a/src/rabbit_binary_parser.erl +++ b/src/rabbit_binary_parser.erl @@ -34,7 +34,7 @@ -include("rabbit.hrl"). -export([parse_table/1, parse_properties/2]). --export([ensure_content_decoded/1, clear_decoded_content/1]). +-export([ensure_content_decoded/2, clear_decoded_content/1]). -import(lists). @@ -42,10 +42,14 @@ -ifdef(use_specs). --spec(parse_table/1 :: (binary()) -> amqp_table()). --spec(parse_properties/2 :: ([amqp_property_type()], binary()) -> [any()]). --spec(ensure_content_decoded/1 :: (content()) -> decoded_content()). --spec(clear_decoded_content/1 :: (content()) -> undecoded_content()). +-spec(parse_table/1 :: (binary()) -> rabbit_framing:amqp_table()). +-spec(parse_properties/2 :: + ([rabbit_framing:amqp_property_type()], binary()) -> [any()]). +-spec(ensure_content_decoded/2 :: + (rabbit_types:content(), rabbit_types:protocol()) + -> rabbit_types:decoded_content()). +-spec(clear_decoded_content/1 :: + (rabbit_types:content()) -> rabbit_types:undecoded_content()). -endif. @@ -159,12 +163,12 @@ parse_property(bit, Rest) -> parse_property(table, <<Len:32/unsigned, Table:Len/binary, Rest/binary>>) -> {parse_table(Table), Rest}. 
-ensure_content_decoded(Content = #content{properties = Props}) +ensure_content_decoded(Content = #content{properties = Props}, _Protocol) when Props =/= 'none' -> Content; -ensure_content_decoded(Content = #content{properties_bin = PropBin}) +ensure_content_decoded(Content = #content{properties_bin = PropBin}, Protocol) when is_binary(PropBin) -> - Content#content{properties = rabbit_framing:decode_properties( + Content#content{properties = Protocol:decode_properties( Content#content.class_id, PropBin)}. clear_decoded_content(Content = #content{properties = none}) -> diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl index 3dfc026b..dd20d915 100644 --- a/src/rabbit_channel.erl +++ b/src/rabbit_channel.erl @@ -41,8 +41,8 @@ -export([flow_timeout/2]). --export([init/1, terminate/2, code_change/3, - handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1]). +-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, + handle_info/2, handle_pre_hibernate/1]). -record(ch, {state, channel, reader_pid, writer_pid, limiter_pid, transaction_id, tx_participants, next_tag, @@ -71,34 +71,39 @@ -ifdef(use_specs). +-export_type([channel_number/0]). + -type(ref() :: any()). +-type(channel_number() :: non_neg_integer()). -spec(start_link/6 :: - (channel_number(), pid(), pid(), username(), vhost(), pid()) -> pid()). --spec(do/2 :: (pid(), amqp_method_record()) -> 'ok'). --spec(do/3 :: (pid(), amqp_method_record(), maybe(content())) -> 'ok'). + (channel_number(), pid(), pid(), rabbit_access_control:username(), + rabbit_types:vhost(), pid()) -> rabbit_types:ok(pid())). +-spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). +-spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(), + rabbit_types:maybe(rabbit_types:content())) -> 'ok'). -spec(shutdown/1 :: (pid()) -> 'ok'). --spec(send_command/2 :: (pid(), amqp_method()) -> 'ok'). --spec(deliver/4 :: (pid(), ctag(), boolean(), qmsg()) -> 'ok'). 
+-spec(send_command/2 :: (pid(), rabbit_framing:amqp_method()) -> 'ok'). +-spec(deliver/4 :: + (pid(), rabbit_types:ctag(), boolean(), rabbit_amqqueue:qmsg()) + -> 'ok'). -spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok'). -spec(flushed/2 :: (pid(), pid()) -> 'ok'). -spec(flow_timeout/2 :: (pid(), ref()) -> 'ok'). -spec(list/0 :: () -> [pid()]). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (pid()) -> [info()]). --spec(info/2 :: (pid(), [info_key()]) -> [info()]). --spec(info_all/0 :: () -> [[info()]]). --spec(info_all/1 :: ([info_key()]) -> [[info()]]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (pid()) -> [rabbit_types:info()]). +-spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]). +-spec(info_all/0 :: () -> [[rabbit_types:info()]]). +-spec(info_all/1 :: ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]). -endif. %%---------------------------------------------------------------------------- start_link(Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid) -> - {ok, Pid} = gen_server2:start_link( - ?MODULE, [Channel, ReaderPid, WriterPid, - Username, VHost, CollectorPid], []), - Pid. + gen_server2:start_link(?MODULE, [Channel, ReaderPid, WriterPid, + Username, VHost, CollectorPid], []). do(Pid, Method) -> do(Pid, Method, none). @@ -284,20 +289,15 @@ terminating(Reason, State = #ch{channel = Channel, reader_pid = Reader}) -> Reader ! {channel_exit, Channel, Reason}, State#ch{state = terminating}. 
-return_queue_declare_ok(State, NoWait, Q) -> - NewState = State#ch{most_recently_declared_queue = - (Q#amqqueue.name)#resource.name}, +return_queue_declare_ok(#resource{name = ActualName}, + NoWait, MessageCount, ConsumerCount, State) -> + NewState = State#ch{most_recently_declared_queue = ActualName}, case NoWait of true -> {noreply, NewState}; - false -> - {ok, ActualName, MessageCount, ConsumerCount} = - rabbit_misc:with_exit_handler( - fun () -> {ok, Q#amqqueue.name, 0, 0} end, - fun () -> rabbit_amqqueue:stat(Q) end), - Reply = #'queue.declare_ok'{queue = ActualName#resource.name, - message_count = MessageCount, - consumer_count = ConsumerCount}, - {reply, Reply, NewState} + false -> Reply = #'queue.declare_ok'{queue = ActualName, + message_count = MessageCount, + consumer_count = ConsumerCount}, + {reply, Reply, NewState} end. check_resource_access(Username, Resource, Perm) -> @@ -329,19 +329,6 @@ check_write_permitted(Resource, #ch{ username = Username}) -> check_read_permitted(Resource, #ch{ username = Username}) -> check_resource_access(Username, Resource, read). -check_exclusive_access(#amqqueue{exclusive_owner = Owner}, Owner, _MatchType) -> - ok; -check_exclusive_access(#amqqueue{exclusive_owner = none}, _ReaderPid, lax) -> - ok; -check_exclusive_access(#amqqueue{name = QName}, _ReaderPid, _MatchType) -> - rabbit_misc:protocol_error( - resource_locked, - "cannot obtain exclusive access to locked ~s", [rabbit_misc:rs(QName)]). - -with_exclusive_access_or_die(QName, ReaderPid, F) -> - rabbit_amqqueue:with_or_die( - QName, fun (Q) -> check_exclusive_access(Q, ReaderPid, lax), F(Q) end). 
- expand_queue_name_shortcut(<<>>, #ch{ most_recently_declared_queue = <<>> }) -> rabbit_misc:protocol_error( not_found, "no previously declared queue", []); @@ -413,6 +400,9 @@ handle_method(#'channel.close'{}, _, State = #ch{writer_pid = WriterPid}) -> ok = rabbit_writer:send_command(WriterPid, #'channel.close_ok'{}), stop; +handle_method(#'access.request'{},_, State) -> + {reply, #'access.request_ok'{ticket = 1}, State}; + handle_method(#'basic.publish'{}, _, #ch{flow = #flow{client = false}}) -> rabbit_misc:protocol_error( command_invalid, @@ -429,7 +419,8 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, Exchange = rabbit_exchange:lookup_or_die(ExchangeName), %% We decode the content's properties here because we're almost %% certain to want to look at delivery-mode and priority. - DecodedContent = rabbit_binary_parser:ensure_content_decoded(Content), + DecodedContent = rabbit_binary_parser:ensure_content_decoded( + Content, rabbit_framing_amqp_0_9_1), IsPersistent = is_message_persistent(DecodedContent), Message = #basic_message{exchange_name = ExchangeName, routing_key = RoutingKey, @@ -441,16 +432,9 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, Exchange, rabbit_basic:delivery(Mandatory, Immediate, TxnKey, Message)), case RoutingRes of - routed -> - ok; - unroutable -> - %% FIXME: 312 should be replaced by the ?NO_ROUTE - %% definition, when we move to >=0-9 - ok = basic_return(Message, WriterPid, 312, <<"unroutable">>); - not_delivered -> - %% FIXME: 313 should be replaced by the ?NO_CONSUMERS - %% definition, when we move to >=0-9 - ok = basic_return(Message, WriterPid, 313, <<"not_delivered">>) + routed -> ok; + unroutable -> ok = basic_return(Message, WriterPid, no_route); + not_delivered -> ok = basic_return(Message, WriterPid, no_consumers) end, {noreply, case TxnKey of none -> State; @@ -481,7 +465,7 @@ handle_method(#'basic.get'{queue = QueueNameBin, next_tag = DeliveryTag }) -> QueueName = 
expand_queue_name_shortcut(QueueNameBin, State), check_read_permitted(QueueName, State), - case with_exclusive_access_or_die( + case rabbit_amqqueue:with_exclusive_access_or_die( QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:basic_get(Q, self(), NoAck) end) of {ok, MessageCount, @@ -500,7 +484,7 @@ handle_method(#'basic.get'{queue = QueueNameBin, Content), {noreply, State1#ch{next_tag = DeliveryTag + 1}}; empty -> - {reply, #'basic.get_empty'{deprecated_cluster_id = <<>>}, State} + {reply, #'basic.get_empty'{}, State} end; handle_method(#'basic.consume'{queue = QueueNameBin, @@ -525,7 +509,7 @@ handle_method(#'basic.consume'{queue = QueueNameBin, %% We get the queue process to send the consume_ok on our %% behalf. This is for symmetry with basic.cancel - see %% the comment in that method for why. - case with_exclusive_access_or_die( + case rabbit_amqqueue:with_exclusive_access_or_die( QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:basic_consume( @@ -606,8 +590,7 @@ handle_method(#'basic.qos'{prefetch_count = PrefetchCount}, {reply, #'basic.qos_ok'{}, State#ch{limiter_pid = LimiterPid2}}; handle_method(#'basic.recover_async'{requeue = true}, - _, State = #ch{ transaction_id = none, - unacked_message_q = UAMQ }) -> + _, State = #ch{ unacked_message_q = UAMQ }) -> ok = fold_per_queue( fun (QPid, MsgIds, ok) -> %% The Qpid python test suite incorrectly assumes @@ -622,8 +605,7 @@ handle_method(#'basic.recover_async'{requeue = true}, {noreply, State#ch{unacked_message_q = queue:new()}}; handle_method(#'basic.recover_async'{requeue = false}, - _, State = #ch{ transaction_id = none, - writer_pid = WriterPid, + _, State = #ch{ writer_pid = WriterPid, unacked_message_q = UAMQ }) -> ok = rabbit_misc:queue_fold( fun ({_DeliveryTag, none, _Msg}, ok) -> @@ -647,10 +629,6 @@ handle_method(#'basic.recover_async'{requeue = false}, %% variant of this method {noreply, State}; -handle_method(#'basic.recover_async'{}, _, _State) -> - rabbit_misc:protocol_error( - not_allowed, 
"attempt to recover a transactional channel",[]); - handle_method(#'basic.recover'{requeue = Requeue}, Content, State) -> {noreply, State2 = #ch{writer_pid = WriterPid}} = handle_method(#'basic.recover_async'{requeue = Requeue}, @@ -663,8 +641,8 @@ handle_method(#'exchange.declare'{exchange = ExchangeNameBin, type = TypeNameBin, passive = false, durable = Durable, - deprecated_auto_delete = false, %% 0-9-1: true not supported - deprecated_internal = false, %% 0-9-1: true not supported + auto_delete = AutoDelete, + internal = false, nowait = NoWait, arguments = Args}, _, State = #ch{ virtual_host = VHostPath }) -> @@ -685,9 +663,11 @@ handle_method(#'exchange.declare'{exchange = ExchangeNameBin, rabbit_exchange:declare(ExchangeName, CheckedType, Durable, + AutoDelete, Args) end, - ok = rabbit_exchange:assert_equivalence(X, CheckedType, Durable, Args), + ok = rabbit_exchange:assert_equivalence(X, CheckedType, Durable, + AutoDelete, Args), return_ok(State, NoWait, #'exchange.declare_ok'{}); handle_method(#'exchange.declare'{exchange = ExchangeNameBin, @@ -721,7 +701,7 @@ handle_method(#'queue.declare'{queue = QueueNameBin, exclusive = ExclusiveDeclare, auto_delete = AutoDelete, nowait = NoWait, - arguments = Args}, + arguments = Args} = Declare, _, State = #ch{virtual_host = VHostPath, reader_pid = ReaderPid, queue_collector_pid = CollectorPid}) -> @@ -729,46 +709,40 @@ handle_method(#'queue.declare'{queue = QueueNameBin, true -> ReaderPid; false -> none end, - %% We use this in both branches, because queue_declare may yet return an - %% existing queue. 
- Finish = fun (#amqqueue{name = QueueName, - durable = Durable1, - auto_delete = AutoDelete1} = Q) - when Durable =:= Durable1, AutoDelete =:= AutoDelete1 -> - check_exclusive_access(Q, Owner, strict), - check_configure_permitted(QueueName, State), - %% We need to notify the reader within the channel - %% process so that we can be sure there are no - %% outstanding exclusive queues being declared as the - %% connection shuts down. - case Owner of - none -> ok; - _ -> ok = rabbit_reader_queue_collector:register_exclusive_queue(CollectorPid, Q) - end, - Q; - %% non-equivalence trumps exclusivity arbitrarily - (#amqqueue{name = QueueName}) -> - rabbit_misc:protocol_error( - channel_error, - "parameters for ~s not equivalent", - [rabbit_misc:rs(QueueName)]) - end, - Q = case rabbit_amqqueue:with( - rabbit_misc:r(VHostPath, queue, QueueNameBin), - Finish) of - {error, not_found} -> - ActualNameBin = - case QueueNameBin of + ActualNameBin = case QueueNameBin of <<>> -> rabbit_guid:binstring_guid("amq.gen"); Other -> check_name('queue', Other) end, - QueueName = rabbit_misc:r(VHostPath, queue, ActualNameBin), - Finish(rabbit_amqqueue:declare(QueueName, Durable, AutoDelete, - Args, Owner)); - #amqqueue{} = Other -> - Other - end, - return_queue_declare_ok(State, NoWait, Q); + QueueName = rabbit_misc:r(VHostPath, queue, ActualNameBin), + check_configure_permitted(QueueName, State), + case rabbit_amqqueue:with( + QueueName, + fun (Q) -> ok = rabbit_amqqueue:assert_equivalence( + Q, Durable, AutoDelete, Args, Owner), + rabbit_amqqueue:stat(Q) + end) of + {ok, MessageCount, ConsumerCount} -> + return_queue_declare_ok(QueueName, NoWait, MessageCount, + ConsumerCount, State); + {error, not_found} -> + case rabbit_amqqueue:declare(QueueName, Durable, AutoDelete, + Args, Owner) of + {new, Q = #amqqueue{}} -> + %% We need to notify the reader within the channel + %% process so that we can be sure there are no + %% outstanding exclusive queues being declared as + %% the connection 
shuts down. + ok = case Owner of + none -> ok; + _ -> rabbit_queue_collector:register(CollectorPid, Q) + end, + return_queue_declare_ok(QueueName, NoWait, 0, 0, State); + {existing, _Q} -> + %% must have been created between the stat and the + %% declare. Loop around again. + handle_method(Declare, none, State) + end + end; handle_method(#'queue.declare'{queue = QueueNameBin, passive = true, @@ -777,8 +751,12 @@ handle_method(#'queue.declare'{queue = QueueNameBin, reader_pid = ReaderPid}) -> QueueName = rabbit_misc:r(VHostPath, queue, QueueNameBin), check_configure_permitted(QueueName, State), - Q = with_exclusive_access_or_die(QueueName, ReaderPid, fun (Q) -> Q end), - return_queue_declare_ok(State, NoWait, Q); + {{ok, MessageCount, ConsumerCount}, #amqqueue{} = Q} = + rabbit_amqqueue:with_or_die( + QueueName, fun (Q) -> {rabbit_amqqueue:stat(Q), Q} end), + ok = rabbit_amqqueue:check_exclusive_access(Q, ReaderPid), + return_queue_declare_ok(QueueName, NoWait, MessageCount, ConsumerCount, + State); handle_method(#'queue.delete'{queue = QueueNameBin, if_unused = IfUnused, @@ -787,7 +765,7 @@ handle_method(#'queue.delete'{queue = QueueNameBin, _, State = #ch{reader_pid = ReaderPid}) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_configure_permitted(QueueName, State), - case with_exclusive_access_or_die( + case rabbit_amqqueue:with_exclusive_access_or_die( QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:delete(Q, IfUnused, IfEmpty) end) of {error, in_use} -> @@ -823,7 +801,7 @@ handle_method(#'queue.purge'{queue = QueueNameBin, _, State = #ch{reader_pid = ReaderPid}) -> QueueName = expand_queue_name_shortcut(QueueNameBin, State), check_read_permitted(QueueName, State), - {ok, PurgedMessageCount} = with_exclusive_access_or_die( + {ok, PurgedMessageCount} = rabbit_amqqueue:with_exclusive_access_or_die( QueueName, ReaderPid, fun (Q) -> rabbit_amqqueue:purge(Q) end), return_ok(State, NoWait, @@ -931,7 +909,11 @@ binding_action(Fun, ExchangeNameBin, 
QueueNameBin, RoutingKey, Arguments, ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin), check_read_permitted(ExchangeName, State), case Fun(ExchangeName, QueueName, ActualRoutingKey, Arguments, - fun (_X, Q) -> check_exclusive_access(Q, ReaderPid, lax) end) of + fun (_X, Q) -> + try rabbit_amqqueue:check_exclusive_access(Q, ReaderPid) + catch exit:Reason -> {error, Reason} + end + end) of {error, exchange_not_found} -> rabbit_misc:not_found(ExchangeName); {error, queue_not_found} -> @@ -945,13 +927,17 @@ binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments, not_found, "no binding ~s between ~s and ~s", [RoutingKey, rabbit_misc:rs(ExchangeName), rabbit_misc:rs(QueueName)]); + {error, #amqp_error{} = Error} -> + rabbit_misc:protocol_error(Error); ok -> return_ok(State, NoWait, ReturnMethod) end. basic_return(#basic_message{exchange_name = ExchangeName, routing_key = RoutingKey, content = Content}, - WriterPid, ReplyCode, ReplyText) -> + WriterPid, Reason) -> + {_Close, ReplyCode, ReplyText} = + rabbit_framing_amqp_0_9_1:lookup_amqp_exception(Reason), ok = rabbit_writer:send_command( WriterPid, #'basic.return'{reply_code = ReplyCode, @@ -980,7 +966,7 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) -> end; {empty, _} -> rabbit_misc:protocol_error( - not_found, "unknown delivery tag ~w", [DeliveryTag]) + precondition_failed, "unknown delivery tag ~w", [DeliveryTag]) end. add_tx_participants(MoreP, State = #ch{tx_participants = Participants}) -> @@ -1044,7 +1030,7 @@ fold_per_queue(F, Acc0, UAQ) -> Acc0, D). start_limiter(State = #ch{unacked_message_q = UAMQ}) -> - LPid = rabbit_limiter:start_link(self(), queue:len(UAMQ)), + {ok, LPid} = rabbit_limiter:start_link(self(), queue:len(UAMQ)), ok = limit_queues(LPid, State), LPid. 
diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl index 323d4d2f..6e6ad06c 100644 --- a/src/rabbit_control.erl +++ b/src/rabbit_control.erl @@ -44,7 +44,7 @@ -spec(start/0 :: () -> no_return()). -spec(stop/0 :: () -> 'ok'). --spec(action/4 :: (atom(), erlang_node(), [string()], +-spec(action/4 :: (atom(), node(), [string()], fun ((string(), [any()]) -> 'ok')) -> 'ok'). -spec(usage/0 :: () -> no_return()). @@ -160,6 +160,12 @@ action(cluster, Node, ClusterNodeSs, Inform) -> [Node, ClusterNodes]), rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]); +action(force_cluster, Node, ClusterNodeSs, Inform) -> + ClusterNodes = lists:map(fun list_to_atom/1, ClusterNodeSs), + Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)", + [Node, ClusterNodes]), + rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]); + action(status, Node, [], Inform) -> Inform("Status of node ~p", [Node]), case call(Node, {rabbit, status, []}) of diff --git a/src/rabbit_dialyzer.erl b/src/rabbit_dialyzer.erl index f19e8d02..51bd6b1f 100644 --- a/src/rabbit_dialyzer.erl +++ b/src/rabbit_dialyzer.erl @@ -30,17 +30,17 @@ %% -module(rabbit_dialyzer). --include("rabbit.hrl"). --export([create_basic_plt/1, add_to_plt/2, dialyze_files/2, halt_with_code/1]). +-export([create_basic_plt/1, add_to_plt/2, dialyze_files/2, + halt_with_code/1]). %%---------------------------------------------------------------------------- -ifdef(use_specs). --spec(create_basic_plt/1 :: (file_path()) -> 'ok'). --spec(add_to_plt/2 :: (file_path(), string()) -> 'ok'). --spec(dialyze_files/2 :: (file_path(), string()) -> 'ok'). +-spec(create_basic_plt/1 :: (file:filename()) -> 'ok'). +-spec(add_to_plt/2 :: (file:filename(), string()) -> 'ok'). +-spec(dialyze_files/2 :: (file:filename(), string()) -> 'ok'). -spec(halt_with_code/1 :: (atom()) -> no_return()). -endif. @@ -56,7 +56,7 @@ create_basic_plt(BasicPltPath) -> ok. 
add_to_plt(PltPath, FilesString) -> - {ok, Files} = regexp:split(FilesString, " "), + Files = string:tokens(FilesString, " "), DialyzerWarnings = dialyzer:run([{analysis_type, plt_add}, {init_plt, PltPath}, {output_plt, PltPath}, @@ -65,7 +65,7 @@ add_to_plt(PltPath, FilesString) -> ok. dialyze_files(PltPath, ModifiedFiles) -> - {ok, Files} = regexp:split(ModifiedFiles, " "), + Files = string:tokens(ModifiedFiles, " "), DialyzerWarnings = dialyzer:run([{init_plt, PltPath}, {files, Files}]), case DialyzerWarnings of diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl index face0a1a..42861f86 100644 --- a/src/rabbit_error_logger.erl +++ b/src/rabbit_error_logger.erl @@ -39,7 +39,8 @@ -export([boot/0]). --export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2, handle_info/2]). +-export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2, + handle_info/2]). boot() -> {ok, DefaultVHost} = application:get_env(default_vhost), @@ -48,7 +49,7 @@ boot() -> init([DefaultVHost]) -> #exchange{} = rabbit_exchange:declare( rabbit_misc:r(DefaultVHost, exchange, ?LOG_EXCH_NAME), - topic, true, []), + topic, true, false, []), {ok, #resource{virtual_host = DefaultVHost, kind = exchange, name = ?LOG_EXCH_NAME}}. diff --git a/src/rabbit_error_logger_file_h.erl b/src/rabbit_error_logger_file_h.erl index 45b66712..875d680f 100644 --- a/src/rabbit_error_logger_file_h.erl +++ b/src/rabbit_error_logger_file_h.erl @@ -33,7 +33,8 @@ -behaviour(gen_event). --export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]). +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). 
%% rabbit_error_logger_file_h is a wrapper around the error_logger_file_h %% module because the original's init/1 does not match properly diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl index eb6f3e49..49f87a22 100644 --- a/src/rabbit_exchange.erl +++ b/src/rabbit_exchange.erl @@ -33,13 +33,12 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([recover/0, declare/4, lookup/1, lookup_or_die/1, - list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2, - publish/2]). +-export([recover/0, declare/5, lookup/1, lookup_or_die/1, list/1, info_keys/0, + info/1, info/2, info_all/1, info_all/2, publish/2]). -export([add_binding/5, delete_binding/5, list_bindings/1]). -export([delete/2]). -export([delete_queue_bindings/1, delete_transient_queue_bindings/1]). --export([assert_equivalence/4]). +-export([assert_equivalence/5]). -export([assert_args_equivalence/2]). -export([check_type/1]). @@ -50,55 +49,84 @@ -import(mnesia). -import(sets). -import(lists). --import(regexp). %%---------------------------------------------------------------------------- -ifdef(use_specs). --type(bind_res() :: 'ok' | {'error', - 'queue_not_found' | - 'exchange_not_found' | - 'exchange_and_queue_not_found'}). --type(inner_fun() :: fun((exchange(), queue()) -> any())). +-export_type([name/0, type/0, binding_key/0]). + +-type(name() :: rabbit_types:r('exchange')). +-type(type() :: atom()). +-type(binding_key() :: binary()). + +-type(bind_res() :: rabbit_types:ok_or_error('queue_not_found' | + 'exchange_not_found' | + 'exchange_and_queue_not_found')). +-type(inner_fun() :: + fun((rabbit_types:exchange(), queue()) -> + rabbit_types:ok_or_error(rabbit_types:amqp_error()))). -spec(recover/0 :: () -> 'ok'). --spec(declare/4 :: (exchange_name(), exchange_type(), boolean(), amqp_table()) -> exchange()). +-spec(declare/5 :: + (name(), type(), boolean(), boolean(), rabbit_framing:amqp_table()) + -> rabbit_types:exchange()). -spec(check_type/1 :: (binary()) -> atom()). 
--spec(assert_equivalence/4 :: (exchange(), atom(), boolean(), amqp_table()) -> 'ok'). --spec(assert_args_equivalence/2 :: (exchange(), amqp_table()) -> 'ok'). --spec(lookup/1 :: (exchange_name()) -> {'ok', exchange()} | not_found()). --spec(lookup_or_die/1 :: (exchange_name()) -> exchange()). --spec(list/1 :: (vhost()) -> [exchange()]). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (exchange()) -> [info()]). --spec(info/2 :: (exchange(), [info_key()]) -> [info()]). --spec(info_all/1 :: (vhost()) -> [[info()]]). --spec(info_all/2 :: (vhost(), [info_key()]) -> [[info()]]). --spec(publish/2 :: (exchange(), delivery()) -> {routing_result(), [pid()]}). +-spec(assert_equivalence/5 :: + (rabbit_types:exchange(), atom(), boolean(), boolean(), + rabbit_framing:amqp_table()) + -> 'ok' | no_return()). +-spec(assert_args_equivalence/2 :: + (rabbit_types:exchange(), rabbit_framing:amqp_table()) -> + 'ok' | no_return()). +-spec(lookup/1 :: + (name()) -> rabbit_types:ok(rabbit_types:exchange()) | + rabbit_types:error('not_found')). +-spec(lookup_or_die/1 :: (name()) -> rabbit_types:exchange()). +-spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:exchange()]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (rabbit_types:exchange()) -> [rabbit_types:info()]). +-spec(info/2 :: + (rabbit_types:exchange(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). +-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]). +-spec(info_all/2 ::(rabbit_types:vhost(), [rabbit_types:info_key()]) + -> [[rabbit_types:info()]]). +-spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery()) + -> {rabbit_router:routing_result(), [pid()]}). -spec(add_binding/5 :: - (exchange_name(), queue_name(), routing_key(), amqp_table(), inner_fun()) -> - bind_res()). + (name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table(), inner_fun()) + -> bind_res()). 
-spec(delete_binding/5 :: - (exchange_name(), queue_name(), routing_key(), amqp_table(), inner_fun()) -> - bind_res() | {'error', 'binding_not_found'}). --spec(list_bindings/1 :: (vhost()) -> - [{exchange_name(), queue_name(), routing_key(), amqp_table()}]). --spec(delete_queue_bindings/1 :: (queue_name()) -> fun (() -> none())). --spec(delete_transient_queue_bindings/1 :: (queue_name()) -> - fun (() -> none())). --spec(delete/2 :: (exchange_name(), boolean()) -> - 'ok' | not_found() | {'error', 'in_use'}). --spec(list_queue_bindings/1 :: (queue_name()) -> - [{exchange_name(), routing_key(), amqp_table()}]). --spec(list_exchange_bindings/1 :: (exchange_name()) -> - [{queue_name(), routing_key(), amqp_table()}]). + (name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table(), inner_fun()) + -> bind_res() | rabbit_types:error('binding_not_found')). +-spec(list_bindings/1 :: + (rabbit_types:vhost()) + -> [{name(), rabbit_amqqueue:name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table()}]). +-spec(delete_queue_bindings/1 :: + (rabbit_amqqueue:name()) -> fun (() -> none())). +-spec(delete_transient_queue_bindings/1 :: + (rabbit_amqqueue:name()) -> fun (() -> none())). +-spec(delete/2 :: + (name(), boolean())-> 'ok' | + rabbit_types:error('not_found') | + rabbit_types:error('in_use')). +-spec(list_queue_bindings/1 :: + (rabbit_amqqueue:name()) + -> [{name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table()}]). +-spec(list_exchange_bindings/1 :: + (name()) -> [{rabbit_amqqueue:name(), rabbit_router:routing_key(), + rabbit_framing:amqp_table()}]). -endif. %%---------------------------------------------------------------------------- --define(INFO_KEYS, [name, type, durable, arguments]). +-define(INFO_KEYS, [name, type, durable, auto_delete, arguments]). 
recover() -> Exs = rabbit_misc:table_fold( @@ -133,10 +161,11 @@ recover_with_bindings(Bs, [X = #exchange{type = Type} | Xs], Bindings) -> recover_with_bindings([], [], []) -> ok. -declare(ExchangeName, Type, Durable, Args) -> +declare(ExchangeName, Type, Durable, AutoDelete, Args) -> Exchange = #exchange{name = ExchangeName, type = Type, durable = Durable, + auto_delete = AutoDelete, arguments = Args}, %% We want to upset things if it isn't ok; this is different from %% the other hooks invocations, where we tend to ignore the return @@ -187,34 +216,25 @@ check_type(TypeBin) -> end. assert_equivalence(X = #exchange{ durable = Durable, + auto_delete = AutoDelete, type = Type}, - Type, Durable, - RequiredArgs) -> - ok = (type_to_module(Type)):assert_args_equivalence(X, RequiredArgs); -assert_equivalence(#exchange{ name = Name }, _Type, _Durable, + Type, Durable, AutoDelete, RequiredArgs) -> + (type_to_module(Type)):assert_args_equivalence(X, RequiredArgs); +assert_equivalence(#exchange{ name = Name }, _Type, _Durable, _AutoDelete, _Args) -> rabbit_misc:protocol_error( - precondition_failed, + not_allowed, "cannot redeclare ~s with different type, durable or autodelete value", [rabbit_misc:rs(Name)]). -alternate_exchange_value(Args) -> - lists:keysearch(<<"alternate-exchange">>, 1, Args). - assert_args_equivalence(#exchange{ name = Name, arguments = Args }, RequiredArgs) -> %% The spec says "Arguments are compared for semantic %% equivalence". The only arg we care about is %% "alternate-exchange". - Ae1 = alternate_exchange_value(RequiredArgs), - Ae2 = alternate_exchange_value(Args), - if Ae1==Ae2 -> ok; - true -> rabbit_misc:protocol_error( - precondition_failed, - "cannot redeclare ~s with inequivalent args", - [rabbit_misc:rs(Name)]) - end. + rabbit_misc:assert_args_equivalence(Args, RequiredArgs, Name, + [<<"alternate-exchange">>]). lookup(Name) -> rabbit_misc:dirty_read({rabbit_exchange, Name}). 
@@ -242,6 +262,7 @@ infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items]. i(name, #exchange{name = Name}) -> Name; i(type, #exchange{type = Type}) -> Type; i(durable, #exchange{durable = Durable}) -> Durable; +i(auto_delete, #exchange{auto_delete = AutoDelete}) -> AutoDelete; i(arguments, #exchange{arguments = Arguments}) -> Arguments; i(Item, _) -> throw({bad_argument, Item}). @@ -330,7 +351,7 @@ delete_queue_bindings(QueueName, FwdDeleteFun) -> Module = type_to_module(Type), case IsDeleted of auto_deleted -> Module:delete(X, Bs); - no_delete -> Module:remove_bindings(X, Bs) + not_deleted -> Module:remove_bindings(X, Bs) end end, Cleanup) end. @@ -397,23 +418,27 @@ add_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) -> %% this argument is used to check queue exclusivity; %% in general, we want to fail on that in preference to %% anything else - InnerFun(X, Q), - case mnesia:read({rabbit_route, B}) of - [] -> - sync_binding(B, - X#exchange.durable andalso - Q#amqqueue.durable, - fun mnesia:write/3), - {new, X, B}; - [_R] -> - {existing, X, B} + case InnerFun(X, Q) of + ok -> + case mnesia:read({rabbit_route, B}) of + [] -> + ok = sync_binding(B, + X#exchange.durable andalso + Q#amqqueue.durable, + fun mnesia:write/3), + {new, X, B}; + [_R] -> + {existing, X, B} + end; + {error, _} = E -> + E end end) of {new, Exchange = #exchange{ type = Type }, Binding} -> (type_to_module(Type)):add_binding(Exchange, Binding); {existing, _, _} -> ok; - Err = {error, _} -> + {error, _} = Err -> Err end. 
@@ -423,20 +448,29 @@ delete_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) -> fun (X, Q, B) -> case mnesia:match_object(rabbit_route, #route{binding = B}, write) of - [] -> {error, binding_not_found}; - _ -> InnerFun(X, Q), - ok = sync_binding(B, Q#amqqueue.durable, - fun mnesia:delete_object/3), - {maybe_auto_delete(X), B} + [] -> + {error, binding_not_found}; + _ -> + case InnerFun(X, Q) of + ok -> + ok = + sync_binding(B, + X#exchange.durable andalso + Q#amqqueue.durable, + fun mnesia:delete_object/3), + {maybe_auto_delete(X), B}; + {error, _} = E -> + E + end end end) of - Err = {error, _} -> + {error, _} = Err -> Err; - {{Action, X = #exchange{ type = Type }}, B} -> + {{IsDeleted, X = #exchange{ type = Type }}, B} -> Module = type_to_module(Type), - case Action of - auto_delete -> Module:delete(X, [B]); - no_delete -> Module:remove_bindings(X, [B]) + case IsDeleted of + auto_deleted -> Module:delete(X, [B]); + not_deleted -> Module:remove_bindings(X, [B]) end end. @@ -519,9 +553,13 @@ delete(ExchangeName, IfUnused) -> Error end. -%% TODO: remove this autodelete machinery altogether. -maybe_auto_delete(Exchange) -> - {no_delete, Exchange}. +maybe_auto_delete(Exchange = #exchange{auto_delete = false}) -> + {not_deleted, Exchange}; +maybe_auto_delete(Exchange = #exchange{auto_delete = true}) -> + case conditional_delete(Exchange) of + {error, in_use} -> {not_deleted, Exchange}; + {deleted, Exchange, []} -> {auto_deleted, Exchange} + end. conditional_delete(Exchange = #exchange{name = ExchangeName}) -> Match = #route{binding = #binding{exchange_name = ExchangeName, _ = '_'}}, diff --git a/src/rabbit_exchange_type_fanout.erl b/src/rabbit_exchange_type_fanout.erl index 4f9712b1..94798c78 100644 --- a/src/rabbit_exchange_type_fanout.erl +++ b/src/rabbit_exchange_type_fanout.erl @@ -35,8 +35,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). 
--export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2, assert_args_equivalence/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, diff --git a/src/rabbit_exchange_type_headers.erl b/src/rabbit_exchange_type_headers.erl index 315e8000..44607398 100644 --- a/src/rabbit_exchange_type_headers.erl +++ b/src/rabbit_exchange_type_headers.erl @@ -36,8 +36,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). --export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2, assert_args_equivalence/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, @@ -48,7 +48,8 @@ {enables, kernel_ready}]}). -ifdef(use_specs). --spec(headers_match/2 :: (amqp_table(), amqp_table()) -> boolean()). +-spec(headers_match/2 :: (rabbit_framing:amqp_table(), + rabbit_framing:amqp_table()) -> boolean()). -endif. description() -> diff --git a/src/rabbit_exchange_type_registry.erl b/src/rabbit_exchange_type_registry.erl index 33ea0e92..7906fbee 100644 --- a/src/rabbit_exchange_type_registry.erl +++ b/src/rabbit_exchange_type_registry.erl @@ -35,8 +35,8 @@ -export([start_link/0]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). -export([register/2, binary_to_type/1, lookup_module/1]). @@ -45,10 +45,13 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | {'error', term()} | {'ok', pid()}). +-spec(start_link/0 :: + () -> 'ignore' | rabbit_types:ok_or_error2(pid(), term())). -spec(register/2 :: (binary(), atom()) -> 'ok'). --spec(binary_to_type/1 :: (binary()) -> atom() | {'error', 'not_found'}). 
--spec(lookup_module/1 :: (atom()) -> {'ok', atom()} | {'error', 'not_found'}). +-spec(binary_to_type/1 :: + (binary()) -> atom() | rabbit_types:error('not_found')). +-spec(lookup_module/1 :: + (atom()) -> rabbit_types:ok_or_error2(atom(), 'not_found')). -endif. diff --git a/src/rabbit_exchange_type_topic.erl b/src/rabbit_exchange_type_topic.erl index 0e22d545..89b2441e 100644 --- a/src/rabbit_exchange_type_topic.erl +++ b/src/rabbit_exchange_type_topic.erl @@ -35,8 +35,8 @@ -behaviour(rabbit_exchange_type). -export([description/0, publish/2]). --export([validate/1, create/1, recover/2, delete/2, - add_binding/2, remove_bindings/2, assert_args_equivalence/2]). +-export([validate/1, create/1, recover/2, delete/2, add_binding/2, + remove_bindings/2, assert_args_equivalence/2]). -include("rabbit_exchange_type_spec.hrl"). -rabbit_boot_step({?MODULE, @@ -49,7 +49,9 @@ -export([topic_matches/2]). -ifdef(use_specs). + -spec(topic_matches/2 :: (binary(), binary()) -> boolean()). + -endif. description() -> @@ -65,8 +67,7 @@ publish(#exchange{name = Name}, Delivery = Delivery). split_topic_key(Key) -> - {ok, KeySplit} = regexp:split(binary_to_list(Key), "\\."), - KeySplit. + re:split(Key, "\\.", [{return, list}]). topic_matches(PatternKey, RoutingKey) -> P = split_topic_key(PatternKey), diff --git a/src/rabbit_framing_channel.erl b/src/rabbit_framing_channel.erl index 161dfd84..f4dbdf34 100644 --- a/src/rabbit_framing_channel.erl +++ b/src/rabbit_framing_channel.erl @@ -32,21 +32,22 @@ -module(rabbit_framing_channel). -include("rabbit.hrl"). --export([start_link/2, process/2, shutdown/1]). +-export([start_link/3, process/2, shutdown/1]). %% internal --export([mainloop/1]). +-export([mainloop/2]). %%-------------------------------------------------------------------- -start_link(StartFun, StartArgs) -> - spawn_link( - fun () -> - %% we trap exits so that a normal termination of the - %% channel or reader process terminates us too. 
- process_flag(trap_exit, true), - mainloop(apply(StartFun, StartArgs)) - end). +start_link(StartFun, StartArgs, Protocol) -> + {ok, spawn_link( + fun () -> + %% we trap exits so that a normal termination of + %% the channel or reader process terminates us too. + process_flag(trap_exit, true), + {ok, ChannelPid} = apply(StartFun, StartArgs), + mainloop(ChannelPid, Protocol) + end)}. process(Pid, Frame) -> Pid ! {frame, Frame}, @@ -72,18 +73,20 @@ read_frame(ChannelPid) -> Msg -> exit({unexpected_message, Msg}) end. -mainloop(ChannelPid) -> +mainloop(ChannelPid, Protocol) -> Decoded = read_frame(ChannelPid), case Decoded of {method, MethodName, FieldsBin} -> - Method = rabbit_framing:decode_method_fields(MethodName, FieldsBin), - case rabbit_framing:method_has_content(MethodName) of - true -> rabbit_channel:do(ChannelPid, Method, + Method = Protocol:decode_method_fields(MethodName, FieldsBin), + case Protocol:method_has_content(MethodName) of + true -> {ClassId, _MethodId} = Protocol:method_id(MethodName), + rabbit_channel:do(ChannelPid, Method, collect_content(ChannelPid, - MethodName)); + ClassId, + Protocol)); false -> rabbit_channel:do(ChannelPid, Method) end, - ?MODULE:mainloop(ChannelPid); + ?MODULE:mainloop(ChannelPid, Protocol); _ -> rabbit_misc:protocol_error( unexpected_frame, @@ -91,23 +94,21 @@ mainloop(ChannelPid) -> [Decoded]) end. 
-collect_content(ChannelPid, MethodName) -> - {ClassId, _MethodId} = rabbit_framing:method_id(MethodName), +collect_content(ChannelPid, ClassId, Protocol) -> case read_frame(ChannelPid) of - {content_header, HeaderClassId, 0, BodySize, PropertiesBin} -> - if HeaderClassId == ClassId -> - Payload = collect_content_payload(ChannelPid, BodySize, []), - #content{class_id = ClassId, - properties = none, - properties_bin = PropertiesBin, - payload_fragments_rev = Payload}; - true -> - rabbit_misc:protocol_error( - unexpected_frame, - "expected content header for class ~w, " - "got one for class ~w instead", - [ClassId, HeaderClassId]) - end; + {content_header, ClassId, 0, BodySize, PropertiesBin} -> + Payload = collect_content_payload(ChannelPid, BodySize, []), + #content{class_id = ClassId, + properties = none, + properties_bin = PropertiesBin, + protocol = Protocol, + payload_fragments_rev = Payload}; + {content_header, HeaderClassId, 0, _BodySize, _PropertiesBin} -> + rabbit_misc:protocol_error( + unexpected_frame, + "expected content header for class ~w, " + "got one for class ~w instead", + [ClassId, HeaderClassId]); _ -> rabbit_misc:protocol_error( unexpected_frame, diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl index 1ae8f7da..af1c629f 100644 --- a/src/rabbit_guid.erl +++ b/src/rabbit_guid.erl @@ -31,15 +31,13 @@ -module(rabbit_guid). --include("rabbit.hrl"). - -behaviour(gen_server). -export([start_link/0]). -export([guid/0, string_guid/1, binstring_guid/1]). --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). -define(SERVER, ?MODULE). -define(SERIAL_FILENAME, "rabbit_serial"). @@ -50,7 +48,11 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-export_type([guid/0]). + +-type(guid() :: binary()). 
+ +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(guid/0 :: () -> guid()). -spec(string_guid/1 :: (any()) -> string()). -spec(binstring_guid/1 :: (any()) -> binary()). diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl index a7ca20c8..4e0dad84 100644 --- a/src/rabbit_invariable_queue.erl +++ b/src/rabbit_invariable_queue.erl @@ -34,10 +34,10 @@ -export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1, - set_ram_duration_target/2, ram_duration/1, needs_sync/1, sync/1, - handle_pre_hibernate/1, status/1]). + set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1, + idle_timeout/1, handle_pre_hibernate/1, status/1]). --export([start/1]). +-export([start/1, stop/0]). -behaviour(rabbit_backing_queue). @@ -48,11 +48,11 @@ -ifdef(use_specs). --type(ack() :: guid() | 'blank_ack'). +-type(ack() :: rabbit_guid:guid() | 'blank_ack'). -type(state() :: #iv_state { queue :: queue(), - qname :: queue_name(), + qname :: rabbit_amqqueue:name(), len :: non_neg_integer(), - pending_ack :: dict() + pending_ack :: dict:dictionary() }). -include("rabbit_backing_queue_spec.hrl"). @@ -61,6 +61,9 @@ start(DurableQueues) -> ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]). +stop() -> + ok = rabbit_sup:stop_child(rabbit_persister). + init(QName, IsDurable, Recover) -> Q = queue:from_list(case IsDurable andalso Recover of true -> rabbit_persister:queue_content(QName); @@ -197,9 +200,9 @@ set_ram_duration_target(_DurationTarget, State) -> State. ram_duration(State) -> {0, State}. -needs_sync(_State) -> false. +needs_idle_timeout(_State) -> false. -sync(State) -> State. +idle_timeout(State) -> State. handle_pre_hibernate(State) -> State. 
@@ -242,8 +245,7 @@ do_if_persistent(F, Txn, QName) -> persist_message(QName, true, Txn, Msg = #basic_message { is_persistent = true }) -> Msg1 = Msg #basic_message { - %% don't persist any recoverable decoded properties, - %% rebuild from properties_bin on restore + %% don't persist any recoverable decoded properties content = rabbit_binary_parser:clear_decoded_content( Msg #basic_message.content)}, persist_work(Txn, QName, diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl index 878af029..813ccc75 100644 --- a/src/rabbit_limiter.erl +++ b/src/rabbit_limiter.erl @@ -45,7 +45,7 @@ -type(maybe_pid() :: pid() | 'undefined'). --spec(start_link/2 :: (pid(), non_neg_integer()) -> pid()). +-spec(start_link/2 :: (pid(), non_neg_integer()) -> rabbit_types:ok(pid())). -spec(shutdown/1 :: (maybe_pid()) -> 'ok'). -spec(limit/2 :: (maybe_pid(), non_neg_integer()) -> 'ok' | 'stopped'). -spec(can_send/3 :: (maybe_pid(), pid(), boolean()) -> boolean()). @@ -74,8 +74,7 @@ %%---------------------------------------------------------------------------- start_link(ChPid, UnackedMsgCount) -> - {ok, Pid} = gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []), - Pid. + gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []). shutdown(undefined) -> ok; diff --git a/src/rabbit_load.erl b/src/rabbit_load.erl index 4f467162..e0457b1e 100644 --- a/src/rabbit_load.erl +++ b/src/rabbit_load.erl @@ -40,11 +40,10 @@ -ifdef(use_specs). --type(erlang_node() :: atom()). --type(load() :: {{non_neg_integer(), integer() | 'unknown'}, erlang_node()}). +-type(load() :: {{non_neg_integer(), integer() | 'unknown'}, node()}). -spec(local_load/0 :: () -> load()). -spec(remote_loads/0 :: () -> [load()]). --spec(pick/0 :: () -> erlang_node()). +-spec(pick/0 :: () -> node()). -endif. diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl index cc80e360..85bcbca0 100644 --- a/src/rabbit_log.erl +++ b/src/rabbit_log.erl @@ -50,7 +50,7 @@ -ifdef(use_specs). 
--spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(debug/1 :: (string()) -> 'ok'). -spec(debug/2 :: (string(), [any()]) -> 'ok'). -spec(info/1 :: (string()) -> 'ok'). diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl index 91e97ffe..bdf38075 100644 --- a/src/rabbit_memory_monitor.erl +++ b/src/rabbit_memory_monitor.erl @@ -86,11 +86,12 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}). +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(update/0 :: () -> 'ok'). -spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok'). -spec(deregister/1 :: (pid()) -> 'ok'). --spec(report_ram_duration/2 :: (pid(), float() | 'infinity') -> number()). +-spec(report_ram_duration/2 :: + (pid(), float() | 'infinity') -> number() | 'infinity'). -spec(stop/0 :: () -> 'ok'). -endif. diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl index 35739dcb..050b499f 100644 --- a/src/rabbit_misc.erl +++ b/src/rabbit_misc.erl @@ -32,14 +32,16 @@ -module(rabbit_misc). -include("rabbit.hrl"). -include("rabbit_framing.hrl"). + -include_lib("kernel/include/file.hrl"). -export([method_record_type/1, polite_pause/0, polite_pause/1]). -export([die/1, frame_error/2, amqp_error/4, - protocol_error/3, protocol_error/4]). --export([not_found/1]). + protocol_error/3, protocol_error/4, protocol_error/1]). +-export([not_found/1, assert_args_equivalence/4]). -export([get_config/1, get_config/2, set_config/2]). -export([dirty_read/1]). +-export([table_lookup/2]). -export([r/3, r/2, r_arg/4, rs/1]). -export([enable_cover/0, report_cover/0]). -export([enable_cover/1, report_cover/1]). @@ -60,7 +62,8 @@ -export([sort_field_table/1]). -export([pid_to_string/1, string_to_pid/1]). -export([version_compare/2, version_compare/3]). --export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]). 
+-export([recursive_delete/1, dict_cons/3, orddict_cons/3, + unlink_and_capture_exit/1]). -import(mnesia). -import(lists). @@ -71,61 +74,91 @@ -ifdef(use_specs). --include_lib("kernel/include/inet.hrl"). +-export_type([resource_name/0]). --type(ok_or_error() :: 'ok' | {'error', any()}). +-type(ok_or_error() :: rabbit_types:ok_or_error(any())). +-type(thunk(T) :: fun(() -> T)). +-type(resource_name() :: binary()). --spec(method_record_type/1 :: (tuple()) -> atom()). +-spec(method_record_type/1 :: (rabbit_framing:amqp_method_record()) + -> rabbit_framing:amqp_method_name()). -spec(polite_pause/0 :: () -> 'done'). -spec(polite_pause/1 :: (non_neg_integer()) -> 'done'). --spec(die/1 :: (atom()) -> no_return()). --spec(frame_error/2 :: (atom(), binary()) -> no_return()). --spec(amqp_error/4 :: (atom(), string(), [any()], atom()) -> amqp_error()). --spec(protocol_error/3 :: (atom(), string(), [any()]) -> no_return()). --spec(protocol_error/4 :: (atom(), string(), [any()], atom()) -> no_return()). --spec(not_found/1 :: (r(atom())) -> no_return()). --spec(get_config/1 :: (atom()) -> {'ok', any()} | not_found()). +-spec(die/1 :: (rabbit_framing:amqp_exception()) -> no_return()). +-spec(frame_error/2 :: (rabbit_framing:amqp_method_name(), binary()) + -> no_return()). +-spec(amqp_error/4 :: + (rabbit_framing:amqp_exception(), string(), [any()], + rabbit_framing:amqp_method_name()) + -> rabbit_types:amqp_error()). +-spec(protocol_error/3 :: (rabbit_framing:amqp_exception(), string(), [any()]) + -> no_return()). +-spec(protocol_error/4 :: + (rabbit_framing:amqp_exception(), string(), [any()], + rabbit_framing:amqp_method_name()) + -> no_return()). +-spec(protocol_error/1 :: (rabbit_types:amqp_error()) -> no_return()). +-spec(not_found/1 :: (rabbit_types:r(atom())) -> no_return()). +-spec(assert_args_equivalence/4 :: (rabbit_framing:amqp_table(), + rabbit_framing:amqp_table(), + rabbit_types:r(any()), [binary()]) -> + 'ok' | no_return()). 
+-spec(get_config/1 :: + (atom()) -> rabbit_types:ok_or_error2(any(), 'not_found')). -spec(get_config/2 :: (atom(), A) -> A). -spec(set_config/2 :: (atom(), any()) -> 'ok'). --spec(dirty_read/1 :: ({atom(), any()}) -> {'ok', any()} | not_found()). --spec(r/3 :: (vhost() | r(atom()), K, resource_name()) -> - r(K) when is_subtype(K, atom())). --spec(r/2 :: (vhost(), K) -> #resource{virtual_host :: vhost(), - kind :: K, - name :: '_'} - when is_subtype(K, atom())). --spec(r_arg/4 :: (vhost() | r(atom()), K, amqp_table(), binary()) -> - undefined | r(K) when is_subtype(K, atom())). --spec(rs/1 :: (r(atom())) -> string()). +-spec(dirty_read/1 :: + ({atom(), any()}) -> rabbit_types:ok_or_error2(any(), 'not_found')). +-spec(table_lookup/2 :: + (rabbit_framing:amqp_table(), binary()) + -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}). +-spec(r/2 :: (rabbit_types:vhost(), K) + -> rabbit_types:r3(rabbit_types:vhost(), K, '_') + when is_subtype(K, atom())). +-spec(r/3 :: + (rabbit_types:vhost() | rabbit_types:r(atom()), K, resource_name()) + -> rabbit_types:r3(rabbit_types:vhost(), K, resource_name()) + when is_subtype(K, atom())). +-spec(r_arg/4 :: + (rabbit_types:vhost() | rabbit_types:r(atom()), K, + rabbit_framing:amqp_table(), binary()) + -> undefined | rabbit_types:r(K) + when is_subtype(K, atom())). +-spec(rs/1 :: (rabbit_types:r(atom())) -> string()). -spec(enable_cover/0 :: () -> ok_or_error()). -spec(start_cover/1 :: ([{string(), string()} | string()]) -> 'ok'). -spec(report_cover/0 :: () -> 'ok'). --spec(enable_cover/1 :: (file_path()) -> ok_or_error()). --spec(report_cover/1 :: (file_path()) -> 'ok'). +-spec(enable_cover/1 :: (file:filename()) -> ok_or_error()). +-spec(report_cover/1 :: (file:filename()) -> 'ok'). -spec(throw_on_error/2 :: - (atom(), thunk({error, any()} | {ok, A} | A)) -> A). + (atom(), thunk(rabbit_types:error(any()) | {ok, A} | A)) -> A). -spec(with_exit_handler/2 :: (thunk(A), thunk(A)) -> A). 
-spec(filter_exit_map/2 :: (fun ((A) -> B), [A]) -> [B]). --spec(with_user/2 :: (username(), thunk(A)) -> A). --spec(with_vhost/2 :: (vhost(), thunk(A)) -> A). --spec(with_user_and_vhost/3 :: (username(), vhost(), thunk(A)) -> A). +-spec(with_user/2 :: (rabbit_access_control:username(), thunk(A)) -> A). +-spec(with_vhost/2 :: (rabbit_types:vhost(), thunk(A)) -> A). +-spec(with_user_and_vhost/3 :: + (rabbit_access_control:username(), rabbit_types:vhost(), thunk(A)) + -> A). -spec(execute_mnesia_transaction/1 :: (thunk(A)) -> A). -spec(ensure_ok/2 :: (ok_or_error(), atom()) -> 'ok'). --spec(makenode/1 :: ({string(), string()} | string()) -> erlang_node()). --spec(nodeparts/1 :: (erlang_node() | string()) -> {string(), string()}). +-spec(makenode/1 :: ({string(), string()} | string()) -> node()). +-spec(nodeparts/1 :: (node() | string()) -> {string(), string()}). -spec(cookie_hash/0 :: () -> string()). --spec(tcp_name/3 :: (atom(), ip_address(), ip_port()) -> atom()). +-spec(tcp_name/3 :: + (atom(), inet:ip_address(), rabbit_networking:ip_port()) + -> atom()). -spec(intersperse/2 :: (A, [A]) -> [A]). -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]). -spec(table_fold/3 :: (fun ((any(), A) -> A), A, atom()) -> A). -spec(dirty_read_all/1 :: (atom()) -> [any()]). --spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom()) -> - 'ok' | 'aborted'). --spec(dirty_dump_log/1 :: (file_path()) -> ok_or_error()). --spec(read_term_file/1 :: (file_path()) -> {'ok', [any()]} | {'error', any()}). --spec(write_term_file/2 :: (file_path(), [any()]) -> ok_or_error()). --spec(append_file/2 :: (file_path(), string()) -> ok_or_error()). +-spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom()) + -> 'ok' | 'aborted'). +-spec(dirty_dump_log/1 :: (file:filename()) -> ok_or_error()). +-spec(read_term_file/1 :: + (file:filename()) -> {'ok', [any()]} | rabbit_types:error(any())). 
+-spec(write_term_file/2 :: (file:filename(), [any()]) -> ok_or_error()). +-spec(append_file/2 :: (file:filename(), string()) -> ok_or_error()). -spec(ensure_parent_dirs_exist/1 :: (string()) -> 'ok'). -spec(format_stderr/2 :: (string(), [any()]) -> 'ok'). -spec(start_applications/1 :: ([atom()]) -> 'ok'). @@ -133,15 +166,21 @@ -spec(unfold/2 :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}). -spec(ceil/1 :: (number()) -> integer()). -spec(queue_fold/3 :: (fun ((any(), B) -> B), B, queue()) -> B). --spec(sort_field_table/1 :: (amqp_table()) -> amqp_table()). +-spec(sort_field_table/1 :: + (rabbit_framing:amqp_table()) -> rabbit_framing:amqp_table()). -spec(pid_to_string/1 :: (pid()) -> string()). -spec(string_to_pid/1 :: (string()) -> pid()). -spec(version_compare/2 :: (string(), string()) -> 'lt' | 'eq' | 'gt'). --spec(version_compare/3 :: (string(), string(), - ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) -> boolean()). --spec(recursive_delete/1 :: ([file_path()]) -> - 'ok' | {'error', {file_path(), any()}}). --spec(dict_cons/3 :: (any(), any(), dict()) -> dict()). +-spec(version_compare/3 :: + (string(), string(), ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) + -> boolean()). +-spec(recursive_delete/1 :: + ([file:filename()]) + -> rabbit_types:ok_or_error({file:filename(), any()})). +-spec(dict_cons/3 :: (any(), any(), dict:dictionary()) -> + dict:dictionary()). +-spec(orddict_cons/3 :: (any(), any(), orddict:dictionary()) -> + orddict:dictionary()). -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok'). -endif. @@ -173,10 +212,27 @@ protocol_error(Name, ExplanationFormat, Params) -> protocol_error(Name, ExplanationFormat, Params, none). protocol_error(Name, ExplanationFormat, Params, Method) -> - exit(amqp_error(Name, ExplanationFormat, Params, Method)). + protocol_error(amqp_error(Name, ExplanationFormat, Params, Method)). + +protocol_error(#amqp_error{} = Error) -> + exit(Error). not_found(R) -> protocol_error(not_found, "no ~s", [rs(R)]). 
+assert_args_equivalence(Orig, New, Name, Keys) -> + [assert_args_equivalence1(Orig, New, Name, Key) || Key <- Keys], + ok. + +assert_args_equivalence1(Orig, New, Name, Key) -> + case {table_lookup(Orig, Key), table_lookup(New, Key)} of + {Same, Same} -> ok; + {Orig1, New1} -> protocol_error( + not_allowed, + "cannot redeclare ~s with inequivalent args for ~s: " + "required ~w, received ~w", + [rabbit_misc:rs(Name), Key, New1, Orig1]) + end. + get_config(Key) -> case dirty_read({rabbit_config, Key}) of {ok, {rabbit_config, Key, V}} -> {ok, V}; @@ -198,6 +254,12 @@ dirty_read(ReadSpec) -> [] -> {error, not_found} end. +table_lookup(Table, Key) -> + case lists:keysearch(Key, 1, Table) of + {value, {_, TypeBin, ValueBin}} -> {TypeBin, ValueBin}; + false -> undefined + end. + r(#resource{virtual_host = VHostPath}, Kind, Name) when is_binary(Name) -> #resource{virtual_host = VHostPath, kind = Kind, name = Name}; @@ -210,9 +272,9 @@ r(VHostPath, Kind) when is_binary(VHostPath) -> r_arg(#resource{virtual_host = VHostPath}, Kind, Table, Key) -> r_arg(VHostPath, Kind, Table, Key); r_arg(VHostPath, Kind, Table, Key) -> - case lists:keysearch(Key, 1, Table) of - {value, {_, longstr, NameBin}} -> r(VHostPath, Kind, NameBin); - false -> undefined + case table_lookup(Table, Key) of + {longstr, NameBin} -> r(VHostPath, Kind, NameBin); + undefined -> undefined end. rs(#resource{virtual_host = VHostPath, kind = Kind, name = Name}) -> @@ -555,7 +617,7 @@ string_to_pid(Str) -> binary_to_term(<<131,103,NodeEnc/binary,Id:32,Ser:32,0:8>>); nomatch -> throw(Err) - end. + end. version_compare(A, B, lte) -> case version_compare(A, B) of @@ -631,6 +693,9 @@ recursive_delete1(Path) -> dict_cons(Key, Value, Dict) -> dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). +orddict_cons(Key, Value, Dict) -> + orddict:update(Key, fun (List) -> [Value | List] end, [Value], Dict). 
+ unlink_and_capture_exit(Pid) -> unlink(Pid), receive {'EXIT', Pid, _} -> ok diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl index a0b7aa4e..c808499b 100644 --- a/src/rabbit_mnesia.erl +++ b/src/rabbit_mnesia.erl @@ -29,11 +29,12 @@ %% Contributor(s): ______________________________________. %% + -module(rabbit_mnesia). -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0, - cluster/1, reset/0, force_reset/0, is_clustered/0, - empty_ram_only_tables/0]). + cluster/1, force_cluster/1, reset/0, force_reset/0, + is_clustered/0, empty_ram_only_tables/0]). -export([table_names/0]). @@ -47,12 +48,18 @@ -ifdef(use_specs). --spec(status/0 :: () -> [{'nodes' | 'running_nodes', [erlang_node()]}]). --spec(dir/0 :: () -> file_path()). +-export_type([node_type/0]). + +-type(node_type() :: disc_only | disc | ram | unknown). +-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} | + {'running_nodes', [node()]}]). +-spec(dir/0 :: () -> file:filename()). -spec(ensure_mnesia_dir/0 :: () -> 'ok'). -spec(init/0 :: () -> 'ok'). -spec(is_db_empty/0 :: () -> boolean()). --spec(cluster/1 :: ([erlang_node()]) -> 'ok'). +-spec(cluster/1 :: ([node()]) -> 'ok'). +-spec(force_cluster/1 :: ([node()]) -> 'ok'). +-spec(cluster/2 :: ([node()], boolean()) -> 'ok'). -spec(reset/0 :: () -> 'ok'). -spec(force_reset/0 :: () -> 'ok'). -spec(is_clustered/0 :: () -> boolean()). @@ -64,13 +71,26 @@ %%---------------------------------------------------------------------------- status() -> - [{nodes, mnesia:system_info(db_nodes)}, + [{nodes, case mnesia:system_info(is_running) of + yes -> [{Key, Nodes} || + {Key, CopyType} <- [{disc_only, disc_only_copies}, + {disc, disc_copies}, + {ram, ram_copies}], + begin + Nodes = nodes_of_type(CopyType), + Nodes =/= [] + end]; + no -> case mnesia:system_info(db_nodes) of + [] -> []; + Nodes -> [{unknown, Nodes}] + end + end}, {running_nodes, mnesia:system_info(running_db_nodes)}]. 
init() -> ok = ensure_mnesia_running(), ok = ensure_mnesia_dir(), - ok = init_db(read_cluster_nodes_config()), + ok = init_db(read_cluster_nodes_config(), true), ok = wait_for_tables(), ok. @@ -78,16 +98,22 @@ is_db_empty() -> lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end, table_names()). +cluster(ClusterNodes) -> + cluster(ClusterNodes, false). +force_cluster(ClusterNodes) -> + cluster(ClusterNodes, true). + %% Alter which disk nodes this node is clustered with. This can be a %% subset of all the disk nodes in the cluster but can (and should) %% include the node itself if it is to be a disk rather than a ram -%% node. -cluster(ClusterNodes) -> +%% node. If Force is false, only connections to online nodes are +%% allowed. +cluster(ClusterNodes, Force) -> ok = ensure_mnesia_not_running(), ok = ensure_mnesia_dir(), rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia), try - ok = init_db(ClusterNodes), + ok = init_db(ClusterNodes, Force), ok = wait_for_tables(), ok = create_cluster_nodes_config(ClusterNodes) after @@ -118,6 +144,15 @@ empty_ram_only_tables() -> %%-------------------------------------------------------------------- +nodes_of_type(Type) -> + %% This function should return the nodes of a certain type (ram, + %% disc or disc_only) in the current cluster. The type of nodes + %% is determined when the cluster is initially configured. + %% Specifically, we check whether a certain table, which we know + %% will be written to disk on a disc node, is stored on disk or in + %% RAM. + mnesia:table_info(rabbit_durable_exchange, Type). + table_definitions() -> [{rabbit_user, [{record_name, user}, @@ -149,6 +184,8 @@ table_definitions() -> [{record_name, reverse_route}, {attributes, record_info(fields, reverse_route)}, {type, ordered_set}]}, + %% Consider the implications to nodes_of_type/1 before altering + %% the next entry. 
{rabbit_durable_exchange, [{record_name, exchange}, {attributes, record_info(fields, exchange)}, @@ -227,20 +264,9 @@ read_cluster_nodes_config() -> case rabbit_misc:read_term_file(FileName) of {ok, [ClusterNodes]} -> ClusterNodes; {error, enoent} -> - case application:get_env(cluster_config) of + case application:get_env(cluster_nodes) of undefined -> []; - {ok, DefaultFileName} -> - case file:consult(DefaultFileName) of - {ok, [ClusterNodes]} -> ClusterNodes; - {error, enoent} -> - error_logger:warning_msg( - "default cluster config file ~p does not exist~n", - [DefaultFileName]), - []; - {error, Reason} -> - throw({error, {cannot_read_cluster_nodes_config, - DefaultFileName, Reason}}) - end + {ok, ClusterNodes} -> ClusterNodes end; {error, Reason} -> throw({error, {cannot_read_cluster_nodes_config, @@ -259,38 +285,56 @@ delete_cluster_nodes_config() -> %% Take a cluster node config and create the right kind of node - a %% standalone disk node, or disk or ram node connected to the -%% specified cluster nodes. -init_db(ClusterNodes) -> - case mnesia:change_config(extra_db_nodes, ClusterNodes -- [node()]) of - {ok, []} -> - case mnesia:system_info(use_dir) of - true -> - case check_schema_integrity() of - ok -> - ok; - {error, Reason} -> - %% NB: we cannot use rabbit_log here since - %% it may not have been started yet - error_logger:warning_msg( - "schema integrity check failed: ~p~n" - "moving database to backup location " - "and recreating schema from scratch~n", - [Reason]), - ok = move_db(), +%% specified cluster nodes. If Force is false, don't allow +%% connections to offline nodes. 
+init_db(ClusterNodes, Force) -> + UClusterNodes = lists:usort(ClusterNodes), + ProperClusterNodes = UClusterNodes -- [node()], + case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of + {ok, Nodes} -> + case Force of + false -> + FailedClusterNodes = ProperClusterNodes -- Nodes, + case FailedClusterNodes of + [] -> ok; + _ -> + throw({error, {failed_to_cluster_with, + FailedClusterNodes, + "Mnesia could not connect to some nodes."}}) + end; + _ -> ok + end, + case Nodes of + [] -> + case mnesia:system_info(use_dir) of + true -> + case check_schema_integrity() of + ok -> + ok; + {error, Reason} -> + %% NB: we cannot use rabbit_log here since + %% it may not have been started yet + error_logger:warning_msg( + "schema integrity check failed: ~p~n" + "moving database to backup location " + "and recreating schema from scratch~n", + [Reason]), + ok = move_db(), + ok = create_schema() + end; + false -> ok = create_schema() end; - false -> - ok = create_schema() - end; - {ok, [_|_]} -> - IsDiskNode = ClusterNodes == [] orelse - lists:member(node(), ClusterNodes), - ok = wait_for_replicated_tables(), - ok = create_local_table_copy(schema, disc_copies), - ok = create_local_table_copies(case IsDiskNode of - true -> disc; - false -> ram - end); + [_|_] -> + IsDiskNode = ClusterNodes == [] orelse + lists:member(node(), ClusterNodes), + ok = wait_for_replicated_tables(), + ok = create_local_table_copy(schema, disc_copies), + ok = create_local_table_copies(case IsDiskNode of + true -> disc; + false -> ram + end) + end; {error, Reason} -> %% one reason we may end up here is if we try to join %% nodes together that are currently running standalone or diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl new file mode 100644 index 00000000..4f178439 --- /dev/null +++ b/src/rabbit_msg_file.erl @@ -0,0 +1,136 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance 
with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_file). + +-export([append/3, read/2, scan/2]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(INTEGER_SIZE_BYTES, 8). +-define(INTEGER_SIZE_BITS, (8 * ?INTEGER_SIZE_BYTES)). +-define(WRITE_OK_SIZE_BITS, 8). +-define(WRITE_OK_MARKER, 255). +-define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)). +-define(GUID_SIZE_BYTES, 16). +-define(GUID_SIZE_BITS, (8 * ?GUID_SIZE_BYTES)). +-define(SCAN_BLOCK_SIZE, 4194304). %% 4MB + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(io_device() :: any()). +-type(position() :: non_neg_integer()). +-type(msg_size() :: non_neg_integer()). +-type(file_size() :: non_neg_integer()). 
+ +-spec(append/3 :: (io_device(), rabbit_guid:guid(), msg()) -> + rabbit_types:ok_or_error2(msg_size(), any())). +-spec(read/2 :: (io_device(), msg_size()) -> + rabbit_types:ok_or_error2({rabbit_guid:guid(), msg()}, + any())). +-spec(scan/2 :: (io_device(), file_size()) -> + {'ok', [{rabbit_guid:guid(), msg_size(), position()}], + position()}). + +-endif. + +%%---------------------------------------------------------------------------- + +append(FileHdl, Guid, MsgBody) + when is_binary(Guid) andalso size(Guid) =:= ?GUID_SIZE_BYTES -> + MsgBodyBin = term_to_binary(MsgBody), + MsgBodyBinSize = size(MsgBodyBin), + Size = MsgBodyBinSize + ?GUID_SIZE_BYTES, + case file_handle_cache:append(FileHdl, + <<Size:?INTEGER_SIZE_BITS, + Guid:?GUID_SIZE_BYTES/binary, + MsgBodyBin:MsgBodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of + ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT}; + KO -> KO + end. + +read(FileHdl, TotalSize) -> + Size = TotalSize - ?FILE_PACKING_ADJUSTMENT, + BodyBinSize = Size - ?GUID_SIZE_BYTES, + case file_handle_cache:read(FileHdl, TotalSize) of + {ok, <<Size:?INTEGER_SIZE_BITS, + Guid:?GUID_SIZE_BYTES/binary, + MsgBodyBin:BodyBinSize/binary, + ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} -> + {ok, {Guid, binary_to_term(MsgBodyBin)}}; + KO -> KO + end. + +scan(FileHdl, FileSize) when FileSize >= 0 -> + scan(FileHdl, FileSize, <<>>, 0, [], 0). + +scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) -> + {ok, Acc, ScanOffset}; +scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) -> + Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]), + case file_handle_cache:read(FileHdl, Read) of + {ok, Data1} -> + {Data2, Acc1, ScanOffset1} = + scan(<<Data/binary, Data1/binary>>, Acc, ScanOffset), + ReadOffset1 = ReadOffset + size(Data1), + scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1); + _KO -> + {ok, Acc, ScanOffset} + end. 
+ +scan(<<>>, Acc, Offset) -> + {<<>>, Acc, Offset}; +scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) -> + {<<>>, Acc, Offset}; %% Nothing to do other than stop. +scan(<<Size:?INTEGER_SIZE_BITS, GuidAndMsg:Size/binary, + WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Acc, Offset) -> + TotalSize = Size + ?FILE_PACKING_ADJUSTMENT, + case WriteMarker of + ?WRITE_OK_MARKER -> + %% Here we take option 5 from + %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in + %% which we read the Guid as a number, and then convert it + %% back to a binary in order to work around bugs in + %% Erlang's GC. + <<GuidNum:?GUID_SIZE_BITS, _Msg/binary>> = + <<GuidAndMsg:Size/binary>>, + <<Guid:?GUID_SIZE_BYTES/binary>> = <<GuidNum:?GUID_SIZE_BITS>>, + scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize); + _ -> + scan(Rest, Acc, Offset + TotalSize) + end; +scan(Data, Acc, Offset) -> + {Data, Acc, Offset}. diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl new file mode 100644 index 00000000..63100571 --- /dev/null +++ b/src/rabbit_msg_store.erl @@ -0,0 +1,1731 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. 
+%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_msg_store). + +-behaviour(gen_server2). + +-export([start_link/4, write/4, read/3, contains/2, remove/2, release/2, + sync/3, client_init/2, client_terminate/1, + client_delete_and_terminate/3, successfully_recovered_state/1]). + +-export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%%---------------------------------------------------------------------------- + +-include("rabbit_msg_store.hrl"). + +-define(SYNC_INTERVAL, 5). %% milliseconds +-define(CLEAN_FILENAME, "clean.dot"). +-define(FILE_SUMMARY_FILENAME, "file_summary.ets"). + +-define(BINARY_MODE, [raw, binary]). +-define(READ_MODE, [read]). +-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]). +-define(WRITE_MODE, [write]). + +-define(FILE_EXTENSION, ".rdq"). +-define(FILE_EXTENSION_TMP, ".rdt"). + +-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB + +%%---------------------------------------------------------------------------- + +-record(msstate, + { dir, %% store directory + index_module, %% the module for index ops + index_state, %% where are messages? + current_file, %% current file name as number + current_file_handle, %% current file handle since the last fsync? 
+ file_handle_cache, %% file handle cache + on_sync, %% pending sync requests + sync_timer_ref, %% TRef for our interval timer + sum_valid_data, %% sum of valid data in all files + sum_file_size, %% sum of file sizes + pending_gc_completion, %% things to do once GC completes + gc_active, %% is the GC currently working? + gc_pid, %% pid of our GC + file_handles_ets, %% tid of the shared file handles table + file_summary_ets, %% tid of the file summary table + dedup_cache_ets, %% tid of dedup cache table + cur_file_cache_ets, %% tid of current file cache table + client_refs, %% set of references of all registered clients + successfully_recovered, %% boolean: did we recover state? + file_size_limit %% how big are our files allowed to get? + }). + +-record(client_msstate, + { file_handle_cache, + index_state, + index_module, + dir, + gc_pid, + file_handles_ets, + file_summary_ets, + dedup_cache_ets, + cur_file_cache_ets + }). + +-record(file_summary, + {file, valid_total_size, contiguous_top, left, right, file_size, + locked, readers}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(server() :: pid() | atom()). +-type(file_num() :: non_neg_integer()). +-type(client_msstate() :: #client_msstate { + file_handle_cache :: dict:dictionary(), + index_state :: any(), + index_module :: atom(), + dir :: file:filename(), + gc_pid :: pid(), + file_handles_ets :: ets:tid(), + file_summary_ets :: ets:tid(), + dedup_cache_ets :: ets:tid(), + cur_file_cache_ets :: ets:tid() }). +-type(startup_fun_state() :: + {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})), + A}). + +-spec(start_link/4 :: + (atom(), file:filename(), [binary()] | 'undefined', + startup_fun_state()) -> + 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(write/4 :: (server(), rabbit_guid:guid(), msg(), client_msstate()) -> + rabbit_types:ok(client_msstate())). 
+-spec(read/3 :: (server(), rabbit_guid:guid(), client_msstate()) -> + {rabbit_types:ok(msg()) | 'not_found', client_msstate()}). +-spec(contains/2 :: (server(), rabbit_guid:guid()) -> boolean()). +-spec(remove/2 :: (server(), [rabbit_guid:guid()]) -> 'ok'). +-spec(release/2 :: (server(), [rabbit_guid:guid()]) -> 'ok'). +-spec(sync/3 :: (server(), [rabbit_guid:guid()], fun (() -> any())) -> 'ok'). +-spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) -> + 'ok'). +-spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok'). +-spec(client_init/2 :: (server(), binary()) -> client_msstate()). +-spec(client_terminate/1 :: (client_msstate()) -> 'ok'). +-spec(client_delete_and_terminate/3 :: + (client_msstate(), server(), binary()) -> 'ok'). +-spec(successfully_recovered_state/1 :: (server()) -> boolean()). + +-spec(gc/3 :: (non_neg_integer(), non_neg_integer(), + {ets:tid(), file:filename(), atom(), any()}) -> + 'concurrent_readers' | non_neg_integer()). + +-endif. + +%%---------------------------------------------------------------------------- + +%% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION +%% It is not recommended to set this to < 0.5 +-define(GARBAGE_FRACTION, 0.5). + +%% The components: +%% +%% Index: this is a mapping from Guid to #msg_location{}: +%% {Guid, RefCount, File, Offset, TotalSize} +%% By default, it's in ets, but it's also pluggable. +%% FileSummary: this is an ets table which maps File to #file_summary{}: +%% {File, ValidTotalSize, ContiguousTop, Left, Right, +%% FileSize, Locked, Readers} +%% +%% The basic idea is that messages are appended to the current file up +%% until that file becomes too big (> file_size_limit). At that point, +%% the file is closed and a new file is created on the _right_ of the +%% old file which is used for new messages. Files are named +%% numerically ascending, thus the file with the lowest name is the +%% eldest file. 
%%
%% We need to keep track of which messages are in which files (this is
%% the Index); how much useful data is in each file and which files
%% are on the left and right of each other. This is the purpose of the
%% FileSummary ets table.
%%
%% As messages are removed from files, holes appear in these
%% files. The field ValidTotalSize contains the total amount of useful
%% data left in the file, whilst ContiguousTop contains the amount of
%% valid data right at the start of each file. These are needed for
%% garbage collection.
%%
%% When we discover that a file is now empty, we delete it. When we
%% discover that it can be combined with the useful data in either its
%% left or right neighbour, and overall, across all the files, we have
%% ((the amount of garbage) / (the sum of all file sizes)) >
%% ?GARBAGE_FRACTION, we start a garbage collection run concurrently,
%% which will compact the two files together. This keeps disk
%% utilisation high and aids performance. We deliberately do this
%% lazily in order to prevent doing GC on files which are soon to be
%% emptied (and hence deleted).
%%
%% Given the compaction between two files, the left file (i.e. elder
%% file) is considered the ultimate destination for the good data in
%% the right file. If necessary, the good data in the left file which
%% is fragmented throughout the file is written out to a temporary
%% file, then read back in to form a contiguous chunk of good data at
%% the start of the left file. Thus the left file is garbage collected
%% and compacted. Then the good data from the right file is copied
%% onto the end of the left file. Index and FileSummary tables are
%% updated.
%%
%% On non-clean startup, we scan the files we discover, dealing with
%% the possibilities of a crash having occurred during a compaction
%% (this consists of tidyup - the compaction is deliberately designed
%% such that data is duplicated on disk rather than risking it being
%% lost), and rebuild the FileSummary ets table and Index.
%%
%% So, with this design, messages move to the left. Eventually, they
%% should end up in a contiguous block on the left and are then never
%% rewritten. But this isn't quite the case. If in a file there is one
%% message that is being ignored, for some reason, and messages in the
%% file to the right and in the current block are being read all the
%% time then it will repeatedly be the case that the good data from
%% both files can be combined and will be written out to a new
%% file. Whenever this happens, our shunned message will be rewritten.
%%
%% So, provided that we combine messages in the right order,
%% (i.e. left file, bottom to top, right file, bottom to top),
%% eventually our shunned message will end up at the bottom of the
%% left file. The compaction/combining algorithm is smart enough to
%% read in good data from the left file that is scattered throughout
%% (i.e. C and D in the below diagram), then truncate the file to just
%% above B (i.e. truncate to the limit of the good contiguous region
%% at the start of the file), then write C and D on top and then write
%% E, F and G from the right file on top. Thus contiguous blocks of
%% good data at the bottom of files are not rewritten (yes, this is
%% the data the size of which is tracked by the ContiguousTop
%% variable. Judicious use of a mirror is required).
%%
%% +-------+    +-------+         +-------+
%% |   X   |    |   G   |         |   G   |
%% +-------+    +-------+         +-------+
%% |   D   |    |   X   |         |   F   |
%% +-------+    +-------+         +-------+
%% |   X   |    |   X   |         |   E   |
%% +-------+    +-------+         +-------+
%% |   C   |    |   F   |   ===>  |   D   |
%% +-------+    +-------+         +-------+
%% |   X   |    |   X   |         |   C   |
%% +-------+    +-------+         +-------+
%% |   B   |    |   X   |         |   B   |
%% +-------+    +-------+         +-------+
%% |   A   |    |   E   |         |   A   |
%% +-------+    +-------+         +-------+
%%   left         right             left
%%
%% From this reasoning, we do have a bound on the number of times the
%% message is rewritten. From when it is inserted, there can be no
%% files inserted between it and the head of the queue, and the worst
%% case is that every time it is rewritten, it moves one position lower
%% in the file (for it to stay at the same position requires that
%% there are no holes beneath it, which means truncate would be used
%% and so it would not be rewritten at all). Thus this seems to
%% suggest the limit is the number of messages ahead of it in the
%% queue, though it's likely that that's pessimistic, given the
%% requirements for compaction/combination of files.
%%
%% The other property that we have is the bound on the lowest
%% utilisation, which should be 50% - worst case is that all files are
%% fractionally over half full and can't be combined (equivalent is
%% alternating full files and files with only one tiny message in
%% them).
%%
%% Messages are reference-counted. When a message with the same guid
%% is written several times we only store it once, and only remove it
%% from the store when it has been removed the same number of times.
%%
%% The reference counts do not persist. Therefore the initialisation
%% function must be provided with a generator that produces ref count
%% deltas for all recovered messages. This is only used on startup
%% when the shutdown was non-clean.
%%
%% Read messages with a reference count greater than one are entered
%% into a message cache. The purpose of the cache is not especially
%% performance, though it can help there too, but prevention of memory
%% explosion. It ensures that as messages with a high reference count
%% are read from several processes they are read back as the same
%% binary object rather than multiples of identical binary
%% objects.
%%
%% Reads can be performed directly by clients without calling to the
%% server. This is safe because multiple file handles can be used to
%% read files. However, locking is used by the concurrent GC to make
%% sure that reads are not attempted from files which are in the
%% process of being garbage collected.
%%
%% The server automatically defers reads, removes and contains calls
%% that occur which refer to files which are currently being
%% GC'd. Contains calls are only deferred in order to ensure they do
%% not overtake removes.
%%
%% The current file to which messages are being written has a
%% write-back cache. This is written to immediately by clients and can
%% be read from by clients too. This means that there are only ever
%% writes made to the current file, thus eliminating delays due to
%% flushing write buffers in order to be able to safely read from the
%% current file. The one exception to this is that on start up, the
%% cache is not populated with msgs found in the current file, and
%% thus in this case only, reads may have to come from the file
%% itself. The effect of this is that even if the msg_store process is
%% heavily overloaded, clients can still write and read messages with
%% very low latency and not block at all.
%%
%% For notes on Clean Shutdown and startup, see documentation in
%% variable_queue.

%%----------------------------------------------------------------------------
%% public API
%%----------------------------------------------------------------------------

%% Starts the store as a locally registered gen_server2 named Server.
%% The start-up timeout is infinity.
start_link(Server, Dir, ClientRefs, StartupFunState) ->
    InitArgs = [Server, Dir, ClientRefs, StartupFunState],
    gen_server2:start_link({local, Server}, ?MODULE, InitArgs,
                           [{timeout, infinity}]).

%% Puts the message into the current-file cache first, then asks the
%% server (asynchronously) to write it. Returns {CastResult, CState}.
write(Server, Guid, Msg,
      CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) ->
    ok = update_msg_cache(CurFileCacheEts, Guid, Msg),
    CastResult = gen_server2:cast(Server, {write, Guid, Msg}),
    {CastResult, CState}.

%% Client-side read. Lookup order: dedup cache, then current-file
%% cache, then the index. If the index does not know the guid the read
%% is handed to the server; otherwise client_read1/4 takes over.
read(Server, Guid,
     CState = #client_msstate { dedup_cache_ets    = DedupCacheEts,
                                cur_file_cache_ets = CurFileCacheEts }) ->
    case fetch_and_increment_cache(DedupCacheEts, Guid) of
        not_found ->
            %% Not in the dedup cache: try the current-file cache
            case ets:lookup(CurFileCacheEts, Guid) of
                [{Guid, CachedMsg, _CacheRefCount}] ->
                    %% Found, but the refcount is unknown here, so the
                    %% message cannot be inserted into the dedup cache
                    {{ok, CachedMsg}, CState};
                [] ->
                    AskServer =
                        fun () ->
                                Reply = gen_server2:pcall(
                                          Server, 2, {read, Guid}, infinity),
                                {Reply, CState}
                        end,
                    case index_lookup(Guid, CState) of
                        not_found -> AskServer();
                        Location  -> client_read1(Server, Location, AskServer,
                                                  CState)
                    end
            end;
        DedupMsg ->
            {{ok, DedupMsg}, CState}
    end.

%% Synchronous membership test, answered by the server.
contains(Server, Guid) ->
    gen_server2:call(Server, {contains, Guid}, infinity).

%% Asynchronous removal; an empty list is a no-op.
remove(_Server, []) ->
    ok;
remove(Server, Guids) ->
    gen_server2:cast(Server, {remove, Guids}).

%% Asynchronous release; an empty list is a no-op.
release(_Server, []) ->
    ok;
release(Server, Guids) ->
    gen_server2:cast(Server, {release, Guids}).

%% Asks the server to handle a sync request for Guids with callback K.
sync(Server, Guids, K) ->
    gen_server2:cast(Server, {sync, Guids, K}).

%% internal
sync(Server) ->
    gen_server2:pcast(Server, 8, sync).

%% Sent to the server with priority 8 to report a finished GC run.
gc_done(Server, Reclaimed, Source, Destination) ->
    Notification = {gc_done, Reclaimed, Source, Destination},
    gen_server2:pcast(Server, 8, Notification).

%% Forwards Age to the server with priority 8.
set_maximum_since_use(Server, Age) ->
    gen_server2:pcast(Server, 8, {set_maximum_since_use, Age}).

%% Registers Ref with the server and builds the client-side state from
%% the tuple the server returns (index state/module, directory, GC pid
%% and the shared ets tables). The client starts with an empty file
%% handle cache.
client_init(Server, Ref) ->
    {IState, IModule, Dir, GCPid,
     FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} =
        gen_server2:call(Server, {new_client_state, Ref}, infinity),
    #client_msstate { file_handle_cache  = dict:new(),
                      index_state        = IState,
                      index_module       = IModule,
                      dir                = Dir,
                      gc_pid             = GCPid,
                      file_handles_ets   = FileHandlesEts,
                      file_summary_ets   = FileSummaryEts,
                      dedup_cache_ets    = DedupCacheEts,
                      cur_file_cache_ets = CurFileCacheEts }.

%% Closes every file handle held by this client. The updated client
%% state returned by close_all_handles/1 is discarded.
client_terminate(CState) ->
    close_all_handles(CState),
    ok.

%% As client_terminate/1, and additionally asks the server to forget
%% the client reference Ref.
client_delete_and_terminate(CState, Server, Ref) ->
    ok = client_terminate(CState),
    ok = gen_server2:call(Server, {delete_client, Ref}, infinity).

%% Asks the server for the value of its successfully_recovered field.
successfully_recovered_state(Server) ->
    gen_server2:call(Server, successfully_recovered_state, infinity).

%%----------------------------------------------------------------------------
%% Client-side-only helpers
%%----------------------------------------------------------------------------

%% Step 1 of a client-side read: look the file up in the file summary
%% table. If it has gone, restart the whole read; otherwise dispatch on
%% the file's locked flag and right neighbour in client_read2/6.
client_read1(Server,
             #msg_location { guid = Guid, file = File } = MsgLocation,
             Defer,
             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
    case ets:lookup(FileSummaryEts, File) of
        [] -> %% File has been GC'd and no longer exists. Go around again.
            read(Server, Guid, CState);
        [#file_summary { locked = Locked, right = Right }] ->
            client_read2(Server, Locked, Right, MsgLocation, Defer, CState)
    end.

%% Step 2 of a client-side read. Arguments 2 and 3 are the file's
%% locked flag and right neighbour:
%%   unlocked, no right neighbour -> defer to the server;
%%   locked                       -> defer to the server;
%%   unlocked otherwise           -> register as a reader and continue
%%                                   in client_read3/4.
client_read2(_Server, false, undefined, _MsgLocation, Defer, _CState) ->
    %% Although we've already checked both caches and not found the
    %% message there, the message is apparently in the
    %% current_file. We can only arrive here if we are trying to read
    %% a message which we have not written, which is very odd, so just
    %% defer.
    %%
    %% OR, on startup, the cur_file_cache is not populated with the
    %% contents of the current file, thus reads from the current file
    %% will end up here and will need to be deferred.
    Defer();
client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) ->
    %% Of course, in the mean time, the GC could have run and our msg
    %% is actually in a different file, unlocked. However, deferring is
    %% the safest and simplest thing to do.
    Defer();
client_read2(Server, false, _Right,
             MsgLocation = #msg_location { guid = Guid, file = File },
             Defer,
             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
    %% It's entirely possible that everything we're doing from here on
    %% is for the wrong file, or a non-existent file, as a GC may have
    %% finished.
    %%
    %% The +1 on readers raises badarg if the file's row has vanished;
    %% in that case the failure thunk restarts the read from scratch.
    safe_ets_update_counter(
      FileSummaryEts, File, {#file_summary.readers, +1},
      fun (_) -> client_read3(Server, MsgLocation, Defer, CState) end,
      fun () -> read(Server, Guid, CState) end).

%% Step 3 of a client-side read, entered after this client has been
%% counted in the file's readers field. Re-checks the file summary and
%% the index, then either reads from disk, defers to the server, or
%% restarts the read. Every path that does not restart via the first
%% branch undoes the readers increment through Release.
client_read3(Server, #msg_location { guid = Guid, file = File }, Defer,
             CState = #client_msstate { file_handles_ets = FileHandlesEts,
                                        file_summary_ets = FileSummaryEts,
                                        dedup_cache_ets  = DedupCacheEts,
                                        gc_pid           = GCPid }) ->
    %% Release decrements readers; if that leaves 0 readers on a file
    %% that is locked, it notifies the GC process via no_readers/2.
    Release =
        fun() -> ok = case ets:update_counter(FileSummaryEts, File,
                                              {#file_summary.readers, -1}) of
                          0 -> case ets:lookup(FileSummaryEts, File) of
                                   [#file_summary { locked = true }] ->
                                       rabbit_msg_store_gc:no_readers(
                                         GCPid, File);
                                   _ -> ok
                               end;
                          _ -> ok
                      end
        end,
    %% If a GC involving the file hasn't already started, it won't
    %% start now. Need to check again to see if we've been locked in
    %% the meantime, between lookup and update_counter (thus GC
    %% started before our +1. In fact, it could have finished by now
    %% too).
    case ets:lookup(FileSummaryEts, File) of
        [] -> %% GC has deleted our file, just go round again.
            read(Server, Guid, CState);
        [#file_summary { locked = true }] ->
            %% If we get a badarg here, then the GC has finished and
            %% deleted our file. Try going around again. Otherwise,
            %% just defer.
            %%
            %% badarg scenario: we lookup, msg_store locks, GC starts,
            %% GC ends, we +1 readers, msg_store ets:deletes (and
            %% unlocks the dest)
            try Release(),
                Defer()
            catch error:badarg -> read(Server, Guid, CState)
            end;
        [#file_summary { locked = false }] ->
            %% Ok, we're definitely safe to continue - a GC involving
            %% the file cannot start up now, and isn't running, so
            %% nothing will tell us from now on to close the handle if
            %% it's already open.
            %%
            %% Finally, we need to recheck that the msg is still at
            %% the same place - it's possible an entire GC ran between
            %% us doing the lookup and the +1 on the readers. (Same as
            %% badarg scenario above, but we don't have a missing file
            %% - we just have the /wrong/ file).
            case index_lookup(Guid, CState) of
                #msg_location { file = File } = MsgLocation ->
                    %% Still the same file.
                    mark_handle_open(FileHandlesEts, File),

                    CState1 = close_all_indicated(CState),
                    {Msg, CState2} = %% This will never be the current file
                        read_from_disk(MsgLocation, CState1, DedupCacheEts),
                    Release(), %% this MUST NOT fail with badarg
                    {{ok, Msg}, CState2};
                MsgLocation -> %% different file!
                    Release(), %% this MUST NOT fail with badarg
                    client_read1(Server, MsgLocation, Defer, CState)
            end
    end.

%%----------------------------------------------------------------------------
%% gen_server callbacks
%%----------------------------------------------------------------------------

%% gen_server2 init callback. In order: registers with the file handle
%% cache, recovers the index, client refs and file summary, creates the
%% dedup / shared-file-handle / current-file caches, recounts message
%% references when the previous shutdown was not clean, repairs crashed
%% compactions, rebuilds the index from the files found, opens and
%% truncates the current file at the recovered offset, and finally
%% starts the GC process.
init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) ->
    process_flag(trap_exit, true),

    ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use,
                                             [self()]),

    %% each store keeps its files in a sub-directory named after Server
    Dir = filename:join(BaseDir, atom_to_list(Server)),

    {ok, IndexModule} = application:get_env(msg_store_index_module),
    rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]),

    {AllCleanShutdown, IndexState, ClientRefs1} =
        recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server),

    {FileSummaryRecovered, FileSummaryEts} =
        recover_file_summary(AllCleanShutdown, Dir, Server),

    DedupCacheEts   = ets:new(rabbit_msg_store_dedup_cache, [set, public]),
    FileHandlesEts  = ets:new(rabbit_msg_store_shared_file_handles,
                              [ordered_set, public]),
    CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]),

    {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit),

    State = #msstate { dir                    = Dir,
                       index_module           = IndexModule,
                       index_state            = IndexState,
                       current_file           = 0,
                       current_file_handle    = undefined,
                       file_handle_cache      = dict:new(),
                       on_sync                = [],
                       sync_timer_ref         = undefined,
                       sum_valid_data         = 0,
                       sum_file_size          = 0,
                       pending_gc_completion  = [],
                       gc_active              = false,
                       gc_pid                 = undefined,
                       file_handles_ets       = FileHandlesEts,
                       file_summary_ets       = FileSummaryEts,
                       dedup_cache_ets        = DedupCacheEts,
                       cur_file_cache_ets     = CurFileCacheEts,
                       client_refs            = ClientRefs1,
                       successfully_recovered = AllCleanShutdown,
                       file_size_limit        = FileSizeLimit
                      },

    %% the ref-count generator is only consulted after an unclean shutdown
    ok = case AllCleanShutdown of
             true  -> ok;
             false -> count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State)
         end,

    FileNames =
        sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)),
    TmpFileNames =
        sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)),
    ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames),

    %% There should be no more tmp files now, so go ahead and load the
    %% whole lot
    Files = [filename_to_num(FileName) || FileName <- FileNames],
    {Offset, State1 = #msstate { current_file = CurFile }} =
        build_index(FileSummaryRecovered, Files, State),

    %% read is only needed so that we can seek
    {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile),
                             [read | ?WRITE_MODE]),
    {ok, Offset} = file_handle_cache:position(CurHdl, Offset),
    ok = file_handle_cache:truncate(CurHdl),

    {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule,
                                                 FileSummaryEts),

    {ok, maybe_compact(
           State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }),
     hibernate,
     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.

%% gen_server2 handle_call callback.
%%
%% {read, Guid} and {contains, Guid} return noreply here; the answer
%% is sent from read_message/3 or contains_message/3 via
%% gen_server2:reply/2, or queued by them until a running GC completes.
handle_call({read, Guid}, From, State) ->
    State1 = read_message(Guid, From, State),
    noreply(State1);

handle_call({contains, Guid}, From, State) ->
    State1 = contains_message(Guid, From, State),
    noreply(State1);

%% Hands a new client everything it needs to build its #client_msstate{}
%% and records its reference in client_refs.
handle_call({new_client_state, CRef}, _From,
            State = #msstate { dir                = Dir,
                               index_state        = IndexState,
                               index_module       = IndexModule,
                               file_handles_ets   = FileHandlesEts,
                               file_summary_ets   = FileSummaryEts,
                               dedup_cache_ets    = DedupCacheEts,
                               cur_file_cache_ets = CurFileCacheEts,
                               client_refs        = ClientRefs,
                               gc_pid             = GCPid }) ->
    reply({IndexState, IndexModule, Dir, GCPid,
           FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts},
          State #msstate { client_refs = sets:add_element(CRef, ClientRefs) });

%% Replies with the successfully_recovered field set in init/1.
handle_call(successfully_recovered_state, _From, State) ->
    reply(State #msstate.successfully_recovered, State);

%% Drops a client reference from client_refs.
handle_call({delete_client, CRef}, _From,
            State = #msstate { client_refs = ClientRefs }) ->
    reply(ok,
          State #msstate { client_refs = sets:del_element(CRef, ClientRefs) }).

%% gen_server2 handle_cast callback.
%%
%% {write, Guid, Msg}: decrements the third element of the message's
%% entry in the current-file cache (asserting the result is not
%% negative), then either appends a new message to the current file
%% and updates the index, file summary and running totals, or - if the
%% index already knows the guid - only bumps its ref_count.
handle_cast({write, Guid, Msg},
            State = #msstate { current_file_handle = CurHdl,
                               current_file        = CurFile,
                               sum_valid_data      = SumValid,
                               sum_file_size       = SumFileSize,
                               file_summary_ets    = FileSummaryEts,
                               cur_file_cache_ets  = CurFileCacheEts }) ->
    true = 0 =< ets:update_counter(CurFileCacheEts, Guid, {3, -1}),
    case index_lookup(Guid, State) of
        not_found ->
            %% New message, lots to do
            {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl),
            {ok, TotalSize} = rabbit_msg_file:append(CurHdl, Guid, Msg),
            ok = index_insert(#msg_location {
                                guid = Guid, ref_count = 1, file = CurFile,
                                offset = CurOffset, total_size = TotalSize },
                              State),
            %% the match asserts that the current file has no right
            %% neighbour and is not locked
            [#file_summary { valid_total_size = ValidTotalSize,
                             contiguous_top   = ContiguousTop,
                             right            = undefined,
                             locked           = false,
                             file_size        = FileSize }] =
                ets:lookup(FileSummaryEts, CurFile),
            ValidTotalSize1 = ValidTotalSize + TotalSize,
            %% contiguous_top only grows when the message was appended
            %% directly at the end of the contiguous region
            ContiguousTop1 = case CurOffset =:= ContiguousTop of
                                 true  -> ValidTotalSize1;
                                 false -> ContiguousTop
                             end,
            true = ets:update_element(
                     FileSummaryEts, CurFile,
                     [{#file_summary.valid_total_size, ValidTotalSize1},
                      {#file_summary.contiguous_top,   ContiguousTop1},
                      {#file_summary.file_size,        FileSize + TotalSize}]),
            NextOffset = CurOffset + TotalSize,
            noreply(
              maybe_roll_to_new_file(
                NextOffset, State #msstate {
                              sum_valid_data = SumValid    + TotalSize,
                              sum_file_size  = SumFileSize + TotalSize }));
        #msg_location { ref_count = RefCount } ->
            %% We already know about it, just update counter. Only
            %% update field otherwise bad interaction with concurrent GC
            ok = index_update_fields(Guid,
                                     {#msg_location.ref_count, RefCount + 1},
                                     State),
            noreply(State)
    end;

%% {remove, Guids}: removes each guid in turn, then checks whether a
%% compaction should be started.
handle_cast({remove, Guids}, State) ->
    State1 = lists:foldl(
               fun (Guid, State2) -> remove_message(Guid, State2) end,
               State, Guids),
    noreply(maybe_compact(State1));

%% {release, Guids}: calls decrement_cache/2 on the dedup cache for
%% each guid.
handle_cast({release, Guids}, State =
                #msstate { dedup_cache_ets = DedupCacheEts }) ->
    lists:foreach(
      fun (Guid) -> decrement_cache(DedupCacheEts, Guid) end, Guids),
    noreply(State);

%% {sync, Guids, K}: if any guid lives in the current file at or beyond
%% the last synced offset, K is queued in on_sync; otherwise K is run
%% immediately. Every guid must be present in the index (the match on
%% #msg_location{} fails otherwise).
handle_cast({sync, Guids, K},
            State = #msstate { current_file        = CurFile,
                               current_file_handle = CurHdl,
                               on_sync             = Syncs }) ->
    {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl),
    case lists:any(fun (Guid) ->
                           #msg_location { file = File, offset = Offset } =
                               index_lookup(Guid, State),
                           File =:= CurFile andalso Offset >= SyncOffset
                   end, Guids) of
        false -> K(),
                 noreply(State);
        true  -> noreply(State #msstate { on_sync = [K | Syncs] })
    end;

%% sync: sent by the sync timer (see start_sync_timer/1).
handle_cast(sync, State) ->
    noreply(internal_sync(State));

%% {gc_done, Reclaimed, Src, Dst}: only matches while gc_active is
%% {Src, Dst}. Unlinks Src from the file chain, unlocks Dst, deletes
%% Src's summary row, subtracts Reclaimed from sum_file_size and runs
%% the operations that were queued while the GC was active.
handle_cast({gc_done, Reclaimed, Src, Dst},
            State = #msstate { sum_file_size    = SumFileSize,
                               gc_active        = {Src, Dst},
                               file_handles_ets = FileHandlesEts,
                               file_summary_ets = FileSummaryEts }) ->
    %% GC done, so now ensure that any clients that have open fhs to
    %% those files close them before using them again. This has to be
    %% done here (given it's done in the msg_store, and not the gc),
    %% and not when starting up the GC, because if done when starting
    %% up the GC, the client could find the close, and close and
    %% reopen the fh, whilst the GC is waiting for readers to
    %% disappear, before it's actually done the GC.
    true = mark_handle_to_close(FileHandlesEts, Src),
    true = mark_handle_to_close(FileHandlesEts, Dst),
    %% we always move data left, so Src has gone and was on the
    %% right, so need to make dest = source.right.left, and also
    %% dest.right = source.right
    [#file_summary { left    = Dst,
                     right   = SrcRight,
                     locked  = true,
                     readers = 0 }] = ets:lookup(FileSummaryEts, Src),
    %% this could fail if SrcRight =:= undefined
    ets:update_element(FileSummaryEts, SrcRight, {#file_summary.left, Dst}),
    true = ets:update_element(FileSummaryEts, Dst,
                              [{#file_summary.locked, false},
                               {#file_summary.right,  SrcRight}]),
    true = ets:delete(FileSummaryEts, Src),
    noreply(
      maybe_compact(run_pending(
                      State #msstate { sum_file_size = SumFileSize - Reclaimed,
                                       gc_active     = false })));

%% {set_maximum_since_use, Age}: forwarded to the file handle cache.
handle_cast({set_maximum_since_use, Age}, State) ->
    ok = file_handle_cache:set_maximum_since_use(Age),
    noreply(State).

%% A timeout (next_state/1 requests a timeout of 0 while syncs are
%% pending) triggers a sync.
handle_info(timeout, State) ->
    noreply(internal_sync(State));

%% The process traps exits (see init/1); any 'EXIT' message stops the
%% server with the same reason.
handle_info({'EXIT', _Pid, Reason}, State) ->
    {stop, Reason, State}.

%% gen_server2 terminate callback: stops the GC, syncs and closes the
%% current file, closes cached read handles, stores the file summary,
%% deletes the ets tables, terminates the index and stores the
%% recovery terms (client refs and index module).
terminate(_Reason, State = #msstate { index_state         = IndexState,
                                      index_module        = IndexModule,
                                      current_file_handle = CurHdl,
                                      gc_pid              = GCPid,
                                      file_handles_ets    = FileHandlesEts,
                                      file_summary_ets    = FileSummaryEts,
                                      dedup_cache_ets     = DedupCacheEts,
                                      cur_file_cache_ets  = CurFileCacheEts,
                                      client_refs         = ClientRefs,
                                      dir                 = Dir }) ->
    %% stop the gc first, otherwise it could be working and we pull
    %% out the ets tables from under it.
    ok = rabbit_msg_store_gc:stop(GCPid),
    State1 = case CurHdl of
                 undefined -> State;
                 _         -> State2 = internal_sync(State),
                              %% NOTE(review): the result of close/1 is
                              %% not checked here
                              file_handle_cache:close(CurHdl),
                              State2
             end,
    State3 = close_all_handles(State1),
    %% the summary must be stored before its table is deleted below
    store_file_summary(FileSummaryEts, Dir),
    [ets:delete(T) ||
        T <- [FileSummaryEts, DedupCacheEts, FileHandlesEts, CurFileCacheEts]],
    IndexModule:terminate(IndexState),
    store_recovery_terms([{client_refs, sets:to_list(ClientRefs)},
                          {index_module, IndexModule}], Dir),
    State3 #msstate { index_state         = undefined,
                      current_file_handle = undefined }.

%% No state migration is performed on code change.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%----------------------------------------------------------------------------
%% general helper functions
%%----------------------------------------------------------------------------

%% Wraps a callback result with the state and timeout chosen by
%% next_state/1.
noreply(State) ->
    {State1, Timeout} = next_state(State),
    {noreply, State1, Timeout}.

%% As noreply/1, for callbacks that reply immediately.
reply(Reply, State) ->
    {State1, Timeout} = next_state(State),
    {reply, Reply, State1, Timeout}.

%% Decides the timeout and keeps the sync timer consistent with
%% on_sync:
%%   nothing pending, no timer -> hibernate
%%   pending, no timer         -> start the timer, timeout 0
%%   nothing pending, timer    -> stop the timer, hibernate
%%   pending, timer running    -> timeout 0
next_state(State = #msstate { on_sync = [], sync_timer_ref = undefined }) ->
    {State, hibernate};
next_state(State = #msstate { sync_timer_ref = undefined }) ->
    {start_sync_timer(State), 0};
next_state(State = #msstate { on_sync = [] }) ->
    {stop_sync_timer(State), hibernate};
next_state(State) ->
    {State, 0}.

%% Arms a one-shot timer that calls ?MODULE:sync(self()) after
%% ?SYNC_INTERVAL milliseconds. Only defined when no timer is running.
start_sync_timer(State = #msstate { sync_timer_ref = undefined }) ->
    {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, [self()]),
    State #msstate { sync_timer_ref = TRef }.

%% Cancels the sync timer if one is running; a no-op otherwise.
stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) ->
    State;
stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) ->
    {ok, cancel} = timer:cancel(TRef),
    State #msstate { sync_timer_ref = undefined }.

%% Stops the sync timer and, if any sync callbacks are queued, syncs
%% the current file and runs the callbacks in the order they were
%% queued (on_sync is kept newest-first, hence the reverse).
internal_sync(State = #msstate { current_file_handle = CurHdl,
                                 on_sync             = Pending }) ->
    TimerStopped = stop_sync_timer(State),
    case Pending of
        [] ->
            TimerStopped;
        _ ->
            ok = file_handle_cache:sync(CurHdl),
            InOrder = lists:reverse(Pending),
            lists:foreach(fun (Callback) -> Callback() end, InOrder),
            TimerStopped #msstate { on_sync = [] }
    end.

%% Server-side read. Replies not_found when the index does not know
%% the guid; replies from the dedup cache when possible; otherwise
%% hands over to read_message1/3.
read_message(Guid, From,
             State = #msstate { dedup_cache_ets = DedupCacheEts }) ->
    case index_lookup(Guid, State) of
        not_found ->
            gen_server2:reply(From, not_found),
            State;
        Location ->
            case fetch_and_increment_cache(DedupCacheEts, Guid) of
                not_found ->
                    read_message1(From, Location, State);
                CachedMsg ->
                    gen_server2:reply(From, {ok, CachedMsg}),
                    State
            end
    end.

%% Reads a message that was not in the dedup cache.
%%
%% Current file: serve it from the current-file cache if present,
%% otherwise flush the write buffer when the message lies at or beyond
%% the raw offset, and read from disk.
%% Any other file: if the file is locked the read is queued until the
%% GC completes, otherwise it is read from disk.
read_message1(From, #msg_location { guid = Guid, ref_count = RefCount,
                                    file = File, offset = Offset } = MsgLoc,
              State = #msstate { current_file        = CurFile,
                                 current_file_handle = CurHdl,
                                 file_summary_ets    = FileSummaryEts,
                                 dedup_cache_ets     = DedupCacheEts,
                                 cur_file_cache_ets  = CurFileCacheEts }) ->
    case File of
        CurFile ->
            {CurMsg, CurState} =
                %% the lookup can return [] if the msg was already in
                %% the file on startup
                case ets:lookup(CurFileCacheEts, Guid) of
                    [{Guid, FromCache, _CacheRefCount}] ->
                        ok = maybe_insert_into_cache(
                               DedupCacheEts, RefCount, Guid, FromCache),
                        {FromCache, State};
                    [] ->
                        {ok, RawOffset} =
                            file_handle_cache:current_raw_offset(CurHdl),
                        ok = if Offset >= RawOffset ->
                                     file_handle_cache:flush(CurHdl);
                                true ->
                                     ok
                             end,
                        read_from_disk(MsgLoc, State, DedupCacheEts)
                end,
            gen_server2:reply(From, {ok, CurMsg}),
            CurState;
        _ ->
            [#file_summary { locked = Locked }] =
                ets:lookup(FileSummaryEts, File),
            case Locked of
                true ->
                    add_to_pending_gc_completion({read, Guid, From}, State);
                false ->
                    {DiskMsg, DiskState} =
                        read_from_disk(MsgLoc, State, DedupCacheEts),
                    gen_server2:reply(From, {ok, DiskMsg}),
                    DiskState
            end
    end.

%% Reads one message from File at Offset using a (possibly cached)
%% read handle. If the data read does not carry the expected Guid, the
%% match on {ok, {Guid, Msg}} fails with a diagnostic term describing
%% the misread. On success the message is offered to the dedup cache.
%% Returns {Msg, NewState}.
read_from_disk(#msg_location { guid = Guid, ref_count = RefCount,
                               file = File, offset = Offset,
                               total_size = TotalSize },
               State, DedupCacheEts) ->
    {Hdl, StateWithHdl} = get_read_handle(File, State),
    {ok, Offset} = file_handle_cache:position(Hdl, Offset),
    ReadResult = rabbit_msg_file:read(Hdl, TotalSize),
    {ok, {Guid, Msg}} =
        case ReadResult of
            {ok, {Guid, _}} ->
                ReadResult;
            Unexpected ->
                {error, {misread, [{old_state, State},
                                   {file_num,  File},
                                   {offset,    Offset},
                                   {guid,      Guid},
                                   {read,      Unexpected},
                                   {proc_dict, get()}
                                  ]}}
        end,
    ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg),
    {Msg, StateWithHdl}.

%% Answers a contains call. Unknown guids are answered false at once.
%% If the message's file is one of the two files of the active GC the
%% call is queued until the GC completes; otherwise it is answered
%% true.
contains_message(Guid, From, State = #msstate { gc_active = GCActive }) ->
    case index_lookup(Guid, State) of
        not_found ->
            gen_server2:reply(From, false),
            State;
        #msg_location { file = File } ->
            BeingCollected =
                case GCActive of
                    {Src, Dst} -> File =:= Src orelse File =:= Dst;
                    _          -> false
                end,
            case BeingCollected of
                true ->
                    add_to_pending_gc_completion(
                      {contains, Guid, From}, State);
                false ->
                    gen_server2:reply(From, true),
                    State
            end
    end.

%% Removes one reference to Guid. The guid must be present in the
%% index. When the last reference goes, the message is dropped from
%% the dedup cache, the index and the file summary accounting (or the
%% removal is queued if the file is locked); otherwise only the
%% ref_count is decremented.
remove_message(Guid, State = #msstate { sum_valid_data   = SumValid,
                                        file_summary_ets = FileSummaryEts,
                                        dedup_cache_ets  = DedupCacheEts }) ->
    #msg_location { ref_count = RefCount, file = File,
                    offset = Offset, total_size = TotalSize } =
        index_lookup(Guid, State),
    case RefCount of
        1 ->
            %% don't remove from CUR_FILE_CACHE_ETS_NAME here because
            %% there may be further writes in the mailbox for the same
            %% msg.
            ok = remove_cache_entry(DedupCacheEts, Guid),
            [#file_summary { valid_total_size = ValidTotalSize,
                             contiguous_top   = ContiguousTop,
                             locked           = Locked }] =
                ets:lookup(FileSummaryEts, File),
            case Locked of
                true ->
                    add_to_pending_gc_completion({remove, Guid}, State);
                false ->
                    ok = index_delete(Guid, State),
                    NewContiguousTop = lists:min([ContiguousTop, Offset]),
                    NewValidTotalSize = ValidTotalSize - TotalSize,
                    SummaryUpdates =
                        [{#file_summary.valid_total_size, NewValidTotalSize},
                         {#file_summary.contiguous_top,   NewContiguousTop}],
                    true = ets:update_element(FileSummaryEts, File,
                                              SummaryUpdates),
                    Trimmed = delete_file_if_empty(File, State),
                    Trimmed #msstate { sum_valid_data = SumValid - TotalSize }
            end;
        _ when 1 < RefCount ->
            ok = decrement_cache(DedupCacheEts, Guid),
            %% only update field, otherwise bad interaction with concurrent GC
            ok = index_update_fields(Guid,
                                     {#msg_location.ref_count, RefCount - 1},
                                     State),
            State
    end.

%% Queues Op (newest first) to be run once the active GC completes.
add_to_pending_gc_completion(
  Op, State = #msstate { pending_gc_completion = Queued }) ->
    State #msstate { pending_gc_completion = [Op | Queued] }.

%% Runs all queued operations in the order they were queued, starting
%% from a state whose queue has been emptied.
run_pending(State = #msstate { pending_gc_completion = [] }) ->
    State;
run_pending(State = #msstate { pending_gc_completion = Queued }) ->
    Emptied = State #msstate { pending_gc_completion = [] },
    OldestFirst = lists:reverse(Queued),
    lists:foldl(fun run_pending/2, Emptied, OldestFirst).

%% Dispatches a single queued operation.
run_pending({read, Guid, From}, State) ->
    read_message(Guid, From, State);
run_pending({contains, Guid, From}, State) ->
    contains_message(Guid, From, State);
run_pending({remove, Guid}, State) ->
    remove_message(Guid, State).

%% Applies UpdateOp to Key's counter and passes the new value to
%% SuccessFun. An error:badarg raised by the update *or* from within
%% SuccessFun results in FailThunk being called instead.
safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) ->
    try
        SuccessFun(ets:update_counter(Tab, Key, UpdateOp))
    catch
        error:badarg -> FailThunk()
    end.

%% As safe_ets_update_counter/5, returning ok on success.
safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) ->
    AlwaysOk = fun (_) -> ok end,
    safe_ets_update_counter(Tab, Key, UpdateOp, AlwaysOk, FailThunk).

%%----------------------------------------------------------------------------
%% file helper functions
%%----------------------------------------------------------------------------

%% Opens FileName inside Dir through file_handle_cache, always in
%% ?BINARY_MODE plus the caller's Mode, with the configured write
%% buffer size.
open_file(Dir, FileName, Mode) ->
    Path = form_filename(Dir, FileName),
    file_handle_cache:open(Path, ?BINARY_MODE ++ Mode,
                           [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]).

%% Closes the cached handle stored under Key, if there is one. The
%% second argument may be a #client_msstate{}, an #msstate{} or the
%% bare handle dict; the same kind of term is returned, with the
%% handle removed.
close_handle(Key, CState = #client_msstate { file_handle_cache = Handles }) ->
    CState #client_msstate { file_handle_cache = close_handle(Key, Handles) };

close_handle(Key, State = #msstate { file_handle_cache = Handles }) ->
    State #msstate { file_handle_cache = close_handle(Key, Handles) };

close_handle(Key, Handles) ->
    case dict:find(Key, Handles) of
        error     -> Handles;
        {ok, Hdl} -> ok = file_handle_cache:close(Hdl),
                     dict:erase(Key, Handles)
    end.

%% Records that the calling process has File open. Always returns
%% true.
mark_handle_open(FileHandlesEts, File) ->
    %% This is fine to fail (already exists)
    ets:insert_new(FileHandlesEts, {{self(), File}, open}),
    true.

%% Flips every 'open' entry for File, whichever process owns it, to
%% 'close'. Always returns true.
mark_handle_to_close(FileHandlesEts, File) ->
    OpenEntries = ets:match_object(FileHandlesEts, {{'_', File}, open}),
    [ ets:update_element(FileHandlesEts, Key, {2, close})
      || {Key, open} <- OpenEntries ],
    true.

%% Closes every handle of the calling process that is marked 'close'
%% and deletes the corresponding table entries.
close_all_indicated(#client_msstate { file_handles_ets = FileHandlesEts } =
                        CState) ->
    ToClose = ets:match_object(FileHandlesEts, {{self(), '_'}, close}),
    CloseOne = fun ({Key = {_Self, File}, close}, CStateAcc) ->
                       true = ets:delete(FileHandlesEts, Key),
                       close_handle(File, CStateAcc)
               end,
    lists:foldl(CloseOne, CState, ToClose).

%% Closes every cached handle and resets the cache to an empty dict.
%% For a client state, the caller's entries in the file handles table
%% are deleted as well.
close_all_handles(CState = #client_msstate { file_handles_ets = FileHandlesEts,
                                             file_handle_cache = Handles }) ->
    Self = self(),
    ok = dict:fold(fun (File, Hdl, ok) ->
                           true = ets:delete(FileHandlesEts, {Self, File}),
                           file_handle_cache:close(Hdl)
                   end, ok, Handles),
    CState #client_msstate { file_handle_cache = dict:new() };

close_all_handles(State = #msstate { file_handle_cache = Handles }) ->
    ok = dict:fold(fun (_Key, Hdl, ok) -> file_handle_cache:close(Hdl) end,
                   ok, Handles),
    State #msstate { file_handle_cache = dict:new() }.

%% Returns {Hdl, NewState}: a read handle for file number FileNum,
%% taken from the state's handle cache or opened and cached on
%% demand. Works on both #client_msstate{} and #msstate{}.
get_read_handle(FileNum, CState = #client_msstate { file_handle_cache = Cache,
                                                    dir = Dir }) ->
    {Hdl, Cache1} = get_read_handle(FileNum, Cache, Dir),
    {Hdl, CState #client_msstate { file_handle_cache = Cache1 }};

get_read_handle(FileNum, State = #msstate { file_handle_cache = Cache,
                                            dir = Dir }) ->
    {Hdl, Cache1} = get_read_handle(FileNum, Cache, Dir),
    {Hdl, State #msstate { file_handle_cache = Cache1 }}.

%% Dict-level worker for get_read_handle/2.
get_read_handle(FileNum, Cache, Dir) ->
    case dict:find(FileNum, Cache) of
        error ->
            {ok, Opened} = open_file(Dir, filenum_to_name(FileNum),
                                     ?READ_MODE),
            {Opened, dict:store(FileNum, Opened, Cache)};
        {ok, Cached} ->
            {Cached, Cache}
    end.

%% Positions at FileSizeLimit, truncates there, then positions at
%% FinalPos. Each position call must report exactly the offset
%% requested.
preallocate(Hdl, FileSizeLimit, FinalPos) ->
    {ok, FileSizeLimit} = file_handle_cache:position(Hdl, FileSizeLimit),
    ok = file_handle_cache:truncate(Hdl),
    {ok, FinalPos} = file_handle_cache:position(Hdl, FinalPos),
    ok.

%% Truncates the file at Lowpoint, then preallocates up to Highpoint
%% and leaves the handle positioned at Lowpoint.
truncate_and_extend_file(Hdl, Lowpoint, Highpoint) ->
    {ok, Lowpoint} = file_handle_cache:position(Hdl, Lowpoint),
    ok = file_handle_cache:truncate(Hdl),
    ok = preallocate(Hdl, Highpoint, Lowpoint).

%% Joins a directory and a file name.
form_filename(Dir, Name) ->
    filename:join(Dir, Name).

%% File number -> file name (decimal digits plus ?FILE_EXTENSION).
filenum_to_name(File) ->
    integer_to_list(File) ++ ?FILE_EXTENSION.

%% File name -> file number (the name with its extension removed).
filename_to_num(FileName) ->
    list_to_integer(filename:rootname(FileName)).

%% Sorts file names by ascending file number.
sort_file_names(FileNames) ->
    ByNumber = fun (A, B) -> filename_to_num(A) < filename_to_num(B) end,
    lists:sort(ByNumber, FileNames).

%%----------------------------------------------------------------------------
%% message cache helper functions
%%----------------------------------------------------------------------------

%% Caches Msg under Guid only when the message has more than one
%% reference; otherwise does nothing. Returns ok either way.
maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg)
  when RefCount > 1 ->
    update_msg_cache(DedupCacheEts, Guid, Msg);
maybe_insert_into_cache(_DedupCacheEts, _RefCount, _Guid, _Msg) ->
    ok.

%% Inserts {Guid, Msg, 1} into the cache, or bumps the counter in
%% position 3 if the entry already exists. If the entry vanishes
%% between the two steps, the whole operation is retried.
update_msg_cache(CacheEts, Guid, Msg) ->
    Retry = fun () -> update_msg_cache(CacheEts, Guid, Msg) end,
    case ets:insert_new(CacheEts, {Guid, Msg, 1}) of
        true  -> ok;
        false -> safe_ets_update_counter_ok(CacheEts, Guid, {3, +1}, Retry)
    end.

%% Unconditionally deletes Guid's cache entry.
remove_cache_entry(DedupCacheEts, Guid) ->
    true = ets:delete(DedupCacheEts, Guid),
    ok.

%% Looks Guid up in the cache. On a hit the entry's counter is
%% incremented and the message is returned; on a miss the result is
%% not_found.
fetch_and_increment_cache(DedupCacheEts, Guid) ->
    case ets:lookup(DedupCacheEts, Guid) of
        [] ->
            not_found;
        [{_Guid, Msg, _RefCount}] ->
            safe_ets_update_counter_ok(
              DedupCacheEts, Guid, {3, +1},
              %% someone has deleted us in the meantime, insert us
              fun () -> ok = update_msg_cache(DedupCacheEts, Guid, Msg) end),
            Msg
    end.

%% Decrements Guid's cache counter and deletes the entry once the
%% counter is zero or below. A missing entry is not an error.
decrement_cache(DedupCacheEts, Guid) ->
    DeleteWhenSpent =
        fun (N) when N =< 0 -> true = ets:delete(DedupCacheEts, Guid);
            (_N)            -> true
        end,
    %% Guid is not in there because although it's been delivered,
    %% it's never actually been read (think: persistent message held
    %% in RAM)
    Absent = fun () -> true end,
    true = safe_ets_update_counter(
             DedupCacheEts, Guid, {3, -1}, DeleteWhenSpent, Absent),
    ok.

%%----------------------------------------------------------------------------
%% index
%%----------------------------------------------------------------------------

%% The functions below forward to the index callback module held in
%% the state, handing it the index state.

index_lookup(Key, #client_msstate { index_module = Index,
                                    index_state  = IndexState }) ->
    Index:lookup(Key, IndexState);

index_lookup(Key, #msstate { index_module = Index,
                             index_state  = IndexState }) ->
    Index:lookup(Key, IndexState).

index_insert(Obj, #msstate { index_module = Index,
                             index_state  = IndexState }) ->
    Index:insert(Obj, IndexState).

index_update(Obj, #msstate { index_module = Index,
                             index_state  = IndexState }) ->
    Index:update(Obj, IndexState).

index_update_fields(Key, Updates, #msstate { index_module = Index,
                                             index_state  = IndexState }) ->
    Index:update_fields(Key, Updates, IndexState).

index_delete(Key, #msstate { index_module = Index,
                             index_state  = IndexState }) ->
    Index:delete(Key, IndexState).

%% Forwards to the index module's delete_by_file/2.
index_delete_by_file(File, #msstate { index_module = Index,
                                      index_state  = IndexState }) ->
    Index:delete_by_file(File, IndexState).

%%----------------------------------------------------------------------------
%% shutdown and recovery
%%----------------------------------------------------------------------------

%% Returns {Recovered, IndexState, ClientRefs}.
%%
%% With ClientRefs =:= undefined the directory is wiped and a fresh
%% index is created. Otherwise the stored recovery terms must name
%% the same client refs (order-insensitive) and the same index
%% module, and the index module must recover successfully. Any
%% failure is logged as a warning and answered with a fresh index and
%% an empty ref set.
recover_index_and_client_refs(IndexModule, undefined, Dir, _Server) ->
    ok = rabbit_misc:recursive_delete([Dir]),
    ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
    {false, IndexModule:new(Dir), sets:new()};
recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server) ->
    ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
    Fresh = fun (ErrorMsg, ErrorArgs) ->
                    rabbit_log:warning("~w: " ++ ErrorMsg ++
                                       "~nrebuilding indices from scratch~n",
                                       [Server | ErrorArgs]),
                    {false, IndexModule:new(Dir), sets:new()}
            end,
    case read_recovery_terms(Dir) of
        {false, ReadError} ->
            Fresh("failed to read recovery terms: ~p", [ReadError]);
        {true, Terms} ->
            RecClientRefs  = proplists:get_value(client_refs, Terms, []),
            RecIndexModule = proplists:get_value(index_module, Terms),
            TermsAgree = (lists:sort(ClientRefs) =:= lists:sort(RecClientRefs)
                          andalso IndexModule =:= RecIndexModule),
            case TermsAgree of
                false ->
                    Fresh("recovery terms differ from present", []);
                true ->
                    case IndexModule:recover(Dir) of
                        {ok, IndexState1} ->
                            {true, IndexState1, sets:from_list(ClientRefs)};
                        {error, IndexError} ->
                            Fresh("failed to recover index: ~p", [IndexError])
                    end
            end
    end.

%% Writes Terms to the ?CLEAN_FILENAME term file inside Dir.
store_recovery_terms(Terms, Dir) ->
    rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms).

%% Reads the ?CLEAN_FILENAME term file and deletes it afterwards.
%% Returns {true, Terms}, or {false, Reason} if either the read or
%% the delete fails.
read_recovery_terms(Dir) ->
    Path = filename:join(Dir, ?CLEAN_FILENAME),
    case rabbit_misc:read_term_file(Path) of
        {error, ReadError} ->
            {false, ReadError};
        {ok, Terms} ->
            case file:delete(Path) of
                ok                   -> {true, Terms};
                {error, DeleteError} -> {false, DeleteError}
            end
    end.

%% Dumps the file summary table to ?FILE_SUMMARY_FILENAME in Dir.
store_file_summary(Tid, Dir) ->
    ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME),
                      [{extended_info, [object_count]}]).

%% Returns {Recovered, Tid}. With 'false' as first argument a new,
%% empty table is created. With 'true' the table is loaded from
%% ?FILE_SUMMARY_FILENAME; if loading fails, a warning is logged and
%% a new table is created instead.
recover_file_summary(false, _Dir, _Server) ->
    %% TODO: the only reason for this to be an *ordered*_set is so
    %% that a) maybe_compact can start a traversal from the eldest
    %% file, and b) build_index in fast recovery mode can easily
    %% identify the current file. It's awkward to have both that
    %% ordering and the left/right pointers in the entries - replacing
    %% the former with some additional bit of state would be easy, but
    %% ditching the latter would be neater.
    Tid = ets:new(rabbit_msg_store_file_summary,
                  [ordered_set, public, {keypos, #file_summary.file}]),
    {false, Tid};
recover_file_summary(true, Dir, Server) ->
    Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME),
    case ets:file2tab(Path) of
        {ok, Tid} ->
            %% the result of the delete is deliberately not checked
            file:delete(Path),
            {true, Tid};
        {error, Error} ->
            rabbit_log:warning(
              "~w: failed to recover file summary: ~p~n"
              "rebuilding~n", [Server, Error]),
            recover_file_summary(false, Dir, Server)
    end.

%% Drives the generator Gen from Seed until it yields 'finished',
%% applying each {Guid, Delta, Next} to the ref_count held in the
%% index. Unknown guids are inserted with ref_count = Delta, entries
%% whose count reaches 0 are deleted, and zero deltas are skipped.
count_msg_refs(Gen, Seed, State) ->
    case Gen(Seed) of
        finished ->
            ok;
        {_Guid, 0, Next} ->
            count_msg_refs(Gen, Next, State);
        {Guid, Delta, Next} ->
            ok = case index_lookup(Guid, State) of
                     not_found ->
                         index_insert(#msg_location { guid = Guid,
                                                      ref_count = Delta },
                                      State);
                     #msg_location { ref_count = RefCount } = StoreEntry ->
                         case RefCount + Delta of
                             0 ->
                                 index_delete(Guid, State);
                             NewRefCount ->
                                 index_update(
                                   StoreEntry #msg_location {
                                     ref_count = NewRefCount },
                                   State)
                         end
                 end,
            count_msg_refs(Gen, Next, State)
    end.

%% Runs recover_crashed_compaction/3 for every tmp file. Each tmp
%% file must have a same-named main file (?FILE_EXTENSION) listed in
%% FileNames.
recover_crashed_compactions(Dir, FileNames, TmpFileNames) ->
    RecoverOne =
        fun (TmpFileName) ->
                NonTmpRelatedFileName =
                    filename:rootname(TmpFileName) ++ ?FILE_EXTENSION,
                true = lists:member(NonTmpRelatedFileName, FileNames),
                ok = recover_crashed_compaction(
                       Dir, TmpFileName, NonTmpRelatedFileName)
        end,
    lists:foreach(RecoverOne, TmpFileNames),
    ok.

%% Repairs one main file / tmp file pair left behind by a compaction
%% that crashed. The possible situations are:
%%
%% 1) Everything in the tmp file is also in the main file, such that
%%    the main file is (prefix ++ tmpfile). Compaction failed
%%    immediately prior to the final step of deleting the tmp file.
%%    Plan: just delete the tmp file.
%% 2) Everything in the tmp file is also in the main file, but with
%%    holes throughout (or just something like main = (prefix ++ hole
%%    ++ tmpfile)). Compaction wrote out the tmp file successfully and
%%    then failed. Plan: just delete the tmp file and allow the
%%    compaction to eventually be triggered later.
%% 3) Everything in the tmp file is also in the main file, but the
%%    main file does not end with the tmp file (and there are valid
%%    messages in the suffix; main = (prefix ++ tmpfile[with extra
%%    holes?] ++ suffix)). Compaction failed as we were writing out
%%    the tmp file. Plan: just delete the tmp file and allow the
%%    compaction to eventually be triggered later.
%% 4) There are messages in the tmp file which are not in the main
%%    file. Writing out the tmp file succeeded, but then we failed as
%%    we were copying them back over to the main file, after
%%    truncating the main file. As the main file has already been
%%    truncated, it should consist only of valid messages. Plan:
%%    truncate the main file back to before any of the messages in
%%    the tmp file and copy them over again.
recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) ->
    {ok, UncorruptedMessagesTmp, GuidsTmp} =
        scan_file_for_valid_messages_and_guids(Dir, TmpFileName),
    {ok, UncorruptedMessages, Guids} =
        scan_file_for_valid_messages_and_guids(Dir, NonTmpRelatedFileName),
    TmpPath = form_filename(Dir, TmpFileName),
    case is_sublist(GuidsTmp, Guids) of
        true ->
            %% Case 1, 2 or 3 above: just delete the tmp file. This
            %% also catches the case when the tmp file is empty.
            ok = file:delete(TmpPath);
        false ->
            %% Case 4 above. We only care about the initial msgs in
            %% the main file that are not in the tmp file. If there
            %% were no msgs in the tmp file we would be in the 'true'
            %% branch, so we know the lists:last call is safe.
            EldestTmpGuid = lists:last(GuidsTmp),
            IsNotEldestTmp = fun (Guid) -> Guid =/= EldestTmpGuid end,
            {Guids1, UncorruptedMessages1}
                = case lists:splitwith(IsNotEldestTmp, Guids) of
                      {_Guids, []} ->
                          %% no msgs from tmp in main
                          {Guids, UncorruptedMessages};
                      {Dropped, [EldestTmpGuid | Rest]} ->
                          %% Msgs in Dropped are in tmp, so forget
                          %% them. *cry*. Lists indexed from 1.
                          {Rest, lists:sublist(UncorruptedMessages,
                                               2 + length(Dropped),
                                               length(Rest))}
                  end,
            %% The main file prefix should be contiguous. Matching on
            %% the already-bound Guids1 asserts that the contiguous
            %% block covers every message we kept.
            {Top, Guids1} = find_contiguous_block_prefix(
                              lists:reverse(UncorruptedMessages1)),
            %% none of the messages in the prefix may be in the tmp
            %% file
            true = is_disjoint(Guids1, GuidsTmp),
            %% must open with read flag, otherwise will stomp over
            %% contents
            {ok, MainHandle} = open_file(Dir, NonTmpRelatedFileName,
                                         [read | ?WRITE_MODE]),
            %% Wipe out any rubbish at the end of the file. Remember
            %% the head of the list will be the highest entry in the
            %% file.
            [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp,
            TmpSize = TmpTopOffset + TmpTopTotalSize,
            %% Extend the main file as big as necessary in a single
            %% move. If we run out of disk space, this truncate could
            %% fail, but we still aren't risking losing data.
            ok = truncate_and_extend_file(MainHandle, Top, Top + TmpSize),
            {ok, TmpHandle} = open_file(Dir, TmpFileName, ?READ_AHEAD_MODE),
            {ok, TmpSize} =
                file_handle_cache:copy(TmpHandle, MainHandle, TmpSize),
            ok = file_handle_cache:close(MainHandle),
            ok = file_handle_cache:delete(TmpHandle),

            {ok, _MainMessages, GuidsMain} =
                scan_file_for_valid_messages_and_guids(
                  Dir, NonTmpRelatedFileName),
            %% check that everything in Guids1 is in GuidsMain
            true = is_sublist(Guids1, GuidsMain),
            %% check that everything in GuidsTmp is in GuidsMain
            true = is_sublist(GuidsTmp, GuidsMain)
    end,
    ok.

%% true iff every element of SmallerL also occurs in BiggerL.
is_sublist(SmallerL, BiggerL) ->
    InBigger = fun (Item) -> lists:member(Item, BiggerL) end,
    lists:all(InBigger, SmallerL).

%% true iff no element of SmallerL occurs in BiggerL.
is_disjoint(SmallerL, BiggerL) ->
    NotInBigger = fun (Item) -> not lists:member(Item, BiggerL) end,
    lists:all(NotInBigger, SmallerL).

%% Scans FileName in Dir with rabbit_msg_file:scan/2 and returns its
%% result. A missing file is reported as {ok, [], 0}; any other open
%% failure as {error, {unable_to_scan_file, FileName, Reason}}.
scan_file_for_valid_messages(Dir, FileName) ->
    case open_file(Dir, FileName, ?READ_MODE) of
        {ok, Hdl} ->
            Size  = filelib:file_size(form_filename(Dir, FileName)),
            Valid = rabbit_msg_file:scan(Hdl, Size),
            %% if something really bad has happened, the close could
            %% fail, but ignore
            file_handle_cache:close(Hdl),
            Valid;
        {error, enoent} ->
            {ok, [], 0};
        {error, Reason} ->
            {error, {unable_to_scan_file, FileName, Reason}}
    end.

%% As scan_file_for_valid_messages/2, but returns the list of guids
%% (in scan order) in place of the file size.
scan_file_for_valid_messages_and_guids(Dir, FileName) ->
    {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName),
    Guids = [Guid || {Guid, _TotalSize, _FileOffset} <- Messages],
    {ok, Messages, Guids}.

%% Takes the list in *ascending* order (i.e. eldest message
%% first). This is the opposite of what scan_file_for_valid_messages
%% produces. The list of msgs that is produced is youngest first.
find_contiguous_block_prefix(L) ->
    find_contiguous_block_prefix(L, 0, []).

%% Walks {Guid, TotalSize, Offset} entries for as long as each one
%% starts exactly at ExpectedOffset (0 for the first). Returns
%% {EndOfBlock, Guids}, with the guids of the contiguous block
%% accumulated in reverse order of traversal.
find_contiguous_block_prefix([], ExpectedOffset, Guids) ->
    {ExpectedOffset, Guids};
find_contiguous_block_prefix([{Guid, TotalSize, ExpectedOffset} | Tail],
                             ExpectedOffset, Guids) ->
    %% the repeated ExpectedOffset in the head is the contiguity test
    find_contiguous_block_prefix(Tail, ExpectedOffset + TotalSize,
                                 [Guid | Guids]);
find_contiguous_block_prefix([_MsgAfterGap | _Tail], ExpectedOffset, Guids) ->
    {ExpectedOffset, Guids}.

%% Returns {Offset, State}.
%%
%% First argument 'true': the file summary table is already
%% populated, so it is folded over to total up sum_valid_data and
%% sum_file_size. current_file and Offset end up as the file and
%% file_size of the last entry visited.
%%
%% First argument 'false': the given files are scanned through a
%% gatherer (see build_index/4), after which empty files are deleted.
%% With no files at all, only the current file is processed.
build_index(true, _Files, State = #msstate {
                            file_summary_ets = FileSummaryEts }) ->
    Accumulate =
        fun (#file_summary { valid_total_size = ValidTotalSize,
                             file_size        = FileSize,
                             file             = File },
             {_Offset, Acc = #msstate { sum_valid_data = SumValid,
                                        sum_file_size  = SumFileSize }}) ->
                {FileSize, Acc #msstate {
                             sum_valid_data = SumValid + ValidTotalSize,
                             sum_file_size  = SumFileSize + FileSize,
                             current_file   = File }}
        end,
    ets:foldl(Accumulate, {0, State}, FileSummaryEts);
build_index(false, Files, State) ->
    {ok, Gatherer} = gatherer:start_link(),
    case Files of
        [] ->
            build_index(Gatherer, undefined,
                        [State #msstate.current_file], State);
        _ ->
            {Offset, Indexed} = build_index(Gatherer, undefined, Files, State),
            {Offset, lists:foldl(fun delete_file_if_empty/2, Indexed, Files)}
    end.

%% Gatherer-driven index build. While files remain, one asynchronous
%% worker is forked per file. Once the list is exhausted, the
%% #file_summary{} results are drained from the gatherer into the
%% file summary table while the size totals are accumulated. When the
%% gatherer is empty it is stopped, index entries with file =
%% undefined are deleted, and {Offset, State} is returned with
%% current_file set to the last file handed out.
build_index(Gatherer, Left, [],
            State = #msstate { file_summary_ets = FileSummaryEts,
                               sum_valid_data   = SumValid,
                               sum_file_size    = SumFileSize }) ->
    case gatherer:out(Gatherer) of
        empty ->
            ok = gatherer:stop(Gatherer),
            ok = rabbit_misc:unlink_and_capture_exit(Gatherer),
            ok = index_delete_by_file(undefined, State),
            Offset = case ets:lookup(FileSummaryEts, Left) of
                         []                                       -> 0;
                         [#file_summary { file_size = FileSize }] -> FileSize
                     end,
            {Offset, State #msstate { current_file = Left }};
        {value, #file_summary { valid_total_size = ValidTotalSize,
                                file_size = FileSize } = FileSummary} ->
            true = ets:insert_new(FileSummaryEts, FileSummary),
            build_index(Gatherer, Left, [],
                        State #msstate {
                          sum_valid_data = SumValid + ValidTotalSize,
                          sum_file_size  = SumFileSize + FileSize })
    end;
build_index(Gatherer, Left, [File|Files], State) ->
    ok = gatherer:fork(Gatherer),
    Job = fun () -> build_index_worker(Gatherer, State, Left, File, Files) end,
    ok = worker_pool:submit_async(Job),
    build_index(Gatherer, File, Files, State).

%% Worker for one file. Scans it, updates the index location of every
%% message the index already knows about, and hands a #file_summary{}
%% for the file to the gatherer.
build_index_worker(Gatherer, State = #msstate { dir = Dir },
                   Left, File, Files) ->
    {ok, Messages, FileSize} =
        scan_file_for_valid_messages(Dir, filenum_to_name(File)),
    {ValidMessages, ValidTotalSize} =
        lists:foldl(
          fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) ->
                  case index_lookup(Guid, State) of
                      not_found ->
                          {VMAcc, VTSAcc};
                      StoreEntry ->
                          ok = index_update(StoreEntry #msg_location {
                                              file = File, offset = Offset,
                                              total_size = TotalSize },
                                            State),
                          {[Obj | VMAcc], VTSAcc + TotalSize}
                  end
          end, {[], 0}, Messages),
    %% foldl reverses lists, find_contiguous_block_prefix needs
    %% msgs eldest first, so, ValidMessages is the right way round
    {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages),
    {Right, FileSize1} =
        case Files of
            %% if it's the last file, we'll truncate to remove any
            %% rubbish above the last valid message. This affects the
            %% file size.
            [] ->
                LastFileSize =
                    case ValidMessages of
                        [] ->
                            0;
                        _ ->
                            {_Guid, LastTotalSize, LastOffset} =
                                lists:last(ValidMessages),
                            LastOffset + LastTotalSize
                    end,
                {undefined, LastFileSize};
            [NextFile|_] ->
                {NextFile, FileSize}
        end,
    ok = gatherer:in(Gatherer, #file_summary {
                       file             = File,
                       valid_total_size = ValidTotalSize,
                       contiguous_top   = ContiguousTop,
                       left             = Left,
                       right            = Right,
                       file_size        = FileSize1,
                       locked           = false,
                       readers          = 0 }),
    ok = gatherer:finish(Gatherer).

%%----------------------------------------------------------------------------
%% garbage collection / compaction / aggregation -- internal
%%----------------------------------------------------------------------------

%% Once Offset has reached file_size_limit: syncs and closes the
%% current file, opens file number CurFile + 1 for writing, links the
%% two entries in the file summary table, removes the {_, _, 0}
%% entries from the current-file cache, and finally gives
%% maybe_compact/1 a chance to run. Below the limit the state is
%% returned untouched.
maybe_roll_to_new_file(
  Offset,
  State = #msstate { dir                 = Dir,
                     current_file_handle = CurHdl,
                     current_file        = CurFile,
                     file_summary_ets    = FileSummaryEts,
                     cur_file_cache_ets  = CurFileCacheEts,
                     file_size_limit     = FileSizeLimit })
  when Offset >= FileSizeLimit ->
    Synced = internal_sync(State),
    ok = file_handle_cache:close(CurHdl),
    NextFile = CurFile + 1,
    {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE),
    true = ets:insert_new(FileSummaryEts, #file_summary {
                            file             = NextFile,
                            valid_total_size = 0,
                            contiguous_top   = 0,
                            left             = CurFile,
                            right            = undefined,
                            file_size        = 0,
                            locked           = false,
                            readers          = 0 }),
    true = ets:update_element(FileSummaryEts, CurFile,
                              {#file_summary.right, NextFile}),
    true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}),
    maybe_compact(Synced #msstate { current_file_handle = NextHdl,
                                    current_file        = NextFile });
maybe_roll_to_new_file(_, State) ->
    State.

%% Starts a GC run if none is active, the store holds more than two
%% file-size-limits of data, and the garbage share of that data
%% exceeds ?GARBAGE_FRACTION. The chosen source and destination files
%% are locked in the file summary table, our handles on them are
%% closed, and the request is sent to the GC process. In every other
%% case the state is returned unchanged.
maybe_compact(State = #msstate { sum_valid_data   = SumValid,
                                 sum_file_size    = SumFileSize,
                                 gc_active        = false,
                                 gc_pid           = GCPid,
                                 file_summary_ets = FileSummaryEts,
                                 file_size_limit  = FileSizeLimit })
  when (SumFileSize > 2 * FileSizeLimit andalso
        (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) ->
    %% TODO: the algorithm here is sub-optimal - it may result in a
    %% complete traversal of FileSummaryEts.
    case ets:first(FileSummaryEts) of
        '$end_of_table' ->
            State;
        First ->
            Candidates = find_files_to_gc(FileSummaryEts, FileSizeLimit,
                                          ets:lookup(FileSummaryEts, First)),
            case Candidates of
                not_found ->
                    State;
                {Src, Dst} ->
                    Closed = close_handle(Src, close_handle(Dst, State)),
                    true = ets:update_element(FileSummaryEts, Src,
                                              {#file_summary.locked, true}),
                    true = ets:update_element(FileSummaryEts, Dst,
                                              {#file_summary.locked, true}),
                    ok = rabbit_msg_store_gc:gc(GCPid, Src, Dst),
                    Closed #msstate { gc_active = {Src, Dst} }
            end
    end;
maybe_compact(State) ->
    State.

%% Follows the 'right' links from the given summary entry, looking
%% for the first pair of neighbouring files whose combined valid data
%% fits into one file. A pair whose source has no right neighbour is
%% never chosen. Returns {Src, Dst} or not_found.
find_files_to_gc(_FileSummaryEts, _FileSizeLimit,
                 [#file_summary { right = undefined }]) ->
    not_found;
find_files_to_gc(FileSummaryEts, FileSizeLimit,
                 [#file_summary { file             = Dst,
                                  valid_total_size = DstValid,
                                  right            = Src }]) ->
    %% Src and Dst are already bound: the match also asserts that the
    %% neighbour's key and back-pointer are consistent
    [#file_summary { file             = Src,
                     valid_total_size = SrcValid,
                     left             = Dst,
                     right            = SrcRight }] = Next =
        ets:lookup(FileSummaryEts, Src),
    case SrcRight of
        undefined ->
            not_found;
        _ ->
            case DstValid + SrcValid =< FileSizeLimit of
                true  -> {Src, Dst};
                false -> find_files_to_gc(FileSummaryEts, FileSizeLimit, Next)
            end
    end.

%% Deletes File if it holds no valid data: unlinks it from its
%% neighbours in the file summary table, marks outstanding handles
%% for closing, removes its summary entry, closes our own handle and
%% deletes it from disk. The current file is never deleted. The
%% file's summary entry must be unlocked, otherwise the match fails.
delete_file_if_empty(File, State = #msstate { current_file = File }) ->
    State;
delete_file_if_empty(File, State = #msstate {
                             dir              = Dir,
                             sum_file_size    = SumFileSize,
                             file_handles_ets = FileHandlesEts,
                             file_summary_ets = FileSummaryEts }) ->
    [#file_summary { valid_total_size = ValidData,
                     left             = Left,
                     right            = Right,
                     file_size        = FileSize,
                     locked           = false }] =
        ets:lookup(FileSummaryEts, File),
    case ValidData of
        %% we should NEVER find the current file in here hence right
        %% should always be a file, not undefined
        0 ->
            case {Left, Right} of
                {undefined, _} when Right =/= undefined ->
                    %% the eldest file is empty.
                    true = ets:update_element(
                             FileSummaryEts, Right,
                             {#file_summary.left, undefined});
                {_, _} when Right =/= undefined ->
                    true = ets:update_element(FileSummaryEts, Right,
                                              {#file_summary.left, Left}),
                    true = ets:update_element(FileSummaryEts, Left,
                                              {#file_summary.right, Right})
            end,
            true = mark_handle_to_close(FileHandlesEts, File),
            true = ets:delete(FileSummaryEts, File),
            Closed = close_handle(File, State),
            ok = file:delete(form_filename(Dir, filenum_to_name(File))),
            Closed #msstate { sum_file_size = SumFileSize - FileSize };
        _ ->
            State
    end.

%%----------------------------------------------------------------------------
%% garbage collection / compaction / aggregation -- external
%%----------------------------------------------------------------------------

%% Combines SrcFile into DstFile, provided neither file has readers.
%% Both summary entries must be locked and must point at each other
%% (Src.left = Dst, Dst.right = Src), otherwise the matches fail.
%% Returns the number of bytes reclaimed, or 'concurrent_readers' if
%% nothing was done.
gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) ->
    [SrcObj = #file_summary {
       readers   = SrcReaders,
       left      = DstFile,
       file_size = SrcFileSize,
       locked    = true }] = ets:lookup(FileSummaryEts, SrcFile),
    [DstObj = #file_summary {
       readers   = DstReaders,
       right     = SrcFile,
       file_size = DstFileSize,
       locked    = true }] = ets:lookup(FileSummaryEts, DstFile),

    NoReaders = SrcReaders =:= 0 andalso DstReaders =:= 0,
    case NoReaders of
        false ->
            concurrent_readers;
        true ->
            TotalValidData = combine_files(SrcObj, DstObj, State),
            %% don't update dest.right, because it could be changing
            %% at the same time
            true = ets:update_element(
                     FileSummaryEts, DstFile,
                     [{#file_summary.valid_total_size, TotalValidData},
                      {#file_summary.contiguous_top,   TotalValidData},
                      {#file_summary.file_size,        TotalValidData}]),
            SrcFileSize + DstFileSize - TotalValidData
    end.

%% Compacts Destination in place and appends the live messages of
%% Source to it, then deletes Source. Returns the expected size of
%% the combined file (SourceValid + DestinationValid).
%%
%% If Destination's valid data is already contiguous from offset 0
%% (DestinationValid =:= DestinationContiguousTop), no tmp file is
%% needed: the file is truncated straight away and Source is copied
%% over. Otherwise everything past DestinationContiguousTop is first
%% written out to a tmp file, Destination is truncated, the tmp
%% contents are copied back in, and only then is Source copied over.
combine_files(#file_summary { file             = Source,
                              valid_total_size = SourceValid,
                              left             = Destination },
              #file_summary { file             = Destination,
                              valid_total_size = DestinationValid,
                              contiguous_top   = DestinationContiguousTop,
                              right            = Source },
              State = {_FileSummaryEts, Dir, _Index, _IndexState}) ->
    SourceName      = filenum_to_name(Source),
    DestinationName = filenum_to_name(Destination),
    {ok, SourceHdl} = open_file(Dir, SourceName,
                                ?READ_AHEAD_MODE),
    {ok, DestinationHdl} = open_file(Dir, DestinationName,
                                     ?READ_AHEAD_MODE ++ ?WRITE_MODE),
    ExpectedSize = SourceValid + DestinationValid,
    case DestinationContiguousTop =:= DestinationValid of
        true ->
            ok = truncate_and_extend_file(
                   DestinationHdl, DestinationContiguousTop, ExpectedSize);
        false ->
            %% the match on DestinationValid asserts that the scan
            %% agrees with the file summary
            {DestinationWorkList, DestinationValid} =
                find_unremoved_messages_in_file(Destination, State),
            BelowContiguousTop =
                fun (#msg_location { offset = Offset })
                      when Offset =/= DestinationContiguousTop ->
                        %% it cannot be that Offset =:=
                        %% DestinationContiguousTop because if it
                        %% was then DestinationContiguousTop would
                        %% have been extended by TotalSize
                        Offset < DestinationContiguousTop
                end,
            Worklist = lists:dropwhile(BelowContiguousTop,
                                       DestinationWorkList),
            Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP,
            {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE),
            ok = copy_messages(
                   Worklist, DestinationContiguousTop, DestinationValid,
                   DestinationHdl, TmpHdl, Destination, State),
            TmpSize = DestinationValid - DestinationContiguousTop,
            %% so now Tmp contains everything we need to salvage from
            %% Destination, and index_state has been updated to
            %% reflect the compaction of Destination so truncate
            %% Destination and copy from Tmp back to the end
            {ok, 0} = file_handle_cache:position(TmpHdl, 0),
            ok = truncate_and_extend_file(
                   DestinationHdl, DestinationContiguousTop, ExpectedSize),
            {ok, TmpSize} =
                file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize),
            %% position in DestinationHdl should now be DestinationValid
            ok = file_handle_cache:sync(DestinationHdl),
            ok = file_handle_cache:delete(TmpHdl)
    end,
    {SourceWorkList, SourceValid} =
        find_unremoved_messages_in_file(Source, State),
    ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize,
                       SourceHdl, DestinationHdl, Destination, State),
    %% tidy up
    ok = file_handle_cache:close(DestinationHdl),
    ok = file_handle_cache:delete(SourceHdl),
    ExpectedSize.

%% Scans File and keeps only the messages whose index entry still
%% names File as their location. Returns {Entries, TotalSize}, with
%% the #msg_location{} entries in ascending offset order.
find_unremoved_messages_in_file(File,
                                {_FileSummaryEts, Dir, Index, IndexState}) ->
    %% Messages here will be end-of-file at start-of-list
    {ok, Messages, _FileSize} =
        scan_file_for_valid_messages(Dir, filenum_to_name(File)),
    KeepIfInFile =
        fun ({Guid, TotalSize, _Offset}, Acc = {List, Size}) ->
                case Index:lookup(Guid, IndexState) of
                    #msg_location { file = File } = Entry ->
                        {[ Entry | List ], TotalSize + Size};
                    _ ->
                        Acc
                end
        end,
    %% foldl will reverse so will end up with msgs in ascending
    %% offset order
    lists:foldl(KeepIfInFile, {[], 0}, Messages).

%% Copies the messages in WorkList from SourceHdl to DestinationHdl,
%% starting at destination offset InitOffset, and points each
%% message's index entry at its new file and offset. Messages that
%% are adjacent in the source are coalesced into one block per copy
%% call. Returns ok if the final destination offset equals
%% FinalOffset, otherwise a {gc_error, Details} tuple.
copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl,
              Destination, {_FileSummaryEts, _Dir, Index, IndexState}) ->
    Copy = fun ({BlockStart, BlockEnd}) ->
                   BSize = BlockEnd - BlockStart,
                   {ok, BlockStart} =
                       file_handle_cache:position(SourceHdl, BlockStart),
                   {ok, BSize} =
                       file_handle_cache:copy(SourceHdl, DestinationHdl, BSize)
           end,
    Step =
        fun (#msg_location { guid = Guid, offset = Offset,
                             total_size = TotalSize },
             {CurOffset, Block = {BlockStart, BlockEnd}}) ->
                %% CurOffset is in the DestinationFile.
                %% Offset, BlockStart and BlockEnd are in the SourceFile
                %% update MsgLocation to reflect change of file and offset
                ok = Index:update_fields(Guid,
                                         [{#msg_location.file, Destination},
                                          {#msg_location.offset, CurOffset}],
                                         IndexState),
                {CurOffset + TotalSize,
                 case BlockEnd of
                     undefined ->
                         %% base case, called only for the first list elem
                         {Offset, Offset + TotalSize};
                     Offset ->
                         %% extend the current block because the
                         %% next msg follows straight on
                         {BlockStart, BlockEnd + TotalSize};
                     _ ->
                         %% found a gap, so actually do the work for
                         %% the previous block
                         Copy(Block),
                         {Offset, Offset + TotalSize}
                 end}
        end,
    case lists:foldl(Step, {InitOffset, {undefined, undefined}}, WorkList) of
        {FinalOffset, Block} ->
            case WorkList of
                [] -> ok;
                _  -> Copy(Block), %% do the last remaining block
                      ok = file_handle_cache:sync(DestinationHdl)
            end;
        {FinalOffsetZ, _Block} ->
            {gc_error, [{expected,    FinalOffset},
                        {got,         FinalOffsetZ},
                        {destination, Destination}]}
    end.
diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl
new file mode 100644
index 00000000..1eb3c11f
--- /dev/null
+++ b/src/rabbit_msg_store_ets_index.erl
@@ -0,0 +1,90 @@
%%   The contents of this file are subject to the Mozilla Public License
%%   Version 1.1 (the "License"); you may not use this file except in
%%   compliance with the License. You may obtain a copy of the License at
%%   http://www.mozilla.org/MPL/
%%
%%   Software distributed under the License is distributed on an "AS IS"
%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
%%   License for the specific language governing rights and limitations
%%   under the License.
%%
%%   The Original Code is RabbitMQ.
%%
%%   The Initial Developers of the Original Code are LShift Ltd,
%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
%%
%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
%%   Technologies LLC, and Rabbit Technologies Ltd.
%%
%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
%%   (C) 2007-2010 Rabbit Technologies Ltd.
%%
%%   All Rights Reserved.
%%
%%   Contributor(s): ______________________________________.
%%

-module(rabbit_msg_store_ets_index).

-behaviour(rabbit_msg_store_index).

-export([new/1, recover/1,
         lookup/2, insert/2, update/2, update_fields/3, delete/2,
         delete_by_file/2, terminate/1]).

-define(MSG_LOC_NAME, rabbit_msg_store_ets_index).
-define(FILENAME, "msg_store_index.ets").

-include("rabbit_msg_store_index.hrl").

-record(state, { table, dir }).

%% Creates a fresh, empty index table keyed on #msg_location.guid,
%% first removing any saved index file left in Dir (the result of
%% that delete is not checked).
new(Dir) ->
    file:delete(filename:join(Dir, ?FILENAME)),
    Table = ets:new(?MSG_LOC_NAME,
                    [set, public, {keypos, #msg_location.guid}]),
    #state { table = Table, dir = Dir }.

%% Loads the index table saved in Dir. On success the saved file is
%% deleted and {ok, State} is returned; otherwise the result of
%% ets:file2tab/1 is passed through unchanged.
recover(Dir) ->
    Path = filename:join(Dir, ?FILENAME),
    case ets:file2tab(Path) of
        {ok, Table} -> file:delete(Path),
                       {ok, #state { table = Table, dir = Dir }};
        Failure     -> Failure
    end.

%% Returns the entry stored under Key, or not_found.
lookup(Key, State) ->
    Table = State #state.table,
    case ets:lookup(Table, Key) of
        [Entry] -> Entry;
        []      -> not_found
    end.

%% Inserts Obj; its key must not be present yet.
insert(Obj, State) ->
    Table = State #state.table,
    true = ets:insert_new(Table, Obj),
    ok.

%% Inserts Obj, overwriting any entry with the same key.
update(Obj, State) ->
    Table = State #state.table,
    true = ets:insert(Table, Obj),
    ok.

%% Updates individual fields of the entry under Key, which must
%% exist.
update_fields(Key, Updates, State) ->
    Table = State #state.table,
    true = ets:update_element(Table, Key, Updates),
    ok.

%% Deletes the entry under Key.
delete(Key, State) ->
    Table = State #state.table,
    true = ets:delete(Table, Key),
    ok.

%% Deletes every entry whose 'file' field equals File.
delete_by_file(File, State) ->
    Table     = State #state.table,
    MatchHead = #msg_location { file = File, _ = '_' },
    ets:select_delete(Table, [{MatchHead, [], [true]}]),
    ok.

%% Saves the index table to ?FILENAME in the state's directory, then
%% deletes the table.
terminate(#state { table = MsgLocations, dir = Dir }) ->
    Path = filename:join(Dir, ?FILENAME),
    ok = ets:tab2file(MsgLocations, Path,
                      [{extended_info, [object_count]}]),
    ets:delete(MsgLocations).
diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl
new file mode 100644
index 00000000..eaa41173
--- /dev/null
+++ b/src/rabbit_msg_store_gc.erl
@@ -0,0 +1,141 @@
%%   The contents of this file are subject to the Mozilla Public License
%%   Version 1.1 (the "License"); you may not use this file except in
%%   compliance with the License. You may obtain a copy of the License at
%%   http://www.mozilla.org/MPL/
%%
%%   Software distributed under the License is distributed on an "AS IS"
%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
%%   License for the specific language governing rights and limitations
%%   under the License.
%%
%%   The Original Code is RabbitMQ.
%%
%%   The Initial Developers of the Original Code are LShift Ltd,
%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
%%
%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
%%   Technologies LLC, and Rabbit Technologies Ltd.
%%
%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
%%   (C) 2007-2010 Rabbit Technologies Ltd.
%%
%%   All Rights Reserved.
%%
%%   Contributor(s): ______________________________________.
%%

-module(rabbit_msg_store_gc).

-behaviour(gen_server2).

-export([start_link/4, gc/3, no_readers/2, stop/1]).
+
+-export([set_maximum_since_use/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+%% Server state:
+%%   dir, index_state, index_module, file_summary_ets - handed through
+%%     unchanged to rabbit_msg_store:gc/3
+%%   parent    - the process that called start_link/4; told when a
+%%               collection has finished
+%%   scheduled - 'undefined' when idle, otherwise the
+%%               {Source, Destination} pair still to be collected
+-record(gcstate,
+        {dir,
+         index_state,
+         index_module,
+         parent,
+         file_summary_ets,
+         scheduled
+        }).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-spec(start_link/4 :: (file:filename(), any(), atom(), ets:tid()) ->
+                           'ignore' | rabbit_types:ok_or_error2(pid(), any())).
+-spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok').
+-spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok').
+-spec(stop/1 :: (pid()) -> 'ok').
+-spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok').
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+%% Starts the GC server. The calling process is recorded as the parent.
+start_link(Dir, IndexState, IndexModule, FileSummaryEts) ->
+    gen_server2:start_link(
+      ?MODULE, [self(), Dir, IndexState, IndexModule, FileSummaryEts],
+      [{timeout, infinity}]).
+
+%% Asynchronously asks the server to collect the Source/Destination pair.
+gc(Server, Source, Destination) ->
+    gen_server2:cast(Server, {gc, Source, Destination}).
+
+%% Asynchronously tells the server that File no longer has readers, so
+%% a collection that was put off because of them can be retried.
+no_readers(Server, File) ->
+    gen_server2:cast(Server, {no_readers, File}).
+
+%% Synchronously stops the server; waits indefinitely for the reply.
+stop(Server) ->
+    gen_server2:call(Server, stop, infinity).
+
+%% Callback registered with file_handle_cache in init/1. Sent via
+%% gen_server2:pcast/3 with the value 8 (presumably a priority that lets
+%% it overtake ordinary casts - confirm against gen_server2).
+set_maximum_since_use(Pid, Age) ->
+    gen_server2:pcast(Pid, 8, {set_maximum_since_use, Age}).
+
+%%----------------------------------------------------------------------------
+
+%% Registers this process with file_handle_cache and starts idle
+%% (scheduled = undefined), hibernating immediately with the backoff
+%% settings defined by the ?HIBERNATE_AFTER_MIN / ?DESIRED_HIBERNATE
+%% macros.
+init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) ->
+    ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use,
+                                             [self()]),
+    {ok, #gcstate { dir              = Dir,
+                    index_state      = IndexState,
+                    index_module     = IndexModule,
+                    parent           = Parent,
+                    file_summary_ets = FileSummaryEts,
+                    scheduled        = undefined },
+     hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+%% 'stop' is the only supported call; it replies ok and stops normally.
+handle_call(stop, _From, State) ->
+    {stop, normal, ok, State}.
+
+%% A gc request is only matched while nothing is scheduled; the pair is
+%% recorded and a collection attempted straight away. No clause below
+%% matches a gc request that arrives while another is still scheduled,
+%% so that would fail with function_clause.
+handle_cast({gc, Source, Destination},
+            State = #gcstate { scheduled = undefined }) ->
+    {noreply, attempt_gc(State #gcstate { scheduled = {Source, Destination} }),
+     hibernate};
+
+%% One of the files of the scheduled pair has lost its readers: retry.
+handle_cast({no_readers, File},
+            State = #gcstate { scheduled = {Source, Destination} })
+  when File =:= Source orelse File =:= Destination ->
+    {noreply, attempt_gc(State), hibernate};
+
+%% no_readers for a file that is not part of the scheduled pair (or
+%% while idle): nothing to do.
+handle_cast({no_readers, _File}, State) ->
+    {noreply, State, hibernate};
+
+handle_cast({set_maximum_since_use, Age}, State) ->
+    ok = file_handle_cache:set_maximum_since_use(Age),
+    {noreply, State, hibernate}.
+
+%% Any other message is unexpected and stops the server.
+handle_info(Info, State) ->
+    {stop, {unhandled_info, Info}, State}.
+
+terminate(_Reason, State) ->
+    State.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%% Runs rabbit_msg_store:gc/3 for the scheduled pair. If it reports
+%% 'concurrent_readers' the state is returned unchanged, so the pair
+%% stays scheduled for a later no_readers cast. Any other result is
+%% treated as the reclaimed amount: it is reported to the parent via
+%% rabbit_msg_store:gc_done/4 and the schedule is cleared.
+attempt_gc(State = #gcstate { dir              = Dir,
+                              index_state      = IndexState,
+                              index_module     = Index,
+                              parent           = Parent,
+                              file_summary_ets = FileSummaryEts,
+                              scheduled        = {Source, Destination} }) ->
+    case rabbit_msg_store:gc(Source, Destination,
+                             {FileSummaryEts, Dir, Index, IndexState}) of
+        concurrent_readers -> State;
+        Reclaimed          -> ok = rabbit_msg_store:gc_done(
+                                     Parent, Reclaimed, Source, Destination),
+                              State #gcstate { scheduled = undefined }
+    end.
diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl
new file mode 100644
index 00000000..0ed64a9d
--- /dev/null
+++ b/src/rabbit_msg_store_index.erl
@@ -0,0 +1,47 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developers of the Original Code are LShift Ltd,
+%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%% Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%% Ltd. Portions created by Cohesive Financial Technologies LLC are
+%% Copyright (C) 2007-2010 Cohesive Financial Technologies
+%% LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%% (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%% All Rights Reserved.
+%%
+%% Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_msg_store_index).
+
+-export([behaviour_info/1]).
+
+%% Declares the callbacks (name and arity) that a module implementing
+%% this behaviour must export. Any request other than 'callbacks'
+%% yields 'undefined'.
+behaviour_info(callbacks) ->
+    [{new,            1},
+     {recover,        1},
+     {lookup,         2},
+     {insert,         2},
+     {update,         2},
+     {update_fields,  3},
+     {delete,         2},
+     {delete_by_file, 2},
+     {terminate,      1}];
+behaviour_info(_Other) ->
+    undefined.
diff --git a/src/rabbit_multi.erl b/src/rabbit_multi.erl index 5db1d77a..3facef17 100644 --- a/src/rabbit_multi.erl +++ b/src/rabbit_multi.erl @@ -309,9 +309,9 @@ is_dead(Pid) -> {win32, fun () -> Res = os:cmd("tasklist /nh /fi \"pid eq " ++ PidS ++ "\""), - case regexp:first_match(Res, "erl.exe") of - {match, _, _} -> false; - _ -> true + case re:run(Res, "erl\\.exe", [{capture, none}]) of + match -> false; + _ -> true end end}]). diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl index 975954fc..6baa4b88 100644 --- a/src/rabbit_net.erl +++ b/src/rabbit_net.erl @@ -31,31 +31,42 @@ -module(rabbit_net). -include("rabbit.hrl"). --include_lib("kernel/include/inet.hrl"). -export([async_recv/3, close/1, controlling_process/2, getstat/2, peername/1, port_command/2, send/2, sockname/1]). + %%--------------------------------------------------------------------------- -ifdef(use_specs). 
+-export_type([socket/0]). + -type(stat_option() :: 'recv_cnt' | 'recv_max' | 'recv_avg' | 'recv_oct' | 'recv_dvi' | 'send_cnt' | 'send_max' | 'send_avg' | 'send_oct' | 'send_pend'). --type(error() :: {'error', any()}). - --spec(async_recv/3 :: (socket(), integer(), timeout()) -> {'ok', any()}). --spec(close/1 :: (socket()) -> 'ok' | error()). --spec(controlling_process/2 :: (socket(), pid()) -> 'ok' | error()). +-type(error() :: rabbit_types:error(any())). +-type(socket() :: rabbit_networking:ip_port() | rabbit_types:ssl_socket()). + +-spec(async_recv/3 :: + (socket(), integer(), timeout()) -> rabbit_types:ok(any())). +-spec(close/1 :: (socket()) -> rabbit_types:ok_or_error(any())). +-spec(controlling_process/2 :: + (socket(), pid()) -> rabbit_types:ok_or_error(any())). -spec(port_command/2 :: (socket(), iolist()) -> 'true'). --spec(send/2 :: (socket(), binary() | iolist()) -> 'ok' | error()). --spec(peername/1 :: (socket()) -> - {'ok', {ip_address(), non_neg_integer()}} | error()). --spec(sockname/1 :: (socket()) -> - {'ok', {ip_address(), non_neg_integer()}} | error()). --spec(getstat/2 :: (socket(), [stat_option()]) -> - {'ok', [{stat_option(), integer()}]} | error()). +-spec(send/2 :: + (socket(), binary() | iolist()) -> rabbit_types:ok_or_error(any())). +-spec(peername/1 :: + (socket()) + -> rabbit_types:ok({inet:ip_address(), rabbit_networking:ip_port()}) | + error()). +-spec(sockname/1 :: + (socket()) + -> rabbit_types:ok({inet:ip_address(), rabbit_networking:ip_port()}) | + error()). +-spec(getstat/2 :: + (socket(), [stat_option()]) + -> rabbit_types:ok([{stat_option(), integer()}]) | error()). -endif. diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl index c3d0b7b7..3a3357ba 100644 --- a/src/rabbit_networking.erl +++ b/src/rabbit_networking.erl @@ -63,25 +63,29 @@ -ifdef(use_specs). --type(host() :: ip_address() | string() | atom()). --type(connection() :: pid()). +-export_type([ip_port/0, hostname/0]). -spec(start/0 :: () -> 'ok'). 
--spec(start_tcp_listener/2 :: (host(), ip_port()) -> 'ok'). --spec(start_ssl_listener/3 :: (host(), ip_port(), [info()]) -> 'ok'). --spec(stop_tcp_listener/2 :: (host(), ip_port()) -> 'ok'). --spec(active_listeners/0 :: () -> [listener()]). --spec(node_listeners/1 :: (erlang_node()) -> [listener()]). --spec(connections/0 :: () -> [connection()]). --spec(connection_info_keys/0 :: () -> [info_key()]). --spec(connection_info/1 :: (connection()) -> [info()]). --spec(connection_info/2 :: (connection(), [info_key()]) -> [info()]). --spec(connection_info_all/0 :: () -> [[info()]]). --spec(connection_info_all/1 :: ([info_key()]) -> [[info()]]). +-spec(start_tcp_listener/2 :: (hostname(), ip_port()) -> 'ok'). +-spec(start_ssl_listener/3 :: (hostname(), ip_port(), [rabbit_types:info()]) + -> 'ok'). +-spec(stop_tcp_listener/2 :: (hostname(), ip_port()) -> 'ok'). +-spec(active_listeners/0 :: () -> [rabbit_types:listener()]). +-spec(node_listeners/1 :: (node()) -> [rabbit_types:listener()]). +-spec(connections/0 :: () -> [rabbit_types:connection()]). +-spec(connection_info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(connection_info/1 :: + (rabbit_types:connection()) -> [rabbit_types:info()]). +-spec(connection_info/2 :: + (rabbit_types:connection(), [rabbit_types:info_key()]) + -> [rabbit_types:info()]). +-spec(connection_info_all/0 :: () -> [[rabbit_types:info()]]). +-spec(connection_info_all/1 :: + ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]). -spec(close_connection/2 :: (pid(), string()) -> 'ok'). --spec(on_node_down/1 :: (erlang_node()) -> 'ok'). --spec(check_tcp_listener_address/3 :: (atom(), host(), ip_port()) -> - {ip_address(), atom()}). +-spec(on_node_down/1 :: (node()) -> 'ok'). +-spec(check_tcp_listener_address/3 :: + (atom(), hostname(), ip_port()) -> {inet:ip_address(), atom()}). -endif. 
@@ -102,7 +106,7 @@ boot_ssl() -> {ok, []} -> ok; {ok, SslListeners} -> - ok = rabbit_misc:start_applications([crypto, ssl]), + ok = rabbit_misc:start_applications([crypto, public_key, ssl]), {ok, SslOpts} = application:get_env(ssl_options), [start_ssl_listener(Host, Port, SslOpts) || {Host, Port} <- SslListeners], ok diff --git a/src/rabbit_persister.erl b/src/rabbit_persister.erl index 8d3c2dc0..a427b135 100644 --- a/src/rabbit_persister.erl +++ b/src/rabbit_persister.erl @@ -65,21 +65,29 @@ -ifdef(use_specs). --type(pmsg() :: {queue_name(), pkey()}). +-type(pkey() :: rabbit_guid:guid()). +-type(pmsg() :: {rabbit_amqqueue:name(), pkey()}). + -type(work_item() :: - {publish, message(), pmsg()} | + {publish, rabbit_types:message(), pmsg()} | {deliver, pmsg()} | {ack, pmsg()}). --spec(start_link/1 :: ([queue_name()]) -> - {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/1 :: + ([rabbit_amqqueue:name()]) + -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(transaction/1 :: ([work_item()]) -> 'ok'). --spec(extend_transaction/2 :: ({txn(), queue_name()}, [work_item()]) -> 'ok'). +-spec(extend_transaction/2 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}, [work_item()]) + -> 'ok'). -spec(dirty_work/1 :: ([work_item()]) -> 'ok'). --spec(commit_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). --spec(rollback_transaction/1 :: ({txn(), queue_name()}) -> 'ok'). +-spec(commit_transaction/1 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}) -> 'ok'). +-spec(rollback_transaction/1 :: + ({rabbit_types:txn(), rabbit_amqqueue:name()}) -> 'ok'). -spec(force_snapshot/0 :: () -> 'ok'). --spec(queue_content/1 :: (queue_name()) -> [{message(), boolean()}]). +-spec(queue_content/1 :: + (rabbit_amqqueue:name()) -> [{rabbit_types:message(), boolean()}]). -endif. 
diff --git a/src/rabbit_reader_queue_collector.erl b/src/rabbit_queue_collector.erl index 8d4e8fdb..ea3768d4 100644 --- a/src/rabbit_reader_queue_collector.erl +++ b/src/rabbit_queue_collector.erl @@ -29,16 +29,16 @@ %% Contributor(s): ______________________________________. %% --module(rabbit_reader_queue_collector). +-module(rabbit_queue_collector). -behaviour(gen_server). --export([start_link/0, register_exclusive_queue/2, delete_all/1, shutdown/1]). +-export([start_link/0, register/2, delete_all/1, shutdown/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --record(state, {exclusive_queues}). +-record(state, {queues}). -include("rabbit.hrl"). @@ -46,8 +46,8 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()}). --spec(register_exclusive_queue/2 :: (pid(), amqqueue()) -> 'ok'). +-spec(start_link/0 :: () -> rabbit_types:ok(pid())). +-spec(register/2 :: (pid(), rabbit_types:amqqueue()) -> 'ok'). -spec(delete_all/1 :: (pid()) -> 'ok'). -endif. @@ -57,8 +57,8 @@ start_link() -> gen_server:start_link(?MODULE, [], []). -register_exclusive_queue(CollectorPid, Q) -> - gen_server:call(CollectorPid, {register_exclusive_queue, Q}, infinity). +register(CollectorPid, Q) -> + gen_server:call(CollectorPid, {register, Q}, infinity). delete_all(CollectorPid) -> gen_server:call(CollectorPid, delete_all, infinity). @@ -69,25 +69,24 @@ shutdown(CollectorPid) -> %%---------------------------------------------------------------------------- init([]) -> - {ok, #state{exclusive_queues = dict:new()}}. + {ok, #state{queues = dict:new()}}. 
%%-------------------------------------------------------------------------- -handle_call({register_exclusive_queue, Q}, _From, - State = #state{exclusive_queues = Queues}) -> +handle_call({register, Q}, _From, + State = #state{queues = Queues}) -> MonitorRef = erlang:monitor(process, Q#amqqueue.pid), {reply, ok, - State#state{exclusive_queues = dict:store(MonitorRef, Q, Queues)}}; + State#state{queues = dict:store(MonitorRef, Q, Queues)}}; -handle_call(delete_all, _From, - State = #state{exclusive_queues = ExclusiveQueues}) -> +handle_call(delete_all, _From, State = #state{queues = Queues}) -> [rabbit_misc:with_exit_handler( fun () -> ok end, fun () -> erlang:demonitor(MonitorRef), rabbit_amqqueue:delete(Q, false, false) end) - || {MonitorRef, Q} <- dict:to_list(ExclusiveQueues)], + || {MonitorRef, Q} <- dict:to_list(Queues)], {reply, ok, State}; handle_call(shutdown, _From, State) -> @@ -97,9 +96,8 @@ handle_cast(_Msg, State) -> {noreply, State}. handle_info({'DOWN', MonitorRef, process, _DownPid, _Reason}, - State = #state{exclusive_queues = ExclusiveQueues}) -> - {noreply, State#state{exclusive_queues = - dict:erase(MonitorRef, ExclusiveQueues)}}. + State = #state{queues = Queues}) -> + {noreply, State#state{queues = dict:erase(MonitorRef, Queues)}}. terminate(_Reason, _State) -> ok. diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl new file mode 100644 index 00000000..d6b8bb28 --- /dev/null +++ b/src/rabbit_queue_index.erl @@ -0,0 +1,932 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. 
+%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_queue_index). + +-export([init/4, terminate/2, delete_and_terminate/1, publish/4, + deliver/2, ack/2, sync/2, flush/1, read/3, + next_segment_boundary/1, bounds/1, recover/1]). + +-define(CLEAN_FILENAME, "clean.dot"). + +%%---------------------------------------------------------------------------- + +%% The queue index is responsible for recording the order of messages +%% within a queue on disk. +%% +%% Because of the fact that the queue can decide at any point to send +%% a queue entry to disk, you can not rely on publishes appearing in +%% order. The only thing you can rely on is a message being published, +%% then delivered, then ack'd. +%% +%% In order to be able to clean up ack'd messages, we write to segment +%% files. These files have a fixed maximum size: ?SEGMENT_ENTRY_COUNT +%% publishes, delivers and acknowledgements. They are numbered, and so +%% it is known that the 0th segment contains messages 0 -> +%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages +%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. 
As +%% such, in the segment files, we only refer to message sequence ids +%% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a +%% fixed size. +%% +%% However, transient messages which are not sent to disk at any point +%% will cause gaps to appear in segment files. Therefore, we delete a +%% segment file whenever the number of publishes == number of acks +%% (note that although it is not fully enforced, it is assumed that a +%% message will never be ack'd before it is delivered, thus this test +%% also implies == number of delivers). In practice, this does not +%% cause disk churn in the pathological case because of the journal +%% and caching (see below). +%% +%% Because of the fact that publishes, delivers and acks can occur all +%% over, we wish to avoid lots of seeking. Therefore we have a fixed +%% sized journal to which all actions are appended. When the number of +%% entries in this journal reaches max_journal_entries, the journal +%% entries are scattered out to their relevant files, and the journal +%% is truncated to zero size. Note that entries in the journal must +%% carry the full sequence id, thus the format of entries in the +%% journal is different to that in the segments. +%% +%% The journal is also kept fully in memory, pre-segmented: the state +%% contains a mapping from segment numbers to state-per-segment (this +%% state is held for all segments which have been "seen": thus a +%% segment which has been read but has no pending entries in the +%% journal is still held in this mapping. Also note that a dict is +%% used for this mapping, not an array because with an array, you will +%% always have entries from 0). Actions are stored directly in this +%% state. 
Thus at the point of flushing the journal, firstly no +%% reading from disk is necessary, but secondly if the known number of +%% acks and publishes in a segment are equal, given the known state of +%% the segment file combined with the journal, no writing needs to be +%% done to the segment file either (in fact it is deleted if it exists +%% at all). This is safe given that the set of acks is a subset of the +%% set of publishes. When it's necessary to sync messages because of +%% transactions, it's only necessary to fsync on the journal: when +%% entries are distributed from the journal to segment files, those +%% segments appended to are fsync'd prior to the journal being +%% truncated. +%% +%% This module is also responsible for scanning the queue index files +%% and seeding the message store on start up. +%% +%% Note that in general, the representation of a message's state as +%% the tuple: {('no_pub'|{Guid, IsPersistent}), ('del'|'no_del'), +%% ('ack'|'no_ack')} is richer than strictly necessary for most +%% operations. However, for startup, and to ensure the safe and +%% correct combination of journal entries with entries read from the +%% segment on disk, this richer representation vastly simplifies and +%% clarifies the code. +%% +%% For notes on Clean Shutdown and startup, see documentation in +%% variable_queue. +%% +%%---------------------------------------------------------------------------- + +%% ---- Journal details ---- + +-define(JOURNAL_FILENAME, "journal.jif"). + +-define(PUB_PERSIST_JPREFIX, 2#00). +-define(PUB_TRANS_JPREFIX, 2#01). +-define(DEL_JPREFIX, 2#10). +-define(ACK_JPREFIX, 2#11). +-define(JPREFIX_BITS, 2). +-define(SEQ_BYTES, 8). +-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). + +%% ---- Segment details ---- + +-define(SEGMENT_EXTENSION, ".idx"). 
+
+%% TODO: The segment size would be configurable, but deriving all the
+%% other values is quite hairy and quite possibly noticeably less
+%% efficient, depending on how clever the compiler is when it comes to
+%% binary generation/matching with constant vs variable lengths.
+
+-define(REL_SEQ_BITS, 14).
+-define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))).
+
+%% seq only is binary 00 followed by 14 bits of rel seq id
+%% (range: 0 - 16383)
+-define(REL_SEQ_ONLY_PREFIX, 00).
+-define(REL_SEQ_ONLY_PREFIX_BITS, 2).
+-define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2).
+
+%% publish record is binary 1 followed by a bit for is_persistent,
+%% then 14 bits of rel seq id, and 128 bits of md5sum msg id
+-define(PUBLISH_PREFIX, 1).
+-define(PUBLISH_PREFIX_BITS, 1).
+
+-define(GUID_BYTES, 16). %% md5sum is 128 bit or 16 bytes
+-define(GUID_BITS, (?GUID_BYTES * 8)).
+%% 16 bytes for md5sum + 2 for seq, bits and prefix
+%% NB: the expansion is not parenthesised, so wrap uses of this macro
+%% in parentheses when combining it with other operators.
+-define(PUBLISH_RECORD_LENGTH_BYTES, ?GUID_BYTES + 2).
+
+%% 1 publish, 1 deliver, 1 ack per msg
+%% NB: also expands without enclosing parentheses.
+-define(SEGMENT_TOTAL_SIZE, ?SEGMENT_ENTRY_COUNT *
+            (?PUBLISH_RECORD_LENGTH_BYTES +
+                 (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES))).
+
+%% ---- misc ----
+
+-define(PUB, {_, _}). %% {Guid, IsPersistent}
+
+%% File modes for reading; the read-ahead buffer is sized to
+%% ?SEGMENT_TOTAL_SIZE.
+-define(READ_MODE, [binary, raw, read, {read_ahead, ?SEGMENT_TOTAL_SIZE}]).
+
+%%----------------------------------------------------------------------------
+
+%% State of one queue's index: its directory, the per-segment state,
+%% the journal file handle ('undefined' until opened), the number of
+%% journal entries added since the last flush, and the threshold above
+%% which the journal is flushed.
+-record(qistate, { dir, segments, journal_handle, dirty_count,
+                   max_journal_entries }).
+
+%% State of one segment: its number, the path of its file, the
+%% in-memory journal entries that belong to it (an array indexed by
+%% relative sequence id) and the count of published-but-unacked
+%% messages.
+-record(segment, { num, path, journal_entries, unacked }).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-type(hdl() :: ('undefined' | any())).
+-type(segment() :: ('undefined' |
+                    #segment { num             :: non_neg_integer(),
+                               path            :: file:filename(),
+                               journal_entries :: array(),
+                               unacked         :: non_neg_integer()
+                             })).
+-type(seq_id() :: integer()).
+-type(seg_dict() :: {dict:dictionary(), [segment()]}).
+-type(qistate() :: #qistate { dir                 :: file:filename(),
+                              segments            :: 'undefined' | seg_dict(),
+                              journal_handle      :: hdl(),
+                              dirty_count         :: integer(),
+                              max_journal_entries :: non_neg_integer()
+                            }).
+-type(startup_fun_state() ::
+        {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})),
+         A}).
+
+-spec(init/4 :: (rabbit_amqqueue:name(), boolean(), boolean(),
+                 fun ((rabbit_guid:guid()) -> boolean())) ->
+             {'undefined' | non_neg_integer(), [any()], qistate()}).
+-spec(terminate/2 :: ([any()], qistate()) -> qistate()).
+-spec(delete_and_terminate/1 :: (qistate()) -> qistate()).
+-spec(publish/4 :: (rabbit_guid:guid(), seq_id(), boolean(), qistate()) ->
+                        qistate()).
+-spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()).
+-spec(ack/2 :: ([seq_id()], qistate()) -> qistate()).
+-spec(sync/2 :: ([seq_id()], qistate()) -> qistate()).
+-spec(flush/1 :: (qistate()) -> qistate()).
+-spec(read/3 :: (seq_id(), seq_id(), qistate()) ->
+                     {[{rabbit_guid:guid(), seq_id(), boolean(), boolean()}],
+                      qistate()}).
+-spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()).
+-spec(bounds/1 :: (qistate()) ->
+                       {non_neg_integer(), non_neg_integer(), qistate()}).
+-spec(recover/1 ::
+        ([rabbit_amqqueue:name()]) -> {[[any()]], startup_fun_state()}).
+
+-endif.
+
+
+%%----------------------------------------------------------------------------
+%% public API
+%%----------------------------------------------------------------------------
+
+%% Opens the index for queue Name and returns {Count, Terms, State}.
+%%
+%% When Recover is false the queue directory must not already exist
+%% (blank_state/2 asserts this). Terms are the terms stored at the last
+%% clean shutdown, or [] if they cannot be read. Note the ordering: the
+%% terms are read before detect_clean_shutdown/1 is called, because the
+%% latter deletes the file they are stored in.
+%%
+%% If the index was shut down cleanly and MsgStoreRecovered is true,
+%% the per-segment counts saved in Terms are reused (init_clean/2, in
+%% which case Count is 'undefined'); otherwise the index is rebuilt
+%% with init_dirty/3, using ContainsCheckFun to test whether a guid is
+%% still known.
+init(Name, Recover, MsgStoreRecovered, ContainsCheckFun) ->
+    State = #qistate { dir = Dir } = blank_state(Name, not Recover),
+    Terms = case read_shutdown_terms(Dir) of
+                {error, _}   -> [];
+                {ok, Terms1} -> Terms1
+            end,
+    CleanShutdown = detect_clean_shutdown(Dir),
+    {Count, State1} =
+        case CleanShutdown andalso MsgStoreRecovered of
+            true  -> RecoveredCounts = proplists:get_value(segments, Terms, []),
+                     init_clean(RecoveredCounts, State);
+            false -> init_dirty(CleanShutdown, ContainsCheckFun, State)
+        end,
+    {Count, Terms, State1}.
+
+%% Shuts the index down cleanly: closes the journal and writes Terms,
+%% with the per-segment unacked counts prepended under the 'segments'
+%% key, to the clean-shutdown file in the queue directory.
+terminate(Terms, State) ->
+    {SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State),
+    store_clean_shutdown([{segments, SegmentCounts} | Terms], Dir),
+    State1.
+
+%% Shuts the index down and removes the whole queue directory.
+delete_and_terminate(State) ->
+    {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State),
+    ok = rabbit_misc:recursive_delete([Dir]),
+    State1.
+
+%% Records the publish of message Guid at position SeqId. Guid must be
+%% a binary of exactly ?GUID_BYTES bytes. The entry appended to the
+%% journal file is a ?JPREFIX_BITS-bit prefix (persistent or transient
+%% publish) and the ?SEQ_BITS-bit SeqId, followed by the Guid. The same
+%% publish is then added to the in-memory journal, which may trigger a
+%% flush.
+publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) ->
+    ?GUID_BYTES = size(Guid),
+    {JournalHdl, State1} = get_journal_handle(State),
+    ok = file_handle_cache:append(
+           JournalHdl, [<<(case IsPersistent of
+                               true  -> ?PUB_PERSIST_JPREFIX;
+                               false -> ?PUB_TRANS_JPREFIX
+                           end):?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid]),
+    maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)).
+
+%% Records a delivery for each of SeqIds.
+deliver(SeqIds, State) ->
+    deliver_or_ack(del, SeqIds, State).
+
+%% Records an acknowledgement for each of SeqIds.
+ack(SeqIds, State) ->
+    deliver_or_ack(ack, SeqIds, State).
+
+%% Syncs the journal file to disk. Nothing is done when there are no
+%% SeqIds or when the journal has not been opened.
+sync([], State) ->
+    State;
+sync(_SeqIds, State = #qistate { journal_handle = undefined }) ->
+    State;
+sync(_SeqIds, State = #qistate { journal_handle = JournalHdl }) ->
+    %% The SeqIds here contains the SeqId of every publish and ack in
+    %% the transaction. Ideally we should go through these seqids and
+    %% only sync the journal if the pubs or acks appear in the
+    %% journal. However, this would be complex to do, and given that
+    %% the variable queue publishes and acks to the qi, and then
+    %% syncs, all in one operation, there is no possibility of the
+    %% seqids not being in the journal, provided the transaction isn't
+    %% emptied (handled above anyway).
+    ok = file_handle_cache:sync(JournalHdl),
+    State.
+
+%% Flushes the journal unless nothing has been added since the last
+%% flush (dirty_count is 0).
+flush(State = #qistate { dirty_count = 0 }) -> State;
+flush(State)                                -> flush_journal(State).
+
+%% Returns the entries with sequence ids in [Start, End), together with
+%% the updated state. An empty range yields []; there is no clause for
+%% Start > End. Segments are visited from last to first (foldr), each
+%% via read_bounded_segment/5 with the running accumulator.
+read(StartEnd, StartEnd, State) ->
+    {[], State};
+read(Start, End, State = #qistate { segments = Segments,
+                                    dir = Dir }) when Start =< End ->
+    %% Start is inclusive, End is exclusive.
+    LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start),
+    UpperB = {EndSeg,   _EndRelSeq}   = seq_id_to_seg_and_rel_seq_id(End - 1),
+    {Messages, Segments1} =
+        lists:foldr(fun (Seg, Acc) ->
+                            read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir)
+                    end, {[], Segments}, lists:seq(StartSeg, EndSeg)),
+    {Messages, State #qistate { segments = Segments1 }}.
+
+%% Returns the first sequence id of the segment following the one that
+%% contains SeqId.
+next_segment_boundary(SeqId) ->
+    {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
+    reconstruct_seq_id(Seg + 1, 0).
+
+%% Returns {LowSeqId, NextSeqId, State}: the first sequence id of the
+%% lowest known segment and the first sequence id of the segment after
+%% the highest known one; {0, 0, State} when no segments are known.
+bounds(State = #qistate { segments = Segments }) ->
+    %% This is not particularly efficient, but only gets invoked on
+    %% queue initialisation.
+    SegNums = lists:sort(segment_nums(Segments)),
+    %% Don't bother trying to figure out the lowest seq_id, merely the
+    %% seq_id of the start of the lowest segment. That seq_id may not
+    %% actually exist, but that's fine. The important thing is that
+    %% the segment exists and the seq_id reported is on a segment
+    %% boundary.
+    %%
+    %% We also don't really care about the max seq_id. Just start the
+    %% next segment: it makes life much easier.
+    %%
+    %% SegNums is sorted, ascending.
+    {LowSeqId, NextSeqId} =
+        case SegNums of
+            []         -> {0, 0};
+            [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0),
+                           reconstruct_seq_id(1 + lists:last(SegNums), 0)}
+        end,
+    {LowSeqId, NextSeqId, State}.
+
+%% Prepares the queue index directories at start-up.
+%%
+%% DurableQueues are the names of the queues that should survive.
+%% Every sub-directory of the queues directory is examined: one that
+%% belongs to a durable queue is kept and its clean-shutdown terms (if
+%% readable) are collected; any other directory is deleted recursively.
+%%
+%% Returns {DurableTerms, {WalkerFun, WalkerInitialState}}, where the
+%% walker enumerates the messages referenced by the surviving queues.
+%% A missing queues directory is treated as empty; any other error
+%% from file:list_dir/1 fails with case_clause.
+recover(DurableQueues) ->
+    DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} ||
+                                     Queue <- DurableQueues ]),
+    QueuesDir = queues_dir(),
+    Directories = case file:list_dir(QueuesDir) of
+                      {ok, Entries}   -> [ Entry || Entry <- Entries,
+                                                    filelib:is_dir(
+                                                      filename:join(
+                                                        QueuesDir, Entry)) ];
+                      {error, enoent} -> []
+                  end,
+    {DurableQueueNames, DurableTerms} =
+        lists:foldl(
+          fun (QueueDir, {DurableAcc, TermsAcc}) ->
+                  %% QueuesDir is already known; no need to recompute it
+                  %% for every directory.
+                  Dir = filename:join(QueuesDir, QueueDir),
+                  %% A single dictionary lookup both answers "is this a
+                  %% durable queue's directory?" and yields the queue.
+                  case dict:find(QueueDir, DurableDict) of
+                      {ok, Queue} ->
+                          TermsAcc1 =
+                              case read_shutdown_terms(Dir) of
+                                  {error, _}  -> TermsAcc;
+                                  {ok, Terms} -> [Terms | TermsAcc]
+                              end,
+                          {[Queue | DurableAcc], TermsAcc1};
+                      error ->
+                          ok = rabbit_misc:recursive_delete([Dir]),
+                          {DurableAcc, TermsAcc}
+                  end
+          end, {[], []}, Directories),
+    {DurableTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}.
+
+%%----------------------------------------------------------------------------
+%% startup and shutdown
+%%----------------------------------------------------------------------------
+
+%% Builds the initial state for QueueName. With EnsureFresh set, fails
+%% with badmatch if the queue directory already exists. The directory
+%% is then created if necessary (ensure_dir/1 creates the parents of
+%% the dummy "nothing" path), and the journal flush threshold is read
+%% from the rabbit application environment.
+blank_state(QueueName, EnsureFresh) ->
+    StrName = queue_name_to_dir_name(QueueName),
+    Dir = filename:join(queues_dir(), StrName),
+    ok = case EnsureFresh of
+             true  -> false = filelib:is_file(Dir), %% is_file == is file or dir
+                      ok;
+             false -> ok
+         end,
+    ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
+    {ok, MaxJournal} =
+        application:get_env(rabbit, queue_index_max_journal_entries),
+    #qistate { dir                 = Dir,
+               segments            = segments_new(),
+               journal_handle      = undefined,
+               dirty_count         = 0,
+               max_journal_entries = MaxJournal }.
+
+%% Reports whether the clean-shutdown file was present in Dir and, as
+%% a side effect, deletes it. Any error other than enoent fails with
+%% case_clause.
+detect_clean_shutdown(Dir) ->
+    case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of
+        ok              -> true;
+        {error, enoent} -> false
+    end.
+
+%% Reads the terms stored in the clean-shutdown file in Dir.
+read_shutdown_terms(Dir) ->
+    rabbit_misc:read_term_file(filename:join(Dir, ?CLEAN_FILENAME)).
+
+%% Writes Terms to the clean-shutdown file in Dir.
+store_clean_shutdown(Terms, Dir) ->
+    rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms).
+
+%% Start-up after a clean shutdown. RecoveredCounts is a list of
+%% {SegmentNumber, UnackedCount} pairs saved at shutdown. Returns
+%% {undefined, State}: no message count is reported on this path.
+init_clean(RecoveredCounts, State) ->
+    %% Load the journal. Since this is a clean recovery this (almost)
+    %% gets us back to where we were on shutdown.
+    State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State),
+    %% The journal loading only creates records for segments touched
+    %% by the journal, and the counts are based on the journal entries
+    %% only. We need *complete* counts for *all* segments. By an
+    %% amazing coincidence we stored that information on shutdown.
+    Segments1 =
+        lists:foldl(
+          fun ({Seg, UnackedCount}, SegmentsN) ->
+                  Segment = segment_find_or_new(Seg, Dir, SegmentsN),
+                  segment_store(Segment #segment { unacked = UnackedCount },
+                                SegmentsN)
+          end, Segments, RecoveredCounts),
+    %% the counts above include transient messages, which would be the
+    %% wrong thing to return
+    {undefined, State1 # qistate { segments = Segments1 }}.
+
+%% Start-up when the saved counts cannot be trusted. Every segment is
+%% re-examined with recover_segment/3 and the unacked counts are summed.
+%% Returns {Count, State}.
+init_dirty(CleanShutdown, ContainsCheckFun, State) ->
+    %% Recover the journal completely. This will also load segments
+    %% which have entries in the journal and remove duplicates. The
+    %% counts will correctly reflect the combination of the segment
+    %% and the journal.
+    State1 = #qistate { dir = Dir, segments = Segments } =
+        recover_journal(State),
+    {Segments1, Count} =
+        %% Load each segment in turn and filter out messages that are
+        %% not in the msg_store, by adding acks to the journal. These
+        %% acks only go to the RAM journal as it doesn't matter if we
+        %% lose them. Also mark delivered if not clean shutdown. Also
+        %% find the number of unacked messages.
+        lists:foldl(
+          fun (Seg, {Segments2, CountAcc}) ->
+                  Segment = #segment { unacked = UnackedCount } =
+                      recover_segment(ContainsCheckFun, CleanShutdown,
+                                      segment_find_or_new(Seg, Dir, Segments2)),
+                  {segment_store(Segment, Segments2), CountAcc + UnackedCount}
+          end, {Segments, 0}, all_segment_nums(State1)),
+    %% Unconditionally flush since the dirty_count doesn't get updated
+    %% by the above foldl.
+    State2 = flush_journal(State1 #qistate { segments = Segments1 }),
+    {Count, State2}.
+
+%% Closes the journal file if it is open and returns
+%% {[{SegmentNumber, UnackedCount}], State} with the journal handle and
+%% the segments both reset to 'undefined'.
+terminate(State = #qistate { journal_handle = JournalHdl,
+                             segments = Segments }) ->
+    ok = case JournalHdl of
+             undefined -> ok;
+             _         -> file_handle_cache:close(JournalHdl)
+         end,
+    SegmentCounts =
+        segment_fold(
+          fun (#segment { num = Seg, unacked = UnackedCount }, Acc) ->
+                  [{Seg, UnackedCount} | Acc]
+          end, [], Segments),
+    {SegmentCounts, State #qistate { journal_handle = undefined,
+                                     segments = undefined }}.
+
+%% Combines the on-disk entries of a segment with its journal entries
+%% and applies recover_message/5 to each combined entry. The fun only
+%% matches entries that have a publish and no ack, so any other entry
+%% left after the combination fails with function_clause.
+recover_segment(ContainsCheckFun, CleanShutdown,
+                Segment = #segment { journal_entries = JEntries }) ->
+    {SegEntries, UnackedCount} = load_segment(false, Segment),
+    {SegEntries1, UnackedCountDelta} =
+        segment_plus_journal(SegEntries, JEntries),
+    array:sparse_foldl(
+      fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, Segment1) ->
+              recover_message(ContainsCheckFun(Guid), CleanShutdown,
+                              Del, RelSeq, Segment1)
+      end,
+      Segment #segment { unacked = UnackedCount + UnackedCountDelta },
+      SegEntries1).
+
+%% recover_message(InStore, CleanShutdown, Del, RelSeq, Segment):
+%%  - message known to the store, clean shutdown: leave untouched;
+%%  - known, unclean shutdown: make sure it is marked delivered;
+%%  - not known to the store: ack it, marking it delivered first if it
+%%    was not already.
+recover_message( true,  true,   _Del, _RelSeq, Segment) ->
+    Segment;
+recover_message( true, false,    del, _RelSeq, Segment) ->
+    Segment;
+recover_message( true, false, no_del,  RelSeq, Segment) ->
+    add_to_journal(RelSeq, del, Segment);
+recover_message(false,     _,    del,  RelSeq, Segment) ->
+    add_to_journal(RelSeq, ack, Segment);
+recover_message(false,     _, no_del,  RelSeq, Segment) ->
+    add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)).

%% Derive the directory name for a queue: the MD5 of the queue's
%% resource record, rendered as a base-36 number.
queue_name_to_dir_name(Name = #resource { kind = queue }) ->
    Digest = erlang:md5(term_to_binary(Name)),
    <<Num:128>> = Digest,
    lists:flatten(io_lib:format("~.36B", [Num])).

%% Directory under the mnesia dir that holds all queue directories.
queues_dir() ->
    MnesiaDir = rabbit_mnesia:dir(),
    filename:join(MnesiaDir, "queues").

%%----------------------------------------------------------------------------
%% msg store startup delta function
%%----------------------------------------------------------------------------

%% Walker with two phases:
%%   {start, DurableQueues} submits one reader job per queue to the
%%                          worker pool, then moves to the next phase;
%%   {next, Gatherer}       returns {Guid, Count, {next, Gatherer}} for
%%                          each gathered value, or 'finished' (after
%%                          stopping the gatherer) once it is empty.
queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) ->
    {ok, Gatherer} = gatherer:start_link(),
    lists:foreach(
      fun (QueueName) ->
              ok = gatherer:fork(Gatherer),
              Job = fun () ->
                            queue_index_walker_reader(QueueName, Gatherer)
                    end,
              ok = worker_pool:submit_async(Job)
      end, DurableQueues),
    queue_index_walker({next, Gatherer});

queue_index_walker({next, Gatherer}) when is_pid(Gatherer) ->
    case gatherer:out(Gatherer) of
        {value, {Guid, Count}} ->
            {Guid, Count, {next, Gatherer}};
        empty ->
            ok = gatherer:stop(Gatherer),
            ok = rabbit_misc:unlink_and_capture_exit(Gatherer),
            finished
    end.

%% Worker job: recover the journal of one queue and hand {Guid, 1} to
%% the gatherer for every persistent, unacked entry in every segment.
queue_index_walker_reader(QueueName, Gatherer) ->
    State = #qistate { segments = Segments, dir = Dir } =
        recover_journal(blank_state(QueueName, false)),
    Report =
        fun (_RelSeq, {{Guid, true}, _IsDelivered, no_ack}, ok) ->
                gatherer:in(Gatherer, {Guid, 1});
            (_RelSeq, _Entry, Acc) ->
                Acc
        end,
    lists:foreach(
      fun (Seg) ->
              ok = segment_entries_foldr(
                     Report, ok, segment_find_or_new(Seg, Dir, Segments))
      end, all_segment_nums(State)),
    {_SegmentCounts, _State} = terminate(State),
    ok = gatherer:finish(Gatherer).

%%----------------------------------------------------------------------------
%% journal manipulation
%%----------------------------------------------------------------------------

%% add_to_journal/3 dispatches on its third argument:
%%   #qistate{} - SeqId is absolute; updates the owning segment and
%%                bumps dirty_count;
%%   #segment{} - first argument is a RelSeq; updates the journal
%%                entries and the unacked count of the segment;
%%   otherwise  - third argument is the journal entries array itself.
add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount,
                                                 segments    = Segments,
                                                 dir         = Dir }) ->
    {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId),
    Updated = add_to_journal(RelSeq, Action,
                             segment_find_or_new(Seg, Dir, Segments)),
    State #qistate { dirty_count = DCount + 1,
                     segments    = segment_store(Updated, Segments) };

add_to_journal(RelSeq, Action,
               Segment = #segment { journal_entries = JEntries,
                                    unacked         = UnackedCount }) ->
    JEntries1 = add_to_journal(RelSeq, Action, JEntries),
    WithEntry = Segment #segment { journal_entries = JEntries1 },
    %% a publish adds an unacked message, an ack removes one, a
    %% delivery leaves the count alone
    case Action of
        ?PUB -> WithEntry #segment { unacked = UnackedCount + 1 };
        ack  -> WithEntry #segment { unacked = UnackedCount - 1 };
        del  -> WithEntry
    end;

add_to_journal(RelSeq, Action, JEntries) ->
    Entry =
        case {array:get(RelSeq, JEntries), Action} of
            %% nothing recorded yet for this slot
            {undefined, ?PUB} -> {Action, no_del, no_ack};
            {undefined, del}  -> {no_pub, del, no_ack};
            {undefined, ack}  -> {no_pub, no_del, ack};
            %% extend what is already recorded
            {{Pub, no_del, no_ack}, del} -> {Pub, del, no_ack};
            {{Pub, Del, no_ack}, ack}    -> {Pub, Del, ack}
        end,
    array:set(RelSeq, Entry, JEntries).

%% Flush the journal once dirty_count exceeds max_journal_entries;
%% otherwise return the state untouched.
maybe_flush_journal(State = #qistate { dirty_count         = DCount,
                                       max_journal_entries = MaxJournal }) ->
    case DCount > MaxJournal of
        true  -> flush_journal(State);
        false -> State
    end;
maybe_flush_journal(State) ->
    State.

%% Push every segment's journal entries into its segment file, drop
%% segments with no unacked messages (deleting their file if present),
%% clear the journal file and reset dirty_count.
flush_journal(State = #qistate { segments = Segments }) ->
    Flush =
        fun (Segment = #segment { unacked = Unacked, path = Path }, Kept) ->
                case Unacked of
                    0 ->
                        %% fully acked: the segment is not carried over
                        case filelib:is_file(Path) of
                            true  -> ok = file:delete(Path);
                            false -> ok
                        end,
                        Kept;
                    _ ->
                        segment_store(append_journal_to_segment(Segment),
                                      Kept)
                end
        end,
    Remaining = segment_fold(Flush, segments_new(), Segments),
    {JournalHdl, State1} =
        get_journal_handle(State #qistate { segments = Remaining }),
    ok = file_handle_cache:clear(JournalHdl),
    State1 #qistate { dirty_count = 0 }.

%% Append the segment's journal entries to its file and return the
%% segment with an empty journal. The file is not opened at all when
%% there are no entries.
append_journal_to_segment(#segment { journal_entries = JEntries,
                                     path            = Path } = Segment) ->
    case array:sparse_size(JEntries) =:= 0 of
        true ->
            Segment;
        false ->
            {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE],
                                               [{write_buffer, infinity}]),
            array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries),
            ok = file_handle_cache:close(Hdl),
            Segment #segment { journal_entries = array_new() }
    end.

%% Return {Hdl, State}, opening the journal file in the queue's dir
%% and remembering the handle if none is open yet.
get_journal_handle(State = #qistate { journal_handle = Existing,
                                      dir            = Dir }) ->
    case Existing of
        undefined ->
            Path = filename:join(Dir, ?JOURNAL_FILENAME),
            {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE],
                                               [{write_buffer, infinity}]),
            {Hdl, State #qistate { journal_handle = Hdl }};
        _ ->
            {Existing, State}
    end.

%% Loading Journal. This isn't idempotent and will mess up the counts
%% if you call it more than once on the same state. Assumes the counts
%% are 0 to start with.
load_journal(State) ->
    {JournalHdl, WithHandle} = get_journal_handle(State),
    %% rewind to the start of the journal before replaying it
    {ok, 0} = file_handle_cache:position(JournalHdl, 0),
    load_journal_entries(WithHandle).

%% Load the journal, then strip from it everything that is already in
%% the segment files. Like load_journal/1 this is not idempotent:
%% calling it twice on the same state will mess up the counts.
recover_journal(State) ->
    Loaded = #qistate { segments = Segments } = load_journal(State),
    Dedup =
        fun (Segment = #segment { journal_entries = JEntries,
                                  unacked         = JournalUnacked }) ->
                %% Acked entries are kept (KeepAcked = true) so that
                %% duplicates of them in the journal can be removed.
                %% The count returned here is purely from the segment
                %% file itself.
                {OnDisk, OnDiskUnacked} = load_segment(true, Segment),
                {JEntries1, Duplicates} =
                    journal_minus_segment(JEntries, OnDisk),
                Segment #segment {
                  journal_entries = JEntries1,
                  unacked = (JournalUnacked + OnDiskUnacked - Duplicates) }
        end,
    Loaded #qistate { segments = segment_map(Dedup, Segments) }.

%% Replay the journal file from the handle's current position, feeding
%% each record to add_to_journal/3. Stops at the first read that does
%% not yield a complete record (error or end of file).
load_journal_entries(State = #qistate { journal_handle = Hdl }) ->
    case file_handle_cache:read(Hdl, ?SEQ_BYTES) of
        {ok, <<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>} ->
            Decoded =
                case Prefix of
                    ?DEL_JPREFIX ->
                        {ok, del};
                    ?ACK_JPREFIX ->
                        {ok, ack};
                    _ ->
                        %% a publish: the guid follows the header
                        case file_handle_cache:read(Hdl, ?GUID_BYTES) of
                            {ok, <<GuidNum:?GUID_BITS>>} ->
                                %% work around for binary data
                                %% fragmentation. See
                                %% rabbit_msg_file:read_next/2
                                <<Guid:?GUID_BYTES/binary>> =
                                    <<GuidNum:?GUID_BITS>>,
                                IsPersistent =
                                    case Prefix of
                                        ?PUB_PERSIST_JPREFIX -> true;
                                        ?PUB_TRANS_JPREFIX   -> false
                                    end,
                                {ok, {Guid, IsPersistent}};
                            _ErrOrEoF ->
                                %% err, we've lost at least a publish
                                truncated
                        end
                end,
            case Decoded of
                {ok, Action} ->
                    load_journal_entries(
                      add_to_journal(SeqId, Action, State));
                truncated ->
                    State
            end;
        _ErrOrEoF ->
            State
    end.

%% Record a delivery ('del') or acknowledgement ('ack') for each SeqId:
%% append the records to the journal file, mirror them in the
%% in-memory journal, then flush if the journal has grown too large.
deliver_or_ack(_Kind, [], State) ->
    State;
deliver_or_ack(Kind, SeqIds, State) ->
    JPrefix = case Kind of
                  del -> ?DEL_JPREFIX;
                  ack -> ?ACK_JPREFIX
              end,
    {JournalHdl, WithHandle} = get_journal_handle(State),
    Records = lists:map(
                fun (SeqId) ->
                        <<JPrefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>
                end, SeqIds),
    ok = file_handle_cache:append(JournalHdl, Records),
    Journalled = lists:foldl(
                   fun (SeqId, StateN) ->
                           add_to_journal(SeqId, Kind, StateN)
                   end, WithHandle, SeqIds),
    maybe_flush_journal(Journalled).

%%----------------------------------------------------------------------------
%% segment manipulation
%%----------------------------------------------------------------------------

%% Split an absolute sequence id into {Segment, RelativeSeq}.
seq_id_to_seg_and_rel_seq_id(SeqId) ->
    Seg    = SeqId div ?SEGMENT_ENTRY_COUNT,
    RelSeq = SeqId rem ?SEGMENT_ENTRY_COUNT,
    {Seg, RelSeq}.

%% Inverse of seq_id_to_seg_and_rel_seq_id/1.
reconstruct_seq_id(Seg, RelSeq) ->
    Base = Seg * ?SEGMENT_ENTRY_COUNT,
    Base + RelSeq.

%% Sorted, duplicate-free list of every segment number known either in
%% memory or from a segment file in the queue's directory. The number
%% of a file is its leading run of decimal digits.
all_segment_nums(#qistate { dir = Dir, segments = Segments }) ->
    InMemory = segment_nums(Segments),
    IsDigit  = fun (C) -> $0 =< C andalso C =< $9 end,
    OnDisk   = [list_to_integer(lists:takewhile(IsDigit, FileName))
                || FileName <- filelib:wildcard("*" ++ ?SEGMENT_EXTENSION,
                                                Dir)],
    lists:usort(InMemory ++ OnDisk).

%% Return the segment numbered Seg, or a fresh empty one (empty
%% journal, zero unacked) whose path lies inside Dir.
segment_find_or_new(Seg, Dir, Segments) ->
    case segment_find(Seg, Segments) of
        error ->
            FileName = integer_to_list(Seg) ++ ?SEGMENT_EXTENSION,
            #segment { num             = Seg,
                       path            = filename:join(Dir, FileName),
                       journal_entries = array_new(),
                       unacked         = 0 };
        {ok, Found} ->
            Found
    end.

%% Look a segment up: first in the short list of cached segments (at
%% most two are inspected), then in the dict. Returns {ok, Segment} or
%% 'error'.
segment_find(Seg, {Dict, Cached}) ->
    case Cached of
        [Hit = #segment { num = Seg } | _] -> {ok, Hit}; %% head matches
        [_, Hit = #segment { num = Seg }]  -> {ok, Hit}; %% 2nd of 2 matches
        _                                  -> dict:find(Seg, Dict)
    end.

%% Store a segment, making it the head of the cached list. The cache
%% holds at most two segments; when a third arrives the older one is
%% moved into the dict. Any dict entry for the stored segment is
%% erased so that it lives in exactly one place.
segment_store(Segment = #segment { num = Seg }, {Dict, Cached}) ->
    case Cached of
        [#segment { num = Seg } | Rest] ->
            %% already at the head: replace in place
            {Dict, [Segment | Rest]};
        [Other, #segment { num = Seg }] ->
            %% second of two: replace and move to the front
            {Dict, [Segment, Other]};
        [] ->
            {dict:erase(Seg, Dict), [Segment]};
        [Other] ->
            {dict:erase(Seg, Dict), [Segment, Other]};
        [Other, Evicted] ->
            Dict1 = dict:erase(Seg, Dict),
            {dict:store(Evicted#segment.num, Evicted, Dict1),
             [Segment, Other]}
    end.

%% Fold Fun(Segment, Acc) over the cached segments first, then over
%% the segments held in the dict.
segment_fold(Fun, Acc, {Dict, Cached}) ->
    AfterCached = lists:foldl(Fun, Acc, Cached),
    dict:fold(fun (_Seg, Segment, AccN) -> Fun(Segment, AccN) end,
              AfterCached, Dict).

%% Apply Fun to every segment, in the dict and in the cache alike.
segment_map(Fun, {Dict, Cached}) ->
    MappedDict = dict:map(fun (_Seg, Segment) -> Fun(Segment) end, Dict),
    {MappedDict, lists:map(Fun, Cached)}.

%% Numbers of all stored segments: cached ones first, then dict keys.
segment_nums({Dict, Cached}) ->
    [Num || #segment { num = Num } <- Cached] ++ dict:fetch_keys(Dict).

%% An empty segment store: an empty dict and an empty cache.
segments_new() ->
    {dict:new(), []}.

%% Fold function for array:sparse_foldl/3: write one journal entry to
%% the segment file behind Hdl and return Hdl as the accumulator.
%% An entry published, delivered and acked entirely within the journal
%% never needs to reach the disk.
write_entry_to_segment(_RelSeq, {?PUB, del, ack}, Hdl) ->
    Hdl;
write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) ->
    ok = case Pub of
             {Guid, IsPersistent} ->
                 file_handle_cache:append(
                   Hdl, [<<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
                           (bool_to_int(IsPersistent)):1,
                           RelSeq:?REL_SEQ_BITS>>, Guid]);
             no_pub ->
                 ok
         end,
    ok = case {Del, Ack} of
             {no_del, no_ack} ->
                 ok;
             Flags ->
                 Marker = <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
                            RelSeq:?REL_SEQ_BITS>>,
                 %% one marker per event: written twice for deliver+ack
                 Payload = if Flags =:= {del, ack} -> [Marker, Marker];
                              true                 -> Marker
                           end,
                 file_handle_cache:append(Hdl, Payload)
         end,
    Hdl.

%% Collect the unacked messages of segment Seg that fall inside the
%% inclusive range {StartSeg, StartRelSeq} .. {EndSeg, EndRelSeq}.
%% Found messages are prepended to Messages as
%% {Guid, SeqId, IsPersistent, IsDelivered} tuples. Returns
%% {Messages1, Segments1}, with the segment stored back in Segments.
read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq},
                     {Messages, Segments}, Dir) ->
    Segment = segment_find_or_new(Seg, Dir, Segments),
    {segment_entries_foldr(
       fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc)
             %% the lower bound only applies within the first segment
             %% of the range, the upper bound only within the last
             when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso
                  (Seg < EndSeg   orelse EndRelSeq   >= RelSeq) ->
               %% The absolute id must be rebuilt from the segment
               %% actually being read. Using StartSeg here produced
               %% wrong sequence ids for every segment after the first
               %% one of the range.
               [ {Guid, reconstruct_seq_id(Seg, RelSeq),
                  IsPersistent, IsDelivered == del} | Acc ];
           (_RelSeq, _Value, Acc) ->
               Acc
       end, Messages, Segment),
     segment_store(Segment, Segments)}.

%% Right fold of Fun(RelSeq, Entry, Acc) over the entries of a segment
%% as they stand once the segment file and the in-memory journal have
%% been combined.
segment_entries_foldr(Fun, Init,
                      Segment = #segment { journal_entries = JEntries }) ->
    {SegEntries, _UnackedCount} = load_segment(false, Segment),
    {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries),
    array:sparse_foldr(Fun, Init, SegEntries1).

%% Loading segments
%%
%% Does not do any combining with the journal at all.
%%
%% Returns {SegEntries, UnackedCount}. A segment without a file yields
%% an empty array and a count of 0. When KeepAcked is true, acked
%% entries stay in the array (as {Pub, del, ack}); otherwise they are
%% removed from it.
load_segment(KeepAcked, #segment { path = Path }) ->
    case filelib:is_file(Path) of
        false -> {array_new(), 0};
        true  -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_MODE, []),
                 {ok, 0} = file_handle_cache:position(Hdl, bof),
                 Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0),
                 ok = file_handle_cache:close(Hdl),
                 Res
    end.

%% Read records from Hdl until a read fails to yield a complete record
%% (error or end of file), accumulating the entries and the number of
%% unacked messages.
load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) ->
    case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of
        {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
               IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} ->
            %% a publish record: the guid follows the header
            {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES),
            Obj = {{Guid, 1 == IsPersistentNum}, no_del, no_ack},
            SegEntries1 = array:set(RelSeq, Obj, SegEntries),
            load_segment_entries(KeepAcked, Hdl, SegEntries1,
                                 UnackedCount + 1);
        {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
               RelSeq:?REL_SEQ_BITS>>} ->
            %% a rel-seq-only record: the first one seen for a publish
            %% marks it delivered, the second one marks it acked
            {UnackedCountDelta, SegEntries1} =
                case array:get(RelSeq, SegEntries) of
                    {Pub, no_del, no_ack} ->
                        { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)};
                    {Pub, del, no_ack} when KeepAcked ->
                        {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)};
                    {_Pub, del, no_ack} ->
                        {-1, array:reset(RelSeq, SegEntries)}
                end,
            load_segment_entries(KeepAcked, Hdl, SegEntries1,
                                 UnackedCount + UnackedCountDelta);
        _ErrOrEoF ->
            {SegEntries, UnackedCount}
    end.

%% A fixed-size array with one slot per possible entry of a segment;
%% unset slots read as 'undefined'.
array_new() ->
    array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]).

%% Encode a boolean as a single bit.
bool_to_int(true ) -> 1;
bool_to_int(false) -> 0.

%%----------------------------------------------------------------------------
%% journal & segment combination
%%----------------------------------------------------------------------------

%% Combine what we have just read from a segment file with what we're
%% holding for that segment in memory. There must be no duplicates.
%%
%% Returns {SegEntries1, AdditionalUnacked}: the combined array and the
%% change in the unacked count that the journal contributes.
segment_plus_journal(SegEntries, JEntries) ->
    array:sparse_foldl(
      fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) ->
              SegEntry = array:get(RelSeq, SegEntriesOut),
              {Obj, AdditionalUnackedDelta} =
                  segment_plus_journal1(SegEntry, JObj),
              {case Obj of
                   undefined -> array:reset(RelSeq, SegEntriesOut);
                   _         -> array:set(RelSeq, Obj, SegEntriesOut)
               end,
               AdditionalUnacked + AdditionalUnackedDelta}
      end, {SegEntries, 0}, JEntries).

%% Here, the result is a tuple with the first element containing the
%% item which we may be adding to (for items only in the journal),
%% modifying in (bits in both), or, when returning 'undefined',
%% erasing from (ack in journal, not segment) the segment array.
%% The second element of the returned tuple is the delta to apply to
%% AdditionalUnacked.
segment_plus_journal1(SegEntry, JEntry) ->
    case {SegEntry, JEntry} of
        %% nothing in the segment: the journal entry stands on its own
        {undefined, {?PUB, no_del, no_ack}} ->
            {JEntry, 1};
        {undefined, {?PUB, del, no_ack}} ->
            {JEntry, 1};
        {undefined, {?PUB, del, ack}} ->
            {undefined, 0};

        %% publish in the segment, the rest in the journal
        {{?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}} ->
            {{Pub, del, no_ack}, 0};
        {{?PUB, no_del, no_ack}, {no_pub, del, ack}} ->
            {undefined, -1};
        {{?PUB, del, no_ack}, {no_pub, no_del, ack}} ->
            {undefined, -1}
    end.

%% Remove from the journal entries for a segment, items that are
%% duplicates of entries found in the segment itself. Used on start up
%% to clean up the journal.
%%
%% Returns {JEntries1, UnackedRemoved}.
journal_minus_segment(JEntries, SegEntries) ->
    Prune =
        fun (RelSeq, JObj, {Remaining, Removed}) ->
                OnDisk = array:get(RelSeq, SegEntries),
                {Verdict, RemovedDelta} = journal_minus_segment1(JObj, OnDisk),
                Remaining1 =
                    case Verdict of
                        keep      -> Remaining;
                        undefined -> array:reset(RelSeq, Remaining);
                        Reduced   -> array:set(RelSeq, Reduced, Remaining)
                    end,
                {Remaining1, Removed + RemovedDelta}
        end,
    array:sparse_foldl(Prune, {JEntries, 0}, JEntries).

%% Here, the result is a tuple whose first element says what to do with
%% the journal entry: 'keep' leaves the journal array alone,
%% 'undefined' erases the entry, and anything else replaces the entry.
%% The other element of the tuple is the delta for UnackedRemoved.

%% Both the same.
Must be at least the publish +journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> + {undefined, 1}; +journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> + {undefined, 0}; + +%% Just publish in journal +journal_minus_segment1({?PUB, no_del, no_ack}, undefined) -> + {keep, 0}; + +%% Publish and deliver in journal +journal_minus_segment1({?PUB, del, no_ack}, undefined) -> + {keep, 0}; +journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, no_ack}, 1}; + +%% Publish, deliver and ack in journal +journal_minus_segment1({?PUB, del, ack}, undefined) -> + {keep, 0}; +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> + {{no_pub, del, ack}, 1}; +journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> + {{no_pub, no_del, ack}, 1}; + +%% Just deliver in journal +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> + {undefined, 0}; + +%% Just ack in journal +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> + {undefined, -1}; + +%% Deliver and ack in journal +journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> + {keep, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> + {{no_pub, no_del, ack}, 0}; +journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> + {undefined, -1}. diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl index 3e03ae0c..9603faf5 100644 --- a/src/rabbit_reader.erl +++ b/src/rabbit_reader.erl @@ -41,7 +41,7 @@ -export([server_properties/0]). --export([analyze_frame/2]). +-export([analyze_frame/3]). -import(gen_tcp). -import(fprof). @@ -53,9 +53,7 @@ -define(CLOSING_TIMEOUT, 1). -define(CHANNEL_TERMINATION_TIMEOUT, 3). -define(SILENT_CLOSE_DELAY, 3). 
-%% set to zero once QPid fix their negotiation --define(FRAME_MAX, 131072). --define(CHANNEL_MAX, 0). +-define(FRAME_MAX, 131072). %% set to zero once QPid fix their negotiation %--------------------------------------------------------------------------- @@ -64,8 +62,8 @@ -define(INFO_KEYS, [pid, address, port, peer_address, peer_port, - recv_oct, recv_cnt, send_oct, send_cnt, send_pend, - state, channels, user, vhost, timeout, frame_max, client_properties]). + recv_oct, recv_cnt, send_oct, send_cnt, send_pend, state, channels, + protocol, user, vhost, timeout, frame_max, client_properties]). %% connection lifecycle %% @@ -140,11 +138,11 @@ -ifdef(use_specs). --spec(info_keys/0 :: () -> [info_key()]). --spec(info/1 :: (pid()) -> [info()]). --spec(info/2 :: (pid(), [info_key()]) -> [info()]). +-spec(info_keys/0 :: () -> [rabbit_types:info_key()]). +-spec(info/1 :: (pid()) -> [rabbit_types:info()]). +-spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]). -spec(shutdown/2 :: (pid(), string()) -> 'ok'). --spec(server_properties/0 :: () -> amqp_table()). +-spec(server_properties/0 :: () -> rabbit_framing:amqp_table()). -endif. 
@@ -242,7 +240,7 @@ start_connection(Parent, Deb, Sock, SockTransform) -> erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(), handshake_timeout), ProfilingValue = setup_profiling(), - {ok, Collector} = rabbit_reader_queue_collector:start_link(), + {ok, Collector} = rabbit_queue_collector:start_link(), try mainloop(Parent, Deb, switch_callback( #v1{sock = ClientSock, @@ -251,7 +249,8 @@ start_connection(Parent, Deb, Sock, SockTransform) -> timeout_sec = ?HANDSHAKE_TIMEOUT, frame_max = ?FRAME_MIN_SIZE, vhost = none, - client_properties = none}, + client_properties = none, + protocol = none}, callback = uninitialized_callback, recv_ref = none, connection_state = pre_init, @@ -274,7 +273,7 @@ start_connection(Parent, Deb, Sock, SockTransform) -> %% %% gen_tcp:close(ClientSock), teardown_profiling(ProfilingValue), - rabbit_reader_queue_collector:shutdown(Collector), + rabbit_queue_collector:shutdown(Collector), rabbit_misc:unlink_and_capture_exit(Collector) end, done. @@ -439,24 +438,28 @@ wait_for_channel_termination(N, TimerRef) -> end. maybe_close(State = #v1{connection_state = closing, - queue_collector = Collector}) -> + queue_collector = Collector, + connection = #connection{protocol = Protocol}, + sock = Sock}) -> case all_channels() of [] -> %% Spec says "Exclusive queues may only be accessed by the current %% connection, and are deleted when that connection closes." %% This does not strictly imply synchrony, but in practice it seems %% to be what people assume. - rabbit_reader_queue_collector:delete_all(Collector), - ok = send_on_channel0(State#v1.sock, #'connection.close_ok'{}), + rabbit_queue_collector:delete_all(Collector), + ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), close_connection(State); _ -> State end; maybe_close(State) -> State. 
-handle_frame(Type, 0, Payload, State = #v1{connection_state = CS}) +handle_frame(Type, 0, Payload, + State = #v1{connection_state = CS, + connection = #connection{protocol = Protocol}}) when CS =:= closing; CS =:= closed -> - case analyze_frame(Type, Payload) of + case analyze_frame(Type, Payload, Protocol) of {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); _Other -> State @@ -464,16 +467,18 @@ handle_frame(Type, 0, Payload, State = #v1{connection_state = CS}) handle_frame(_Type, _Channel, _Payload, State = #v1{connection_state = CS}) when CS =:= closing; CS =:= closed -> State; -handle_frame(Type, 0, Payload, State) -> - case analyze_frame(Type, Payload) of +handle_frame(Type, 0, Payload, + State = #v1{connection = #connection{protocol = Protocol}}) -> + case analyze_frame(Type, Payload, Protocol) of error -> throw({unknown_frame, 0, Type, Payload}); heartbeat -> State; {method, MethodName, FieldsBin} -> handle_method0(MethodName, FieldsBin, State); Other -> throw({unexpected_frame_on_channel0, Other}) end; -handle_frame(Type, Channel, Payload, State) -> - case analyze_frame(Type, Payload) of +handle_frame(Type, Channel, Payload, + State = #v1{connection = #connection{protocol = Protocol}}) -> + case analyze_frame(Type, Payload, Protocol) of error -> throw({unknown_frame, Channel, Type, Payload}); heartbeat -> throw({unexpected_heartbeat_frame, Channel}); AnalyzedFrame -> @@ -516,15 +521,20 @@ handle_frame(Type, Channel, Payload, State) -> end end. 
-analyze_frame(?FRAME_METHOD, <<ClassId:16, MethodId:16, MethodFields/binary>>) -> - {method, rabbit_framing:lookup_method_name({ClassId, MethodId}), MethodFields}; -analyze_frame(?FRAME_HEADER, <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>) -> +analyze_frame(?FRAME_METHOD, + <<ClassId:16, MethodId:16, MethodFields/binary>>, + Protocol) -> + MethodName = Protocol:lookup_method_name({ClassId, MethodId}), + {method, MethodName, MethodFields}; +analyze_frame(?FRAME_HEADER, + <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>, + _Protocol) -> {content_header, ClassId, Weight, BodySize, Properties}; -analyze_frame(?FRAME_BODY, Body) -> +analyze_frame(?FRAME_BODY, Body, _Protocol) -> {content_body, Body}; -analyze_frame(?FRAME_HEARTBEAT, <<>>) -> +analyze_frame(?FRAME_HEARTBEAT, <<>>, _Protocol) -> heartbeat; -analyze_frame(_Type, _Body) -> +analyze_frame(_Type, _Body, _Protocol) -> error. handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, State) -> @@ -549,20 +559,21 @@ handle_input({frame_payload, Type, Channel, PayloadSize}, PayloadAndMarker, Stat %% %% * The server MUST provide a protocol version that is lower than or %% equal to that requested by the client in the protocol header. -%% -%% We support 0-9-1 and 0-9, so by the first rule, we must close the -%% connection if we're sent anything else. Then, we must send that -%% version in the Connection.start method. -handle_input(handshake, <<"AMQP",0,0,9,1>>, State) -> - %% 0-9-1 style protocol header. 
- protocol_negotiate(0, 9, 1, State); -handle_input(handshake, <<"AMQP",1,1,0,9>>, State) -> - %% 0-8 and 0-9 style protocol header; we support only 0-9 - protocol_negotiate(0, 9, 0, State); +handle_input(handshake, <<"AMQP", 0, 0, 9, 1>>, State) -> + start_connection({0, 9, 1}, rabbit_framing_amqp_0_9_1, State); + +handle_input(handshake, <<"AMQP", 1, 1, 0, 9>>, State) -> + start_connection({0, 9, 0}, rabbit_framing_amqp_0_9_1, State); + +%% the 0-8 spec, confusingly, defines the version as 8-0 +handle_input(handshake, <<"AMQP", 1, 1, 8, 0>>, State) -> + start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State); + +handle_input(handshake, <<"AMQP", A, B, C, D>>, #v1{sock = Sock}) -> + refuse_connection(Sock, {bad_version, A, B, C, D}); + handle_input(handshake, Other, #v1{sock = Sock}) -> - ok = inet_op(fun () -> rabbit_net:send( - Sock, <<"AMQP",0,0,9,1>>) end), - throw({bad_header, Other}); + refuse_connection(Sock, {bad_header, Other}); handle_input(Callback, Data, _State) -> throw({bad_input, Callback, Data}). @@ -570,27 +581,31 @@ handle_input(Callback, Data, _State) -> %% Offer a protocol version to the client. Connection.start only %% includes a major and minor version number, Luckily 0-9 and 0-9-1 %% are similar enough that clients will be happy with either. 
-protocol_negotiate(ProtocolMajor, ProtocolMinor, _ProtocolRevision, - State = #v1{sock = Sock, connection = Connection}) -> - ok = send_on_channel0( - Sock, - #'connection.start'{ - version_major = ProtocolMajor, - version_minor = ProtocolMinor, - server_properties = server_properties(), - mechanisms = <<"PLAIN AMQPLAIN">>, - locales = <<"en_US">> }), +start_connection({ProtocolMajor, ProtocolMinor, _ProtocolRevision}, + Protocol, + State = #v1{sock = Sock, connection = Connection}) -> + Start = #'connection.start'{ version_major = ProtocolMajor, + version_minor = ProtocolMinor, + server_properties = server_properties(), + mechanisms = <<"PLAIN AMQPLAIN">>, + locales = <<"en_US">> }, + ok = send_on_channel0(Sock, Start, Protocol), {State#v1{connection = Connection#connection{ - timeout_sec = ?NORMAL_TIMEOUT}, + timeout_sec = ?NORMAL_TIMEOUT, + protocol = Protocol}, connection_state = starting}, frame_header, 7}. +refuse_connection(Sock, Exception) -> + ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",0,0,9,1>>) end), + throw(Exception). 
+ %%-------------------------------------------------------------------------- -handle_method0(MethodName, FieldsBin, State) -> +handle_method0(MethodName, FieldsBin, + State = #v1{connection = #connection{protocol = Protocol}}) -> try - handle_method0(rabbit_framing:decode_method_fields( - MethodName, FieldsBin), + handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin), State) catch exit:Reason -> CompleteReason = case Reason of @@ -612,34 +627,31 @@ handle_method0(#'connection.start_ok'{mechanism = Mechanism, response = Response, client_properties = ClientProperties}, State = #v1{connection_state = starting, - connection = Connection, + connection = Connection = + #connection{protocol = Protocol}, sock = Sock}) -> User = rabbit_access_control:check_login(Mechanism, Response), - ok = send_on_channel0( - Sock, - #'connection.tune'{channel_max = ?CHANNEL_MAX, + Tune = #'connection.tune'{channel_max = 0, frame_max = ?FRAME_MAX, - heartbeat = 0}), + heartbeat = 0}, + ok = send_on_channel0(Sock, Tune, Protocol), State#v1{connection_state = tuning, connection = Connection#connection{ user = User, client_properties = ClientProperties}}; -handle_method0(#'connection.tune_ok'{channel_max = _ChannelMax, - frame_max = FrameMax, +handle_method0(#'connection.tune_ok'{frame_max = FrameMax, heartbeat = ClientHeartbeat}, State = #v1{connection_state = tuning, connection = Connection, sock = Sock}) -> - if (FrameMax =< ?FRAME_MIN_SIZE) or + if (FrameMax /= 0) and (FrameMax < ?FRAME_MIN_SIZE) -> + rabbit_misc:protocol_error( + not_allowed, "frame_max=~w < ~w min size", + [FrameMax, ?FRAME_MIN_SIZE]); (?FRAME_MAX /= 0) and (FrameMax > ?FRAME_MAX) -> rabbit_misc:protocol_error( - mistuned, "peer sent tune_ok with invalid frame_max", []); - %% If we have a channel_max limit that the client wishes to - %% exceed, die as per spec. Not currently a problem, so we ignore - %% the client's channel_max parameter. 
-%% (?CHANNEL_MAX /= 0) and (ChannelMax > ?CHANNEL_MAX) -> -%% rabbit_misc:protocol_error( -%% mistuned, "peer sent tune_ok with invalid channel_max"); + not_allowed, "frame_max=~w > ~w max size", + [FrameMax, ?FRAME_MAX]); true -> rabbit_heartbeat:start_heartbeat(Sock, ClientHeartbeat), State#v1{connection_state = opening, @@ -647,27 +659,31 @@ handle_method0(#'connection.tune_ok'{channel_max = _ChannelMax, timeout_sec = ClientHeartbeat, frame_max = FrameMax}} end; + handle_method0(#'connection.open'{virtual_host = VHostPath}, + State = #v1{connection_state = opening, connection = Connection = #connection{ - user = User}, + user = User, + protocol = Protocol}, sock = Sock}) -> ok = rabbit_access_control:check_vhost_access(User, VHostPath), NewConnection = Connection#connection{vhost = VHostPath}, - ok = send_on_channel0( - Sock, - #'connection.open_ok'{deprecated_known_hosts = <<>>}), + ok = send_on_channel0(Sock, #'connection.open_ok'{}, Protocol), State#v1{connection_state = running, connection = NewConnection}; handle_method0(#'connection.close'{}, State = #v1{connection_state = running}) -> lists:foreach(fun rabbit_framing_channel:shutdown/1, all_channels()), maybe_close(State#v1{connection_state = closing}); -handle_method0(#'connection.close'{}, State = #v1{connection_state = CS}) +handle_method0(#'connection.close'{}, + State = #v1{connection_state = CS, + connection = #connection{protocol = Protocol}, + sock = Sock}) when CS =:= closing; CS =:= closed -> %% We're already closed or closing, so we don't need to cleanup %% anything. - ok = send_on_channel0(State#v1.sock, #'connection.close_ok'{}), + ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol), State; handle_method0(#'connection.close_ok'{}, State = #v1{connection_state = closed}) -> @@ -680,8 +696,8 @@ handle_method0(_Method, #v1{connection_state = S}) -> rabbit_misc:protocol_error( channel_error, "unexpected method in connection state ~w", [S]). 
-send_on_channel0(Sock, Method) -> - ok = rabbit_writer:internal_send_command(Sock, 0, Method). +send_on_channel0(Sock, Method, Protocol) -> + ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol). %%-------------------------------------------------------------------------- @@ -715,6 +731,10 @@ i(state, #v1{connection_state = S}) -> S; i(channels, #v1{}) -> length(all_channels()); +i(protocol, #v1{connection = #connection{protocol = none}}) -> + none; +i(protocol, #v1{connection = #connection{protocol = Protocol}}) -> + Protocol:version(); i(user, #v1{connection = #connection{user = #user{username = Username}}}) -> Username; i(user, #v1{connection = #connection{user = none}}) -> @@ -738,11 +758,13 @@ send_to_new_channel(Channel, AnalyzedFrame, #v1{sock = Sock, connection = #connection{ frame_max = FrameMax, user = #user{username = Username}, - vhost = VHost}} = State, - WriterPid = rabbit_writer:start(Sock, Channel, FrameMax), - ChPid = rabbit_framing_channel:start_link( - fun rabbit_channel:start_link/6, - [Channel, self(), WriterPid, Username, VHost, Collector]), + vhost = VHost, + protocol = Protocol}} = State, + {ok, WriterPid} = rabbit_writer:start(Sock, Channel, FrameMax, Protocol), + {ok, ChPid} = rabbit_framing_channel:start_link( + fun rabbit_channel:start_link/6, + [Channel, self(), WriterPid, Username, VHost, Collector], + Protocol), put({channel, Channel}, {chpid, ChPid}), put({chpid, ChPid}, {channel, Channel}), ok = rabbit_framing_channel:process(ChPid, AnalyzedFrame). @@ -758,25 +780,27 @@ handle_exception(State = #v1{connection_state = CS}, Channel, Reason) -> log_channel_error(CS, Channel, Reason), send_exception(State, Channel, Reason). 
-send_exception(State, Channel, Reason) -> - {ShouldClose, CloseChannel, CloseMethod} = map_exception(Channel, Reason), +send_exception(State = #v1{connection = #connection{protocol = Protocol}}, + Channel, Reason) -> + {ShouldClose, CloseChannel, CloseMethod} = + map_exception(Channel, Reason, Protocol), NewState = case ShouldClose of true -> terminate_channels(), close_connection(State); false -> close_channel(Channel, State) end, ok = rabbit_writer:internal_send_command( - NewState#v1.sock, CloseChannel, CloseMethod), + NewState#v1.sock, CloseChannel, CloseMethod, Protocol), NewState. -map_exception(Channel, Reason) -> +map_exception(Channel, Reason, Protocol) -> {SuggestedClose, ReplyCode, ReplyText, FailedMethod} = - lookup_amqp_exception(Reason), + lookup_amqp_exception(Reason, Protocol), ShouldClose = SuggestedClose or (Channel == 0), {ClassId, MethodId} = case FailedMethod of {_, _} -> FailedMethod; - none -> {0, 0}; - _ -> rabbit_framing:method_id(FailedMethod) + none -> {0, 0}; + _ -> Protocol:method_id(FailedMethod) end, {CloseChannel, CloseMethod} = case ShouldClose of @@ -791,22 +815,16 @@ map_exception(Channel, Reason) -> end, {ShouldClose, CloseChannel, CloseMethod}. 
-%% FIXME: this clause can go when we move to AMQP spec >=8.1 -lookup_amqp_exception(#amqp_error{name = precondition_failed, - explanation = Expl, - method = Method}) -> - ExplBin = amqp_exception_explanation(<<"PRECONDITION_FAILED">>, Expl), - {false, 406, ExplBin, Method}; lookup_amqp_exception(#amqp_error{name = Name, explanation = Expl, - method = Method}) -> - {ShouldClose, Code, Text} = rabbit_framing:lookup_amqp_exception(Name), + method = Method}, + Protocol) -> + {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(Name), ExplBin = amqp_exception_explanation(Text, Expl), {ShouldClose, Code, ExplBin, Method}; -lookup_amqp_exception(Other) -> +lookup_amqp_exception(Other, Protocol) -> rabbit_log:warning("Non-AMQP exit reason '~p'~n", [Other]), - {ShouldClose, Code, Text} = - rabbit_framing:lookup_amqp_exception(internal_error), + {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(internal_error), {ShouldClose, Code, Text, none}. amqp_exception_explanation(Text, Expl) -> diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl index 5cd15a94..d50b9f31 100644 --- a/src/rabbit_router.erl +++ b/src/rabbit_router.erl @@ -41,7 +41,13 @@ -ifdef(use_specs). --spec(deliver/2 :: ([pid()], delivery()) -> {routing_result(), [pid()]}). +-export_type([routing_key/0, routing_result/0]). + +-type(routing_key() :: binary()). +-type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered'). + +-spec(deliver/2 :: + ([pid()], rabbit_types:delivery()) -> {routing_result(), [pid()]}). -endif. @@ -90,13 +96,13 @@ match_routing_key(Name, RoutingKey) -> lookup_qpids(mnesia:dirty_select(rabbit_route, [{MatchHead, [], ['$1']}])). lookup_qpids(Queues) -> - sets:fold( + lists:foldl( fun (Key, Acc) -> case mnesia:dirty_read({rabbit_queue, Key}) of [#amqqueue{pid = QPid}] -> [QPid | Acc]; [] -> Acc end - end, [], sets:from_list(Queues)). + end, [], lists:usort(Queues)). 
%%-------------------------------------------------------------------- diff --git a/src/rabbit_sasl_report_file_h.erl b/src/rabbit_sasl_report_file_h.erl index 434cdae0..eb2037c2 100644 --- a/src/rabbit_sasl_report_file_h.erl +++ b/src/rabbit_sasl_report_file_h.erl @@ -33,7 +33,8 @@ -behaviour(gen_event). --export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]). +-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, + code_change/3]). %% rabbit_sasl_report_file_h is a wrapper around the sasl_report_file_h %% module because the original's init/1 does not match properly diff --git a/src/rabbit_sup.erl b/src/rabbit_sup.erl index 2c5e5112..97613d17 100644 --- a/src/rabbit_sup.erl +++ b/src/rabbit_sup.erl @@ -34,7 +34,7 @@ -behaviour(supervisor). -export([start_link/0, start_child/1, start_child/2, start_child/3, - start_restartable_child/1, start_restartable_child/2]). + start_restartable_child/1, start_restartable_child/2, stop_child/1]). -export([init/1]). @@ -69,5 +69,11 @@ start_restartable_child(Mod, Args) -> transient, infinity, supervisor, [rabbit_restartable_sup]}), ok. +stop_child(ChildId) -> + case supervisor:terminate_child(?SERVER, ChildId) of + ok -> supervisor:delete_child(?SERVER, ChildId); + E -> E + end. + init([]) -> {ok, {{one_for_all, 0, 1}, []}}. diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl index fb4a3ad6..56aca1d6 100644 --- a/src/rabbit_tests.erl +++ b/src/rabbit_tests.erl @@ -44,6 +44,9 @@ -include("rabbit_framing.hrl"). -include_lib("kernel/include/file.hrl"). +-define(PERSISTENT_MSG_STORE, msg_store_persistent). +-define(TRANSIENT_MSG_STORE, msg_store_transient). + test_content_prop_roundtrip(Datum, Binary) -> Types = [element(1, E) || E <- Datum], Values = [element(2, E) || E <- Datum], @@ -51,9 +54,13 @@ test_content_prop_roundtrip(Datum, Binary) -> Binary = rabbit_binary_generator:encode_properties(Types, Values). 
%% assertion all_tests() -> + application:set_env(rabbit, file_handles_high_watermark, 10, infinity), + passed = test_backing_queue(), passed = test_priority_queue(), + passed = test_bpqueue(), passed = test_pg_local(), passed = test_unfold(), + passed = test_supervisor_delayed_restart(), passed = test_parsing(), passed = test_content_framing(), passed = test_topic_matching(), @@ -207,6 +214,143 @@ test_priority_queue(Q) -> priority_queue:to_list(Q), priority_queue_out_all(Q)}. +test_bpqueue() -> + Q = bpqueue:new(), + true = bpqueue:is_empty(Q), + 0 = bpqueue:len(Q), + [] = bpqueue:to_list(Q), + + Q1 = bpqueue_test(fun bpqueue:in/3, fun bpqueue:out/1, + fun bpqueue:to_list/1, + fun bpqueue:foldl/3, fun bpqueue:map_fold_filter_l/4), + Q2 = bpqueue_test(fun bpqueue:in_r/3, fun bpqueue:out_r/1, + fun (QR) -> lists:reverse( + [{P, lists:reverse(L)} || + {P, L} <- bpqueue:to_list(QR)]) + end, + fun bpqueue:foldr/3, fun bpqueue:map_fold_filter_r/4), + + [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(bpqueue:join(Q, Q1)), + [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(bpqueue:join(Q2, Q)), + [{foo, [1, 2]}, {bar, [3, 3]}, {foo, [2,1]}] = + bpqueue:to_list(bpqueue:join(Q1, Q2)), + + [{foo, [1, 2]}, {bar, [3]}, {foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list(bpqueue:join(Q1, Q1)), + + [{foo, [1, 2]}, {bar, [3]}] = + bpqueue:to_list( + bpqueue:from_list( + [{x, []}, {foo, [1]}, {y, []}, {foo, [2]}, {bar, [3]}, {z, []}])), + + [{undefined, [a]}] = bpqueue:to_list(bpqueue:from_list([{undefined, [a]}])), + + {4, [a,b,c,d]} = + bpqueue:foldl( + fun (Prefix, Value, {Prefix, Acc}) -> + {Prefix + 1, [Value | Acc]} + end, + {0, []}, bpqueue:from_list([{0,[d]}, {1,[c]}, {2,[b]}, {3,[a]}])), + + [{bar,3}, {foo,2}, {foo,1}] = + bpqueue:foldr(fun (P, V, I) -> [{P,V} | I] end, [], Q2), + + BPQL = [{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], + BPQ = bpqueue:from_list(BPQL), + + %% no effect + {BPQL, 0} = bpqueue_mffl([none], {none, []}, BPQ), + {BPQL, 0} = bpqueue_mffl([foo,bar], {none, 
[1]}, BPQ), + {BPQL, 0} = bpqueue_mffl([bar], {none, [3]}, BPQ), + {BPQL, 0} = bpqueue_mffr([bar], {foo, [5]}, BPQ), + + %% process 1 item + {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffl([foo,bar], {foo, [2]}, BPQ), + {[{foo,[1,2,2]}, {bar,[-3,4,5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffl([bar], {bar, [4]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,-7]}], 1} = + bpqueue_mffr([foo,bar], {foo, [6]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4]}, {baz,[-5]}, {foo,[5,6,7]}], 1} = + bpqueue_mffr([bar], {baz, [4]}, BPQ), + + %% change prefix + {[{bar,[-1,-2,-2,-3,-4,-5,-5,-6,-7]}], 9} = + bpqueue_mffl([foo,bar], {bar, []}, BPQ), + {[{bar,[-1,-2,-2,3,4,5]}, {foo,[5,6,7]}], 3} = + bpqueue_mffl([foo], {bar, [5]}, BPQ), + {[{bar,[-1,-2,-2,3,4,5,-5,-6]}, {foo,[7]}], 5} = + bpqueue_mffl([foo], {bar, [7]}, BPQ), + {[{foo,[1,2,2,-3,-4]}, {bar,[5]}, {foo,[5,6,7]}], 2} = + bpqueue_mffl([bar], {foo, [5]}, BPQ), + {[{bar,[-1,-2,-2,3,4,5,-5,-6,-7]}], 6} = + bpqueue_mffl([foo], {bar, []}, BPQ), + {[{foo,[1,2,2,-3,-4,-5,5,6,7]}], 3} = + bpqueue_mffl([bar], {foo, []}, BPQ), + + %% edge cases + {[{foo,[-1,-2,-2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 3} = + bpqueue_mffl([foo], {foo, [5]}, BPQ), + {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[-5,-6,-7]}], 3} = + bpqueue_mffr([foo], {foo, [2]}, BPQ), + + passed. 
+ +bpqueue_test(In, Out, List, Fold, MapFoldFilter) -> + Q = bpqueue:new(), + {empty, _Q} = Out(Q), + + ok = Fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, ok, Q), + {Q1M, 0} = MapFoldFilter(fun(_P) -> throw(explosion) end, + fun(_V, _N) -> throw(explosion) end, 0, Q), + [] = bpqueue:to_list(Q1M), + + Q1 = In(bar, 3, In(foo, 2, In(foo, 1, Q))), + false = bpqueue:is_empty(Q1), + 3 = bpqueue:len(Q1), + [{foo, [1, 2]}, {bar, [3]}] = List(Q1), + + {{value, foo, 1}, Q3} = Out(Q1), + {{value, foo, 2}, Q4} = Out(Q3), + {{value, bar, 3}, _Q5} = Out(Q4), + + F = fun (QN) -> + MapFoldFilter(fun (foo) -> true; + (_) -> false + end, + fun (2, _Num) -> stop; + (V, Num) -> {bar, -V, V - Num} end, + 0, QN) + end, + {Q6, 0} = F(Q), + [] = bpqueue:to_list(Q6), + {Q7, 1} = F(Q1), + [{bar, [-1]}, {foo, [2]}, {bar, [3]}] = List(Q7), + + Q1. + +bpqueue_mffl(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_l/4, FF1A, FF2A, BPQ). + +bpqueue_mffr(FF1A, FF2A, BPQ) -> + bpqueue_mff(fun bpqueue:map_fold_filter_r/4, FF1A, FF2A, BPQ). + +bpqueue_mff(Fold, FF1A, FF2A, BPQ) -> + FF1 = fun (Prefixes) -> + fun (P) -> lists:member(P, Prefixes) end + end, + FF2 = fun ({Prefix, Stoppers}) -> + fun (Val, Num) -> + case lists:member(Val, Stoppers) of + true -> stop; + false -> {Prefix, -Val, 1 + Num} + end + end + end, + Queue_to_list = fun ({LHS, RHS}) -> {bpqueue:to_list(LHS), RHS} end, + + Queue_to_list(Fold(FF1(FF1A), FF2(FF2A), 0, BPQ)). + test_simple_n_element_queue(N) -> Items = lists:seq(1, N), Q = priority_queue_in_all(priority_queue:new(), Items), @@ -355,42 +499,40 @@ test_field_values() -> passed. 
%% Test that content frames don't exceed frame-max -test_content_framing(FrameMax, Fragments) -> +test_content_framing(FrameMax, BodyBin) -> [Header | Frames] = rabbit_binary_generator:build_simple_content_frames( 1, - #content{class_id = 0, properties_bin = <<>>, - payload_fragments_rev = Fragments}, - FrameMax), - % header is formatted correctly and the size is the total of the - % fragments + rabbit_binary_generator:ensure_content_encoded( + rabbit_basic:build_content(#'P_basic'{}, BodyBin), + rabbit_framing_amqp_0_9_1), + FrameMax, + rabbit_framing_amqp_0_9_1), + %% header is formatted correctly and the size is the total of the + %% fragments <<_FrameHeader:7/binary, _ClassAndWeight:4/binary, - BodySize:64/unsigned, _Rest/binary>> = list_to_binary(Header), - BodySize = size(list_to_binary(Fragments)), - false = lists:any( - fun (ContentFrame) -> - FrameBinary = list_to_binary(ContentFrame), - % assert - <<_TypeAndChannel:3/binary, - Size:32/unsigned, - _Payload:Size/binary, - 16#CE>> = FrameBinary, - size(FrameBinary) > FrameMax - end, - Frames), + BodySize:64/unsigned, _Rest/binary>> = list_to_binary(Header), + BodySize = size(BodyBin), + true = lists:all( + fun (ContentFrame) -> + FrameBinary = list_to_binary(ContentFrame), + %% assert + <<_TypeAndChannel:3/binary, + Size:32/unsigned, _Payload:Size/binary, 16#CE>> = + FrameBinary, + size(FrameBinary) =< FrameMax + end, Frames), passed. 
test_content_framing() -> - % no content - passed = test_content_framing(4096, []), - passed = test_content_framing(4096, [<<>>]), - % easily fit in one frame - passed = test_content_framing(4096, [<<"Easy">>]), - % exactly one frame (empty frame = 8 bytes) - passed = test_content_framing(11, [<<"One">>]), - % more than one frame - passed = test_content_framing(20, [<<"into more than one frame">>, - <<"This will have to go">>]), + %% no content + passed = test_content_framing(4096, <<>>), + %% easily fit in one frame + passed = test_content_framing(4096, <<"Easy">>), + %% exactly one frame (empty frame = 8 bytes) + passed = test_content_framing(11, <<"One">>), + %% more than one frame + passed = test_content_framing(11, <<"More than one frame">>), passed. test_topic_match(P, R) -> @@ -601,19 +743,19 @@ test_cluster_management() -> ok = control_action(reset, []), lists:foreach(fun (Arg) -> - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok end, ClusteringSequence), lists:foreach(fun (Arg) -> ok = control_action(reset, []), - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok end, ClusteringSequence), ok = control_action(reset, []), lists:foreach(fun (Arg) -> - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok = control_action(start_app, []), ok = control_action(stop_app, []), ok @@ -621,7 +763,7 @@ test_cluster_management() -> ClusteringSequence), lists:foreach(fun (Arg) -> ok = control_action(reset, []), - ok = control_action(cluster, Arg), + ok = control_action(force_cluster, Arg), ok = control_action(start_app, []), ok = control_action(stop_app, []), ok @@ -632,13 +774,13 @@ test_cluster_management() -> ok = control_action(reset, []), ok = control_action(start_app, []), ok = control_action(stop_app, []), - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% join a 
non-existing cluster as a ram node ok = control_action(reset, []), - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), SecondaryNode = rabbit_misc:makenode("hare"), case net_adm:ping(SecondaryNode) of @@ -663,18 +805,26 @@ test_cluster_management2(SecondaryNode) -> %% join cluster as a ram node ok = control_action(reset, []), - ok = control_action(cluster, [SecondaryNodeS, "invalid1@invalid"]), + ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]), ok = control_action(start_app, []), ok = control_action(stop_app, []), %% change cluster config while remaining in same cluster - ok = control_action(cluster, ["invalid2@invalid", SecondaryNodeS]), + ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]), ok = control_action(start_app, []), ok = control_action(stop_app, []), %% join non-existing cluster as a ram node - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + + %% join empty cluster as a ram node + ok = control_action(cluster, []), + ok = control_action(start_app, []), + ok = control_action(stop_app, []), + %% turn ram node into disk node ok = control_action(reset, []), ok = control_action(cluster, [SecondaryNodeS, NodeS]), @@ -682,8 +832,8 @@ test_cluster_management2(SecondaryNode) -> ok = control_action(stop_app, []), %% convert a disk node into a ram node - ok = control_action(cluster, ["invalid1@invalid", - "invalid2@invalid"]), + ok = control_action(force_cluster, ["invalid1@invalid", + "invalid2@invalid"]), %% turn a disk node into a ram node ok = control_action(reset, []), @@ -790,15 +940,17 @@ test_user_management() -> test_server_status() -> %% create a few things so there is some useful information to list Writer = spawn(fun () -> 
receive shutdown -> ok end end), - Ch = rabbit_channel:start_link(1, self(), Writer, <<"user">>, <<"/">>, - self()), - [Q, Q2] = [#amqqueue{} = rabbit_amqqueue:declare( + {ok, Ch} = rabbit_channel:start_link(1, self(), Writer, + <<"user">>, <<"/">>, self()), + [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>], + {new, Queue = #amqqueue{}} <- + [rabbit_amqqueue:declare( rabbit_misc:r(<<"/">>, queue, Name), - false, false, [], none) || - Name <- [<<"foo">>, <<"bar">>]], + false, false, [], none)]], ok = rabbit_amqqueue:basic_consume(Q, true, Ch, undefined, <<"ctag">>, true, undefined), + %% list queues ok = info_action(list_queues, rabbit_amqqueue:info_keys(), true), @@ -918,8 +1070,8 @@ test_memory_pressure_sync(Ch, Writer) -> test_memory_pressure_spawn() -> Me = self(), Writer = spawn(fun () -> test_memory_pressure_receiver(Me) end), - Ch = rabbit_channel:start_link(1, self(), Writer, <<"user">>, <<"/">>, - self()), + {ok, Ch} = rabbit_channel:start_link(1, self(), Writer, + <<"user">>, <<"/">>, self()), ok = rabbit_channel:do(Ch, #'channel.open'{}), MRef = erlang:monitor(process, Ch), receive #'channel.open_ok'{} -> ok @@ -932,6 +1084,11 @@ expect_normal_channel_termination(MRef, Ch) -> after 1000 -> throw(channel_failed_to_exit) end. +gobble_channel_exit() -> + receive {channel_exit, _, _} -> ok + after 1000 -> throw(channel_exit_not_received) + end. 
+ test_memory_pressure() -> {Writer0, Ch0, MRef0} = test_memory_pressure_spawn(), [ok = rabbit_channel:conserve_memory(Ch0, Conserve) || @@ -951,13 +1108,10 @@ test_memory_pressure() -> ok = test_memory_pressure_receive_flow(true), %% if we publish at this point, the channel should die - Content = #content{class_id = element(1, rabbit_framing:method_id( - 'basic.publish')), - properties = none, - properties_bin = <<>>, - payload_fragments_rev = []}, + Content = rabbit_basic:build_content(#'P_basic'{}, <<>>), ok = rabbit_channel:do(Ch0, #'basic.publish'{}, Content), expect_normal_channel_termination(MRef0, Ch0), + gobble_channel_exit(), {Writer1, Ch1, MRef1} = test_memory_pressure_spawn(), ok = rabbit_channel:conserve_memory(Ch1, true), @@ -969,25 +1123,29 @@ test_memory_pressure() -> %% send back the wrong flow_ok. Channel should die. ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}), expect_normal_channel_termination(MRef1, Ch1), + gobble_channel_exit(), {_Writer2, Ch2, MRef2} = test_memory_pressure_spawn(), %% just out of the blue, send a flow_ok. Life should end. ok = rabbit_channel:do(Ch2, #'channel.flow_ok'{active = true}), expect_normal_channel_termination(MRef2, Ch2), + gobble_channel_exit(), {_Writer3, Ch3, MRef3} = test_memory_pressure_spawn(), ok = rabbit_channel:conserve_memory(Ch3, true), + ok = test_memory_pressure_receive_flow(false), receive {'DOWN', MRef3, process, Ch3, _} -> ok after 12000 -> throw(channel_failed_to_exit) end, + gobble_channel_exit(), alarm_handler:set_alarm({vm_memory_high_watermark, []}), Me = self(), Writer4 = spawn(fun () -> test_memory_pressure_receiver(Me) end), - Ch4 = rabbit_channel:start_link(1, self(), Writer4, <<"user">>, <<"/">>, - self()), + {ok, Ch4} = rabbit_channel:start_link(1, self(), Writer4, + <<"user">>, <<"/">>, self()), ok = rabbit_channel:do(Ch4, #'channel.open'{}), MRef4 = erlang:monitor(process, Ch4), Writer4 ! 
sync, @@ -1181,6 +1339,586 @@ handle_hook(HookName, Handler, Args) -> A = atom_to_list(HookName) ++ "_" ++ atom_to_list(Handler) ++ "_fired", put(list_to_atom(A), Args). bad_handle_hook(_, _, _) -> - bad:bad(). + exit(bad_handle_hook_called). extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) -> handle_hook(Hookname, Handler, {Args, Extra1, Extra2}). + +test_supervisor_delayed_restart() -> + test_sup:test_supervisor_delayed_restart(). + +test_backing_queue() -> + case application:get_env(rabbit, backing_queue_module) of + {ok, rabbit_variable_queue} -> + {ok, FileSizeLimit} = + application:get_env(rabbit, msg_store_file_size_limit), + application:set_env(rabbit, msg_store_file_size_limit, 512, + infinity), + {ok, MaxJournal} = + application:get_env(rabbit, queue_index_max_journal_entries), + application:set_env(rabbit, queue_index_max_journal_entries, 128, + infinity), + passed = test_msg_store(), + application:set_env(rabbit, msg_store_file_size_limit, + FileSizeLimit, infinity), + passed = test_queue_index(), + passed = test_variable_queue(), + passed = test_queue_recover(), + application:set_env(rabbit, queue_index_max_journal_entries, + MaxJournal, infinity), + passed; + _ -> + passed + end. + +restart_msg_store_empty() -> + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + undefined, {fun (ok) -> finished end, ok}). + +guid_bin(X) -> + erlang:md5(term_to_binary(X)). + +msg_store_contains(Atom, Guids) -> + Atom = lists:foldl( + fun (Guid, Atom1) when Atom1 =:= Atom -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end, + Atom, Guids). + +msg_store_sync(Guids) -> + Ref = make_ref(), + Self = self(), + ok = rabbit_msg_store:sync(?PERSISTENT_MSG_STORE, Guids, + fun () -> Self ! {sync, Ref} end), + receive + {sync, Ref} -> ok + after + 10000 -> + io:format("Sync from msg_store missing for guids ~p~n", [Guids]), + throw(timeout) + end. 
+ +msg_store_read(Guids, MSCState) -> + lists:foldl(fun (Guid, MSCStateM) -> + {{ok, Guid}, MSCStateN} = rabbit_msg_store:read( + ?PERSISTENT_MSG_STORE, + Guid, MSCStateM), + MSCStateN + end, MSCState, Guids). + +msg_store_write(Guids, MSCState) -> + lists:foldl(fun (Guid, {ok, MSCStateN}) -> + rabbit_msg_store:write(?PERSISTENT_MSG_STORE, + Guid, Guid, MSCStateN) + end, {ok, MSCState}, Guids). + +msg_store_remove(Guids) -> + rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids). + +foreach_with_msg_store_client(MsgStore, Ref, Fun, L) -> + rabbit_msg_store:client_terminate( + lists:foldl(fun (Guid, MSCState) -> Fun(Guid, MsgStore, MSCState) end, + rabbit_msg_store:client_init(MsgStore, Ref), L)). + +test_msg_store() -> + restart_msg_store_empty(), + Self = self(), + Guids = [guid_bin(M) || M <- lists:seq(1,100)], + {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids), + %% check we don't contain any of the msgs we're about to publish + false = msg_store_contains(false, Guids), + Ref = rabbit_guid:guid(), + MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), + %% publish the first half + {ok, MSCState1} = msg_store_write(Guids1stHalf, MSCState), + %% sync on the first half + ok = msg_store_sync(Guids1stHalf), + %% publish the second half + {ok, MSCState2} = msg_store_write(Guids2ndHalf, MSCState1), + %% sync on the first half again - the msg_store will be dirty, but + %% we won't need the fsync + ok = msg_store_sync(Guids1stHalf), + %% check they're all in there + true = msg_store_contains(true, Guids), + %% publish the latter half twice so we hit the caching and ref count code + {ok, MSCState3} = msg_store_write(Guids2ndHalf, MSCState2), + %% check they're still all in there + true = msg_store_contains(true, Guids), + %% sync on the 2nd half, but do lots of individual syncs to try + %% and cause coalescing to happen + ok = lists:foldl( + fun (Guid, ok) -> rabbit_msg_store:sync( + ?PERSISTENT_MSG_STORE, + [Guid], fun () -> Self ! 
{sync, Guid} end) + end, ok, Guids2ndHalf), + lists:foldl( + fun(Guid, ok) -> + receive + {sync, Guid} -> ok + after + 10000 -> + io:format("Sync from msg_store missing (guid: ~p)~n", + [Guid]), + throw(timeout) + end + end, ok, Guids2ndHalf), + %% it's very likely we're not dirty here, so the 1st half sync + %% should hit a different code path + ok = msg_store_sync(Guids1stHalf), + %% read them all + MSCState4 = msg_store_read(Guids, MSCState3), + %% read them all again - this will hit the cache, not disk + MSCState5 = msg_store_read(Guids, MSCState4), + %% remove them all + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids), + %% check first half doesn't exist + false = msg_store_contains(false, Guids1stHalf), + %% check second half does exist + true = msg_store_contains(true, Guids2ndHalf), + %% read the second half again + MSCState6 = msg_store_read(Guids2ndHalf, MSCState5), + %% release the second half, just for fun (aka code coverage) + ok = rabbit_msg_store:release(?PERSISTENT_MSG_STORE, Guids2ndHalf), + %% read the second half again, just for fun (aka code coverage) + MSCState7 = msg_store_read(Guids2ndHalf, MSCState6), + ok = rabbit_msg_store:client_terminate(MSCState7), + %% stop and restart, preserving every other msg in 2nd half + ok = rabbit_variable_queue:stop_msg_store(), + ok = rabbit_variable_queue:start_msg_store( + [], {fun ([]) -> finished; + ([Guid|GuidsTail]) + when length(GuidsTail) rem 2 == 0 -> + {Guid, 1, GuidsTail}; + ([Guid|GuidsTail]) -> + {Guid, 0, GuidsTail} + end, Guids2ndHalf}), + %% check we have the right msgs left + lists:foldl( + fun (Guid, Bool) -> + not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid)) + end, false, Guids2ndHalf), + %% restart empty + restart_msg_store_empty(), + %% check we don't contain any of the msgs + false = msg_store_contains(false, Guids), + %% publish the first half again + MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref), + {ok, MSCState9} = 
msg_store_write(Guids1stHalf, MSCState8), + %% this should force some sort of sync internally otherwise misread + ok = rabbit_msg_store:client_terminate( + msg_store_read(Guids1stHalf, MSCState9)), + ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids1stHalf), + %% restart empty + restart_msg_store_empty(), %% now safe to reuse guids + %% push a lot of msgs in... at least 100 files worth + {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit), + PayloadSizeBits = 65536, + BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)), + GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)], + Payload = << 0:PayloadSizeBits >>, + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, MsgStore, MSCStateM) -> + {ok, MSCStateN} = rabbit_msg_store:write( + MsgStore, Guid, Payload, MSCStateM), + MSCStateN + end, GuidsBig), + %% now read them to ensure we hit the fast client-side reading + ok = foreach_with_msg_store_client( + ?PERSISTENT_MSG_STORE, Ref, + fun (Guid, MsgStore, MSCStateM) -> + {{ok, Payload}, MSCStateN} = rabbit_msg_store:read( + MsgStore, Guid, MSCStateM), + MSCStateN + end, GuidsBig), + %% .., then 3s by 1... + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount, 1, -3)]), + %% .., then remove 3s by 2, from the young end first. This hits + %% GC (under 50% good data left, but no empty files. Must GC). + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]), + %% .., then remove 3s by 3, from the young end first. This hits + %% GC... + ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]), + %% ensure empty + false = msg_store_contains(false, GuidsBig), + %% restart empty + restart_msg_store_empty(), + passed. + +queue_name(Name) -> + rabbit_misc:r(<<"/">>, queue, Name). + +test_queue() -> + queue_name(<<"test">>). 
+ +init_test_queue() -> + rabbit_queue_index:init( + test_queue(), true, false, + fun (Guid) -> + rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) + end). + +restart_test_queue(Qi) -> + _ = rabbit_queue_index:terminate([], Qi), + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([test_queue()]), + init_test_queue(). + +empty_test_queue() -> + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([]), + {0, _Terms, Qi} = init_test_queue(), + _ = rabbit_queue_index:delete_and_terminate(Qi), + ok. + +with_empty_test_queue(Fun) -> + ok = empty_test_queue(), + {0, _Terms, Qi} = init_test_queue(), + rabbit_queue_index:delete_and_terminate(Fun(Qi)). + +queue_index_publish(SeqIds, Persistent, Qi) -> + Ref = rabbit_guid:guid(), + MsgStore = case Persistent of + true -> ?PERSISTENT_MSG_STORE; + false -> ?TRANSIENT_MSG_STORE + end, + {A, B, MSCStateEnd} = + lists:foldl( + fun (SeqId, {QiN, SeqIdsGuidsAcc, MSCStateN}) -> + Guid = rabbit_guid:guid(), + QiM = rabbit_queue_index:publish( + Guid, SeqId, Persistent, QiN), + {ok, MSCStateM} = rabbit_msg_store:write(MsgStore, Guid, + Guid, MSCStateN), + {QiM, [{SeqId, Guid} | SeqIdsGuidsAcc], MSCStateM} + end, {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)}, SeqIds), + ok = rabbit_msg_store:client_delete_and_terminate( + MSCStateEnd, MsgStore, Ref), + {A, B}. + +verify_read_with_published(_Delivered, _Persistent, [], _) -> + ok; +verify_read_with_published(Delivered, Persistent, + [{Guid, SeqId, Persistent, Delivered}|Read], + [{SeqId, Guid}|Published]) -> + verify_read_with_published(Delivered, Persistent, Read, Published); +verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> + ko. 
+ +test_queue_index() -> + SegmentSize = rabbit_queue_index:next_segment_boundary(0), + TwoSegs = SegmentSize + SegmentSize, + MostOfASegment = trunc(SegmentSize*0.75), + SeqIdsA = lists:seq(0, MostOfASegment-1), + SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment), + SeqIdsC = lists:seq(0, trunc(SegmentSize/2)), + SeqIdsD = lists:seq(0, SegmentSize*4), + + with_empty_test_queue( + fun (Qi0) -> + {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0), + {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1), + {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2), + {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3), + ok = verify_read_with_published(false, false, ReadA, + lists:reverse(SeqIdsGuidsA)), + %% should get length back as 0, as all the msgs were transient + {0, _Terms1, Qi6} = restart_test_queue(Qi4), + {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6), + {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7), + {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8), + {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9), + ok = verify_read_with_published(false, true, ReadB, + lists:reverse(SeqIdsGuidsB)), + %% should get length back as MostOfASegment + LenB = length(SeqIdsB), + {LenB, _Terms2, Qi12} = restart_test_queue(Qi10), + {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12), + Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13), + {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14), + ok = verify_read_with_published(true, true, ReadC, + lists:reverse(SeqIdsGuidsB)), + Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15), + Qi17 = rabbit_queue_index:flush(Qi16), + %% Everything will have gone now because #pubs == #acks + {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17), + %% should get length back as 0 because all persistent + %% msgs have been acked + {0, _Terms3, Qi19} = restart_test_queue(Qi18), + Qi19 + end), + + %% These next bits are just to hit the auto deletion of segment files. 
+ %% First, partials: + %% a) partial pub+del+ack, then move to new segment + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsC, Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize], + false, Qi4), + Qi5 + end), + + %% b) partial pub+del, then move to new segment, then ack all in old segment + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1), + {Qi3, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize], + false, Qi2), + Qi4 = rabbit_queue_index:ack(SeqIdsC, Qi3), + rabbit_queue_index:flush(Qi4) + end), + + %% c) just fill up several segments of all pubs, then +dels, then +acks + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD, + false, Qi0), + Qi2 = rabbit_queue_index:deliver(SeqIdsD, Qi1), + Qi3 = rabbit_queue_index:ack(SeqIdsD, Qi2), + rabbit_queue_index:flush(Qi3) + end), + + %% d) get messages in all states to a segment, then flush, then do + %% the same again, don't flush and read. This will hit all + %% possibilities in combining the segment with the journal. 
+ with_empty_test_queue( + fun (Qi0) -> + {Qi1, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7], + false, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + Qi4 = rabbit_queue_index:flush(Qi3), + {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {[], Qi8} = rabbit_queue_index:read(0, 4, Qi7), + {ReadD, Qi9} = rabbit_queue_index:read(4, 7, Qi8), + ok = verify_read_with_published(true, false, ReadD, + [Four, Five, Six]), + {ReadE, Qi10} = rabbit_queue_index:read(7, 9, Qi9), + ok = verify_read_with_published(false, false, ReadE, + [Seven, Eight]), + Qi10 + end), + + %% e) as for (d), but use terminate instead of read, which will + %% exercise journal_minus_segment, not segment_plus_journal. + with_empty_test_queue( + fun (Qi0) -> + {Qi1, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7], + true, Qi0), + Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1), + Qi3 = rabbit_queue_index:ack([0], Qi2), + {5, _Terms9, Qi4} = restart_test_queue(Qi3), + {Qi5, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi4), + Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5), + Qi7 = rabbit_queue_index:ack([1,2,3], Qi6), + {5, _Terms10, Qi8} = restart_test_queue(Qi7), + Qi8 + end), + + ok = rabbit_variable_queue:stop(), + ok = rabbit_variable_queue:start([]), + + passed. + +variable_queue_publish(IsPersistent, Count, VQ) -> + lists:foldl( + fun (_N, VQN) -> + rabbit_variable_queue:publish( + rabbit_basic:message( + rabbit_misc:r(<<>>, exchange, <<>>), + <<>>, #'P_basic'{delivery_mode = case IsPersistent of + true -> 2; + false -> 1 + end}, <<>>), VQN) + end, VQ, lists:seq(1, Count)). 
+ +variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> + lists:foldl(fun (N, {VQN, AckTagsAcc}) -> + Rem = Len - N, + {{#basic_message { is_persistent = IsPersistent }, + IsDelivered, AckTagN, Rem}, VQM} = + rabbit_variable_queue:fetch(true, VQN), + {VQM, [AckTagN | AckTagsAcc]} + end, {VQ, []}, lists:seq(1, Count)). + +assert_prop(List, Prop, Value) -> + Value = proplists:get_value(Prop, List). + +assert_props(List, PropVals) -> + [assert_prop(List, Prop, Value) || {Prop, Value} <- PropVals]. + +with_fresh_variable_queue(Fun) -> + ok = empty_test_queue(), + VQ = rabbit_variable_queue:init(test_queue(), true, false), + S0 = rabbit_variable_queue:status(VQ), + assert_props(S0, [{q1, 0}, {q2, 0}, + {delta, {delta, undefined, 0, undefined}}, + {q3, 0}, {q4, 0}, + {len, 0}]), + _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)), + passed. + +test_variable_queue() -> + [passed = with_fresh_variable_queue(F) || + F <- [fun test_variable_queue_dynamic_duration_change/1, + fun test_variable_queue_partial_segments_delta_thing/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1, + fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1]], + passed. 
+
+%% Fill the queue with two segments' worth of non-persistent
+%% messages, then step the ram duration target through a series of
+%% values while churning messages through the queue, and finally
+%% drain it and check that it is empty. Returns the final queue state.
+test_variable_queue_dynamic_duration_change(VQ0) ->
+    SegmentSize = rabbit_queue_index:next_segment_boundary(0),
+
+    %% start by sending in a couple of segments worth
+    Len = 2*SegmentSize,
+    Filled = variable_queue_publish(false, Len, VQ0),
+
+    %% squeeze and relax queue
+    Churn = Len div 32,
+    Churned = publish_fetch_and_ack(Churn, Len, Filled),
+    {Duration, Measured} = rabbit_variable_queue:ram_duration(Churned),
+    Targets = [Duration / 4, 0, Duration / 4, infinity],
+    ApplyTarget =
+        fun (Target, VQIn) ->
+                {_Sample, VQSampled} =
+                    rabbit_variable_queue:ram_duration(VQIn),
+                io:format("~p:~n~p~n",
+                          [Target, rabbit_variable_queue:status(VQSampled)]),
+                VQTargeted = rabbit_variable_queue:set_ram_duration_target(
+                               Target, VQSampled),
+                publish_fetch_and_ack(Churn, Len, VQTargeted)
+        end,
+    Relaxed = lists:foldl(ApplyTarget, Measured, Targets),
+
+    %% drain
+    {Drained, AckTags} = variable_queue_fetch(Len, false, false, Len, Relaxed),
+    Acked = rabbit_variable_queue:ack(AckTags, Drained),
+    {empty, Empty} = rabbit_variable_queue:fetch(true, Acked),
+
+    Empty.
+
+%% N times over: publish one non-persistent message, fetch the head
+%% of the queue (asserting it is not flagged as delivered and that
+%% Len messages remain afterwards) and ack it.
+publish_fetch_and_ack(0, _Len, VQ) ->
+    VQ;
+publish_fetch_and_ack(N, Len, VQ) ->
+    Published = variable_queue_publish(false, 1, VQ),
+    {{_Msg, false, AckTag, Len}, Fetched} =
+        rabbit_variable_queue:fetch(true, Published),
+    Acked = rabbit_variable_queue:ack([AckTag], Fetched),
+    publish_fetch_and_ack(N-1, Len, Acked).
+
+%% Publish one and a half segments of persistent messages and follow,
+%% through the status proplist, where they sit (q1, delta, q3) while
+%% the ram duration target is forced to 0, relaxed to infinity, and
+%% the queue is drained. Returns the final, empty queue state.
+test_variable_queue_partial_segments_delta_thing(VQ0) ->
+    SegmentSize = rabbit_queue_index:next_segment_boundary(0),
+    HalfSegment = SegmentSize div 2,
+    OneAndAHalfSegment = SegmentSize + HalfSegment,
+    HalfSegmentInDelta =
+        {delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}},
+    Published = variable_queue_publish(true, OneAndAHalfSegment, VQ0),
+    {_Duration, Sampled} = rabbit_variable_queue:ram_duration(Published),
+    Squeezed = check_variable_queue_status(
+                 rabbit_variable_queue:set_ram_duration_target(0, Sampled),
+                 %% one segment in q3 as betas, and half a segment in delta
+                 [HalfSegmentInDelta,
+                  {q3, SegmentSize},
+                  {len, SegmentSize + HalfSegment}]),
+    Relaxed = rabbit_variable_queue:set_ram_duration_target(infinity, Squeezed),
+    OneMore = check_variable_queue_status(
+                variable_queue_publish(true, 1, Relaxed),
+                %% one alpha, but it's in the same segment as the deltas
+                [{q1, 1},
+                 HalfSegmentInDelta,
+                 {q3, SegmentSize},
+                 {len, SegmentSize + HalfSegment + 1}]),
+    {FirstDrain, AckTags} =
+        variable_queue_fetch(SegmentSize, true, false,
+                             SegmentSize + HalfSegment + 1, OneMore),
+    HalfLeft = check_variable_queue_status(
+                 FirstDrain,
+                 %% the half segment should now be in q3 as betas
+                 [{q1, 1},
+                  {delta, {delta, undefined, 0, undefined}},
+                  {q3, HalfSegment},
+                  {len, HalfSegment + 1}]),
+    {SecondDrain, AckTags1} =
+        variable_queue_fetch(HalfSegment + 1, true, false,
+                             HalfSegment + 1, HalfLeft),
+    Acked = rabbit_variable_queue:ack(AckTags ++ AckTags1, SecondDrain),
+    %% should be empty now
+    {empty, Empty} = rabbit_variable_queue:fetch(true, Acked),
+    Empty.
+
+%% Let the queue finish any pending idle-timeout work, print its
+%% status and assert that the status contains Props. Returns the
+%% settled queue state.
+check_variable_queue_status(VQ0, Props) ->
+    Settled = variable_queue_wait_for_shuffling_end(VQ0),
+    Status = rabbit_variable_queue:status(Settled),
+    io:format("~p~n", [Status]),
+    assert_props(Status, Props),
+    Settled.
+
+%% Keep calling idle_timeout/1 for as long as the queue reports that
+%% it needs an idle timeout.
+variable_queue_wait_for_shuffling_end(VQ) ->
+    NeedsTimeout = rabbit_variable_queue:needs_idle_timeout(VQ),
+    case NeedsTimeout of
+        false -> VQ;
+        true  -> variable_queue_wait_for_shuffling_end(
+                   rabbit_variable_queue:idle_timeout(VQ))
+    end.
+
+%% Publish two segments of persistent and then two segments of
+%% non-persistent messages, fetch them all without acking, terminate
+%% the queue and re-init it with recovery. The recovered queue must
+%% hand back persistent messages flagged as delivered; one freshly
+%% published non-persistent message is fetched last.
+test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) ->
+    Count = 2 * rabbit_queue_index:next_segment_boundary(0),
+    WithPersistent = variable_queue_publish(true, Count, VQ0),
+    WithBoth = variable_queue_publish(false, Count, WithPersistent),
+    Squeezed = rabbit_variable_queue:set_ram_duration_target(0, WithBoth),
+    {PersistentFetched, _AckTags} =
+        variable_queue_fetch(Count, true, false, Count + Count, Squeezed),
+    {AllFetched, _AckTags1} =
+        variable_queue_fetch(Count, false, false, Count, PersistentFetched),
+    _Terminated = rabbit_variable_queue:terminate(AllFetched),
+    Recovered = rabbit_variable_queue:init(test_queue(), true, true),
+    {{_Msg1, true, _AckTag1, Count1}, AfterFirst} =
+        rabbit_variable_queue:fetch(true, Recovered),
+    PlusOne = variable_queue_publish(false, 1, AfterFirst),
+    Resqueezed = rabbit_variable_queue:set_ram_duration_target(0, PlusOne),
+    {PersistentRefetched, _AckTags2} =
+        variable_queue_fetch(Count1, true, true, Count, Resqueezed),
+    {Final, _AckTags3} =
+        variable_queue_fetch(1, false, false, 1, PersistentRefetched),
+    Final.
+
+%% Publish four non-persistent messages with a ram duration target of
+%% 0, fetch two and requeue them, run an idle timeout, terminate and
+%% re-init with recovery: the recovered queue must be empty.
+test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) ->
+    Squeezed = rabbit_variable_queue:set_ram_duration_target(0, VQ0),
+    Published = variable_queue_publish(false, 4, Squeezed),
+    {Fetched, AckTags} = variable_queue_fetch(2, false, false, 4, Published),
+    Requeued = rabbit_variable_queue:requeue(AckTags, Fetched),
+    Idled = rabbit_variable_queue:idle_timeout(Requeued),
+    _Terminated = rabbit_variable_queue:terminate(Idled),
+    Recovered = rabbit_variable_queue:init(test_queue(), true, true),
+    {empty, Empty} = rabbit_variable_queue:fetch(false, Recovered),
+    Empty.
+
+%% Declare the test queue, deliver Count persistent (delivery_mode 2)
+%% messages to it inside transaction TxID and commit, then kill the
+%% queue process and restart rabbit_amqqueue. After the restart the
+%% queue must still hold the messages: a basic_get leaves Count - 1
+%% behind, and a variable queue re-initialised with recovery on the
+%% same name hands back a message flagged as delivered, again with
+%% Count - 1 remaining.
+test_queue_recover() ->
+    Count = 2 * rabbit_queue_index:next_segment_boundary(0),
+    TxID = rabbit_guid:guid(),
+    {new, #amqqueue { pid = QPid, name = QName }} =
+        rabbit_amqqueue:declare(test_queue(), true, false, [], none),
+    Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>),
+                               <<>>, #'P_basic'{delivery_mode = 2}, <<>>),
+    Delivery = #delivery{mandatory = false, immediate = false, txn = TxID,
+                         sender = self(), message = Msg},
+    [true = rabbit_amqqueue:deliver(QPid, Delivery) ||
+        _ <- lists:seq(1, Count)],
+    rabbit_amqqueue:commit_all([QPid], TxID, self()),
+    exit(QPid, kill),
+    %% Monitoring after the kill has been sent is safe: monitoring a
+    %% process that is already dead still produces a 'DOWN' message.
+    MRef = erlang:monitor(process, QPid),
+    receive {'DOWN', MRef, process, QPid, _Info} -> ok
+    after 10000 -> exit(timeout_waiting_for_queue_death)
+    end,
+    rabbit_amqqueue:stop(),
+    ok = rabbit_amqqueue:start(),
+    rabbit_amqqueue:with_or_die(
+      QName,
+      fun (Q1 = #amqqueue { pid = QPid1 }) ->
+              CountMinusOne = Count - 1,
+              {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} =
+                  rabbit_amqqueue:basic_get(Q1, self(), false),
+              %% NOTE(review): nothing waits for QPid1 to exit before
+              %% the variable queue is re-initialised below - confirm
+              %% this cannot race with the queue process shutting down.
+              exit(QPid1, shutdown),
+              VQ1 = rabbit_variable_queue:init(QName, true, true),
+              {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} =
+                  rabbit_variable_queue:fetch(true, VQ1),
+              _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2),
+              rabbit_amqqueue:internal_delete(QName)
+      end),
+    passed.
diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl
new file mode 100644
index 00000000..3aaf1917
--- /dev/null
+++ b/src/rabbit_types.erl
@@ -0,0 +1,147 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developers of the Original Code are LShift Ltd,
+%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%% Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%% Ltd. Portions created by Cohesive Financial Technologies LLC are
+%% Copyright (C) 2007-2010 Cohesive Financial Technologies
+%% LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%% (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%% All Rights Reserved.
+%%
+%% Contributor(s): ______________________________________.
+%%
+
+%% Type definitions over the records included from rabbit.hrl. The
+%% module contains no functions; every declaration below is only
+%% compiled when use_specs is defined.
+-module(rabbit_types).
+
+-include("rabbit.hrl").
+
+-ifdef(use_specs).
+
+-export_type([txn/0, maybe/1, info/0, info_key/0, message/0, basic_message/0,
+              delivery/0, content/0, decoded_content/0, undecoded_content/0,
+              unencoded_content/0, encoded_content/0, vhost/0, ctag/0,
+              amqp_error/0, r/1, r2/2, r3/3, ssl_socket/0, listener/0,
+              binding/0, amqqueue/0, exchange/0, connection/0, protocol/0,
+              user/0, error/1, ok_or_error/1, ok_or_error2/2, ok/1]).
+
+%% maybe(T): either a T or the atom 'none'.
+-type(maybe(T) :: T | 'none').
+-type(vhost() :: binary()).
+-type(ctag() :: binary()).
+
+%% TODO: make this more precise by tying specific class_ids to
+%% specific properties
+%% undecoded_content(): content in which exactly one of properties /
+%% properties_bin is present and the other is 'none'.
+-type(undecoded_content() ::
+        #content{class_id              :: rabbit_framing:amqp_class_id(),
+                 properties            :: 'none',
+                 properties_bin        :: binary(),
+                 payload_fragments_rev :: [binary()]} |
+        #content{class_id              :: rabbit_framing:amqp_class_id(),
+                 properties            :: rabbit_framing:amqp_property_record(),
+                 properties_bin        :: 'none',
+                 payload_fragments_rev :: [binary()]}).
+%% unencoded_content() is declared as an alias of undecoded_content().
+-type(unencoded_content() :: undecoded_content()).
+%% decoded_content(): the properties record is present; the binary
+%% form may or may not be.
+-type(decoded_content() ::
+        #content{class_id              :: rabbit_framing:amqp_class_id(),
+                 properties            :: rabbit_framing:amqp_property_record(),
+                 properties_bin        :: maybe(binary()),
+                 payload_fragments_rev :: [binary()]}).
+%% encoded_content(): the binary properties are present; the record
+%% form may or may not be.
+-type(encoded_content() ::
+        #content{class_id              :: rabbit_framing:amqp_class_id(),
+                 properties            :: maybe(rabbit_framing:amqp_property_record()),
+                 properties_bin        :: binary(),
+                 payload_fragments_rev :: [binary()]}).
+-type(content() :: undecoded_content() | decoded_content()).
+-type(basic_message() ::
+        #basic_message{exchange_name :: rabbit_exchange:name(),
+                       routing_key   :: rabbit_router:routing_key(),
+                       content       :: content(),
+                       guid          :: rabbit_guid:guid(),
+                       is_persistent :: boolean()}).
+-type(message() :: basic_message()).
+-type(delivery() ::
+        #delivery{mandatory :: boolean(),
+                  immediate :: boolean(),
+                  txn       :: maybe(txn()),
+                  sender    :: pid(),
+                  message   :: message()}).
+
+%% this is really an abstract type, but dialyzer does not support them
+-type(txn() :: rabbit_guid:guid()).
+
+-type(info_key() :: atom()).
+-type(info() :: {info_key(), any()}).
+
+-type(amqp_error() ::
+        #amqp_error{name        :: rabbit_framing:amqp_exception(),
+                    explanation :: string(),
+                    method      :: rabbit_framing:amqp_method_name()}).
+
+%% r/1, r2/2 and r3/3 are successively more general views of the
+%% #resource{} record: r/1 fixes the virtual host to vhost() and r2/2
+%% fixes the name to rabbit_misc:resource_name().
+-type(r(Kind) ::
+        r2(vhost(), Kind)).
+-type(r2(VirtualHost, Kind) ::
+        r3(VirtualHost, Kind, rabbit_misc:resource_name())).
+-type(r3(VirtualHost, Kind, Name) ::
+        #resource{virtual_host :: VirtualHost,
+                  kind         :: Kind,
+                  name         :: Name}).
+
+-type(ssl_socket() :: #ssl_socket{}).
+
+-type(listener() ::
+        #listener{node     :: node(),
+                  protocol :: atom(),
+                  host     :: rabbit_networking:hostname(),
+                  port     :: rabbit_networking:ip_port()}).
+
+-type(binding() ::
+        #binding{exchange_name    :: rabbit_exchange:name(),
+                 queue_name       :: rabbit_amqqueue:name(),
+                 key              :: rabbit_exchange:binding_key()}).
+
+-type(amqqueue() ::
+        #amqqueue{name            :: rabbit_amqqueue:name(),
+                  durable         :: boolean(),
+                  auto_delete     :: boolean(),
+                  exclusive_owner :: rabbit_types:maybe(pid()),
+                  arguments       :: rabbit_framing:amqp_table(),
+                  pid             :: rabbit_types:maybe(pid())}).
+
+-type(exchange() ::
+        #exchange{name        :: rabbit_exchange:name(),
+                  type        :: rabbit_exchange:type(),
+                  durable     :: boolean(),
+                  auto_delete :: boolean(),
+                  arguments   :: rabbit_framing:amqp_table()}).
+
+-type(connection() :: pid()).
+
+-type(protocol() :: atom()).
+
+-type(user() ::
+        #user{username :: rabbit_access_control:username(),
+              password :: rabbit_access_control:password()}).
+
+%% Generic result shapes: ok_or_error/1 is a bare 'ok' or an error
+%% tuple, ok_or_error2/2 is an ok tuple or an error tuple.
+-type(ok(A) :: {'ok', A}).
+-type(error(A) :: {'error', A}).
+-type(ok_or_error(A) :: 'ok' | error(A)).
+-type(ok_or_error2(A, B) :: ok(A) | error(B)).
+
+-endif. % use_specs
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
new file mode 100644
index 00000000..0f52eee8
--- /dev/null
+++ b/src/rabbit_variable_queue.erl
@@ -0,0 +1,1433 @@
+%% The contents of this file are subject to the Mozilla Public License
+%% Version 1.1 (the "License"); you may not use this file except in
+%% compliance with the License. You may obtain a copy of the License at
+%% http://www.mozilla.org/MPL/
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%% License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% The Original Code is RabbitMQ.
+%%
+%% The Initial Developers of the Original Code are LShift Ltd,
+%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%% Technologies LLC, and Rabbit Technologies Ltd.
+%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(rabbit_variable_queue). + +-export([init/3, terminate/1, delete_and_terminate/1, + purge/1, publish/2, publish_delivered/3, fetch/2, ack/2, + tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3, + requeue/2, len/1, is_empty/1, + set_ram_duration_target/2, ram_duration/1, + needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1, + status/1]). + +-export([start/1, stop/0]). + +%% exported for testing only +-export([start_msg_store/2, stop_msg_store/0]). + +%%---------------------------------------------------------------------------- +%% Definitions: + +%% alpha: this is a message where both the message itself, and its +%% position within the queue are held in RAM +%% +%% beta: this is a message where the message itself is only held on +%% disk, but its position within the queue is held in RAM. +%% +%% gamma: this is a message where the message itself is only held on +%% disk, but its position is both in RAM and on disk. +%% +%% delta: this is a collection of messages, represented by a single +%% term, where the messages and their position are only held on +%% disk. +%% +%% Note that for persistent messages, the message and its position +%% within the queue are always held on disk, *in addition* to being in +%% one of the above classifications. +%% +%% Also note that within this code, the term gamma never +%% appears. Instead, gammas are defined by betas who have had their +%% queue position recorded on disk. +%% +%% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though +%% many of these steps are frequently skipped. 
q1 and q4 only hold +%% alphas, q2 and q3 hold both betas and gammas (as queues of queues, +%% using the bpqueue module where the block prefix determines whether +%% they're betas or gammas). When a message arrives, its +%% classification is determined. It is then added to the rightmost +%% appropriate queue. +%% +%% If a new message is determined to be a beta or gamma, q1 is +%% empty. If a new message is determined to be a delta, q1 and q2 are +%% empty (and actually q4 too). +%% +%% When removing messages from a queue, if q4 is empty then q3 is read +%% directly. If q3 becomes empty then the next segment's worth of +%% messages from delta are read into q3, reducing the size of +%% delta. If the queue is non empty, either q4 or q3 contain +%% entries. It is never permitted for delta to hold all the messages +%% in the queue. +%% +%% The duration indicated to us by the memory_monitor is used to +%% calculate, given our current ingress and egress rates, how many +%% messages we should hold in RAM. When we need to push alphas to +%% betas or betas to gammas, we favour writing out messages that are +%% further from the head of the queue. This minimises writes to disk, +%% as the messages closer to the tail of the queue stay in the queue +%% for longer, thus do not need to be replaced as quickly by sending +%% other messages to disk. +%% +%% Whilst messages are pushed to disk and forgotten from RAM as soon +%% as requested by a new setting of the queue RAM duration, the +%% inverse is not true: we only load messages back into RAM as +%% demanded as the queue is read from. Thus only publishes to the +%% queue will take up available spare capacity. +%% +%% When we report our duration to the memory monitor, we calculate +%% average ingress and egress rates over the last two samples, and +%% then calculate our duration based on the sum of the ingress and +%% egress rates. 
More than two samples could be used, but it's a +%% balance between responding quickly enough to changes in +%% producers/consumers versus ignoring temporary blips. The problem +%% with temporary blips is that with just a few queues, they can have +%% substantial impact on the calculation of the average duration and +%% hence cause unnecessary I/O. Another alternative is to increase the +%% amqqueue_process:RAM_DURATION_UPDATE_PERIOD to beyond 5 +%% seconds. However, that then runs the risk of being too slow to +%% inform the memory monitor of changes. Thus a 5 second interval, +%% plus a rolling average over the last two samples seems to work +%% well in practice. +%% +%% The sum of the ingress and egress rates is used because the egress +%% rate alone is not sufficient. Adding in the ingress rate means that +%% queues which are being flooded by messages are given more memory, +%% resulting in them being able to process the messages faster (by +%% doing less I/O, or at least deferring it) and thus helping keep +%% their mailboxes empty and thus the queue as a whole is more +%% responsive. If such a queue also has fast but previously idle +%% consumers, the consumer can then start to be driven as fast as it +%% can go, whereas if only egress rate was being used, the incoming +%% messages may have to be written to disk and then read back in, +%% resulting in the hard disk being a bottleneck in driving the +%% consumers. Generally, we want to give Rabbit every chance of +%% getting rid of messages as fast as possible and remaining +%% responsive, and using only the egress rate impacts that goal. +%% +%% If a queue is full of transient messages, then the transition from +%% betas to deltas will be potentially very expensive as millions of +%% entries must be written to disk by the queue_index module. This can +%% badly stall the queue. 
In order to avoid this, the proportion of +%% gammas / (betas+gammas) must not be lower than (betas+gammas) / +%% (alphas+betas+gammas). As the queue grows or available memory +%% shrinks, the latter ratio increases, requiring the conversion of +%% more betas to gammas in order to maintain the invariant. At the +%% point at which betas and gammas must be converted to deltas, there +%% should be very few betas remaining, thus the transition is fast (no +%% work needs to be done for the gamma -> delta transition). +%% +%% The conversion of betas to gammas is done in batches of exactly +%% ?IO_BATCH_SIZE. This value should not be too small, otherwise the +%% frequent operations on the queues of q2 and q3 will not be +%% effectively amortised (switching the direction of queue access +%% defeats amortisation), nor should it be too big, otherwise +%% converting a batch stalls the queue for too long. Therefore, it +%% must be just right. ram_index_count is used here and is the number +%% of betas. +%% +%% The conversion from alphas to betas is also chunked, but only to +%% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at +%% any one time. This further smooths the effects of changes to the +%% target_ram_msg_count and ensures the queue remains responsive +%% even when there is a large amount of IO work to do. The +%% idle_timeout callback is utilised to ensure that conversions are +%% done as promptly as possible whilst ensuring the queue remains +%% responsive. +%% +%% In the queue we keep track of both messages that are pending +%% delivery and messages that are pending acks. This ensures that +%% purging (deleting the former) and deletion (deleting the former and +%% the latter) are both cheap and do not require any scanning through qi +%% segments. +%% +%% Notes on Clean Shutdown +%% (This documents behaviour in variable_queue, queue_index and +%% msg_store.)
+%% +%% In order to try to achieve as fast a start-up as possible, if a +%% clean shutdown occurs, we try to save out state to disk to reduce +%% work on startup. In the msg_store this takes the form of the +%% index_module's state, plus the file_summary ets table, and client +%% refs. In the VQ, this takes the form of the count of persistent +%% messages in the queue and references into the msg_stores. The +%% queue_index adds to these terms the details of its segments and +%% stores the terms in the queue directory. +%% +%% Two message stores are used. One is created for persistent messages +%% to durable queues that must survive restarts, and the other is used +%% for all other messages that just happen to need to be written to +%% disk. On start up we can therefore nuke the transient message +%% store, and be sure that the messages in the persistent store are +%% all that we need. +%% +%% The references to the msg_stores are there so that the msg_store +%% knows to only trust its saved state if all of the queues it was +%% previously talking to come up cleanly. Likewise, the queues +%% themselves (esp queue_index) skip work in init if all the queues +%% and msg_store were shut down cleanly. This gives both good speed +%% improvements and also robustness so that if anything possibly went +%% wrong in shutdown (or there was subsequent manual tampering), all +%% messages and queues that can be recovered are recovered, safely. +%% +%% To delete transient messages lazily, the variable_queue, on +%% startup, stores the next_seq_id reported by the queue_index as the +%% transient_threshold. From that point on, whenever it's reading a +%% message off disk via the queue_index, if the seq_id is below this +%% threshold and the message is transient then it drops the message +%% (the message itself won't exist on disk because it would have been +%% stored in the transient msg_store which would have had its saved +%% state nuked on startup).
This avoids the expensive operation of
+%% scanning the entire queue on startup in order to delete transient
+%% messages that were only pushed to disk to save memory.
+%%
+%%----------------------------------------------------------------------------
+
+-behaviour(rabbit_backing_queue).
+
+%% The state of one variable queue. q1, q2, delta, q3 and q4 are the
+%% message stages described in the Definitions comment at the top of
+%% this module; the remaining fields are bookkeeping (counters, rates,
+%% msg_store clients, queue index state).
+-record(vqstate,
+        { q1,
+          q2,
+          delta,
+          q3,
+          q4,
+          next_seq_id,
+          pending_ack,
+          index_state,
+          msg_store_clients,
+          on_sync,
+          durable,
+          transient_threshold,
+
+          len,
+          persistent_count,
+
+          duration_target,
+          target_ram_msg_count,
+          ram_msg_count,
+          ram_msg_count_prev,
+          ram_index_count,
+          out_counter,
+          in_counter,
+          rates
+        }).
+
+-record(rates, { egress, ingress, avg_egress, avg_ingress, timestamp }).
+
+%% Per-message bookkeeping: the message itself plus flags recording
+%% whether it is persistent, has been delivered, and whether the
+%% message and its index entry are on disk.
+-record(msg_status,
+        { seq_id,
+          guid,
+          msg,
+          is_persistent,
+          is_delivered,
+          msg_on_disk,
+          index_on_disk
+        }).
+
+-record(delta,
+        { start_seq_id, %% start_seq_id is inclusive
+          count,
+          end_seq_id    %% end_seq_id is exclusive
+        }).
+
+%% Publishes and acks accumulated by a transaction before it is
+%% committed or rolled back.
+-record(tx, { pending_messages, pending_acks }).
+
+-record(sync, { acks_persistent, acks_all, pubs, funs }).
+
+%% When we discover, on publish, that we should write some indices to
+%% disk for some betas, the IO_BATCH_SIZE sets the number of
+%% betas that we must be due to write indices for before we do any
+%% work at all. This is both a minimum and a maximum - we don't write
+%% fewer than IO_BATCH_SIZE indices out in one go, and we don't
+%% write more - we can always come back on the next publish to do
+%% more.
+-define(IO_BATCH_SIZE, 64).
+-define(PERSISTENT_MSG_STORE, msg_store_persistent).
+-define(TRANSIENT_MSG_STORE, msg_store_transient).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-type(timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}).
+-type(seq_id() :: non_neg_integer()).
+%% An ack tag is the message's seq_id, or 'blank_ack' when no ack was
+%% required.
+-type(ack() :: seq_id() | 'blank_ack').
+
+-type(rates() :: #rates { egress :: {timestamp(), non_neg_integer()},
+                          ingress :: {timestamp(), non_neg_integer()},
+                          avg_egress :: float(),
+                          avg_ingress :: float(),
+                          timestamp :: timestamp() }).
+
+%% NOTE(review): ?BLANK_DELTA below sets start_seq_id and end_seq_id
+%% to 'undefined', which this type does not mention - confirm that
+%% dialyzer accepts this on the supported OTP releases.
+-type(delta() :: #delta { start_seq_id :: non_neg_integer(),
+                          count :: non_neg_integer (),
+                          end_seq_id :: non_neg_integer() }).
+
+-type(sync() :: #sync { acks_persistent :: [[seq_id()]],
+                        acks_all :: [[seq_id()]],
+                        pubs :: [[rabbit_guid:guid()]],
+                        funs :: [fun (() -> any())] }).
+
+-type(state() :: #vqstate {
+             q1 :: queue(),
+             q2 :: bpqueue:bpqueue(),
+             delta :: delta(),
+             q3 :: bpqueue:bpqueue(),
+             q4 :: queue(),
+             next_seq_id :: seq_id(),
+             pending_ack :: dict:dictionary(),
+             index_state :: any(),
+             msg_store_clients :: 'undefined' | {{any(), binary()},
+                                                 {any(), binary()}},
+             on_sync :: sync(),
+             durable :: boolean(),
+
+             len :: non_neg_integer(),
+             persistent_count :: non_neg_integer(),
+
+             transient_threshold :: non_neg_integer(),
+             duration_target :: number() | 'infinity',
+             target_ram_msg_count :: non_neg_integer() | 'infinity',
+             ram_msg_count :: non_neg_integer(),
+             ram_msg_count_prev :: non_neg_integer(),
+             ram_index_count :: non_neg_integer(),
+             out_counter :: non_neg_integer(),
+             in_counter :: non_neg_integer(),
+             rates :: rates() }).
+
+-include("rabbit_backing_queue_spec.hrl").
+
+-endif.
+
+%% An empty delta: no messages and no seq_id bounds.
+-define(BLANK_DELTA, #delta { start_seq_id = undefined,
+                              count = 0,
+                              end_seq_id = undefined }).
+%% Pattern for an empty delta whose two bounds are equal to each
+%% other (both bound to Z).
+-define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z,
+                                         count = 0,
+                                         end_seq_id = Z }).
+
+-define(BLANK_SYNC, #sync { acks_persistent = [],
+                            acks_all = [],
+                            pubs = [],
+                            funs = [] }).
+
+%%----------------------------------------------------------------------------
+%% Public API
+%%----------------------------------------------------------------------------
+
+%% Recover the queue indices of DurableQueues and start both message
+%% stores. The persistent store is given the persistent_ref of every
+%% recovered queue whose terms contain one.
+start(DurableQueues) ->
+    {AllTerms, StartFunState} = rabbit_queue_index:recover(DurableQueues),
+    start_msg_store(
+      %% Ref is bound inside the begin...end filter, which also drops
+      %% queues that saved no persistent_ref.
+      [Ref || Terms <- AllTerms,
+              begin
+                  Ref = proplists:get_value(persistent_ref, Terms),
+                  Ref =/= undefined
+              end],
+      StartFunState).
+
+%% Stop both message stores.
+stop() -> stop_msg_store().
+
+%% Start the transient store (no client refs, trivial start fun) and
+%% then the persistent store (with Refs and StartFunState) as children
+%% of rabbit_sup. Exported for testing only.
+start_msg_store(Refs, StartFunState) ->
+    ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store,
+                                [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(),
+                                 undefined, {fun (ok) -> finished end, ok}]),
+    ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store,
+                                [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(),
+                                 Refs, StartFunState]).
+
+%% Stop the persistent store, then the transient one. Exported for
+%% testing only.
+stop_msg_store() ->
+    ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE),
+    ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE).
+
+%% Build the initial state for QueueName.
+%%
+%% IsDurable: a persistent msg_store client is only created when true.
+%% Recover:   passed through to rabbit_queue_index:init/4.
+%%
+%% The terms returned by the queue index are only used when they
+%% contain both persistent_ref and transient_ref; otherwise fresh refs
+%% are generated and the terms are ignored. Every message known at
+%% this point is accounted for in delta (q1-q4 start out empty), after
+%% which maybe_deltas_to_betas/1 is applied.
+init(QueueName, IsDurable, Recover) ->
+    {DeltaCount, Terms, IndexState} =
+        rabbit_queue_index:init(
+          QueueName, Recover,
+          rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE),
+          fun (Guid) ->
+                  rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid)
+          end),
+    {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState),
+
+    {PRef, TRef, Terms1} =
+        case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of
+            [] -> {proplists:get_value(persistent_ref, Terms),
+                   proplists:get_value(transient_ref, Terms),
+                   Terms};
+            _  -> {rabbit_guid:guid(), rabbit_guid:guid(), []}
+        end,
+    %% a saved persistent_count takes precedence over the count
+    %% reported by the queue index
+    DeltaCount1 = proplists:get_value(persistent_count, Terms1, DeltaCount),
+    Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of
+                true  -> ?BLANK_DELTA;
+                false -> #delta { start_seq_id = LowSeqId,
+                                  count = DeltaCount1,
+                                  end_seq_id = NextSeqId }
+            end,
+    Now = now(),
+    PersistentClient =
+        case IsDurable of
+            true  -> rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, PRef);
+            false -> undefined
+        end,
+    TransientClient = rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef),
+    State = #vqstate {
+      q1 = queue:new(),
+      q2 = bpqueue:new(),
+      delta = Delta,
+      q3 = bpqueue:new(),
+      q4 = queue:new(),
+      next_seq_id = NextSeqId,
+      pending_ack = dict:new(),
+      index_state = IndexState1,
+      msg_store_clients = {{PersistentClient, PRef},
+                           {TransientClient, TRef}},
+      on_sync = ?BLANK_SYNC,
+      durable = IsDurable,
+      transient_threshold = NextSeqId,
+
+      len = DeltaCount1,
+      persistent_count = DeltaCount1,
+
+      duration_target = infinity,
+      target_ram_msg_count = infinity,
+      ram_msg_count = 0,
+      ram_msg_count_prev = 0,
+      ram_index_count = 0,
+      out_counter = 0,
+      in_counter = 0,
+      rates = #rates { egress = {Now, 0},
+                       ingress = {Now, DeltaCount1},
+                       avg_egress = 0.0,
+                       avg_ingress = 0.0,
+                       timestamp = Now } },
+    a(maybe_deltas_to_betas(State)).
+
+%% Terminate the queue: run tx_commit_index/1 and
+%% remove_pending_ack(true, ...), terminate the msg_store clients, and
+%% hand the two refs plus persistent_count to
+%% rabbit_queue_index:terminate/2 as terms - the same keys that init/3
+%% looks up.
+terminate(State) ->
+    State1 = #vqstate { persistent_count = PCount,
+                        index_state = IndexState,
+                        msg_store_clients = {{MSCStateP, PRef},
+                                             {MSCStateT, TRef}} } =
+        remove_pending_ack(true, tx_commit_index(State)),
+    %% non-durable queues have no persistent client (see init/3)
+    case MSCStateP of
+        undefined -> ok;
+        _         -> rabbit_msg_store:client_terminate(MSCStateP)
+    end,
+    rabbit_msg_store:client_terminate(MSCStateT),
+    Terms = [{persistent_ref, PRef},
+             {transient_ref, TRef},
+             {persistent_count, PCount}],
+    a(State1 #vqstate { index_state = rabbit_queue_index:terminate(
+                                        Terms, IndexState),
+                        msg_store_clients = undefined }).
+
+%% the only difference between purge and delete is that delete also
+%% needs to delete everything that's been delivered and not ack'd.
+delete_and_terminate(State) ->
+    %% TODO: there is no need to interact with qi at all - which we do
+    %% as part of 'purge' and 'remove_pending_ack', other than
+    %% deleting it.
+    {_PurgeCount, State1} = purge(State),
+    State2 = #vqstate { index_state = IndexState,
+                        msg_store_clients = {{MSCStateP, PRef},
+                                             {MSCStateT, TRef}} } =
+        remove_pending_ack(false, State1),
+    IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState),
+    %% NOTE(review): the persistent client is passed to
+    %% client_delete_and_terminate/3 and then also to
+    %% client_terminate/1, whereas the transient client only gets the
+    %% former - confirm the second call is intended.
+    case MSCStateP of
+        undefined -> ok;
+        _         -> rabbit_msg_store:client_delete_and_terminate(
+                       MSCStateP, ?PERSISTENT_MSG_STORE, PRef),
+                     rabbit_msg_store:client_terminate(MSCStateP)
+    end,
+    rabbit_msg_store:client_delete_and_terminate(
+      MSCStateT, ?TRANSIENT_MSG_STORE, TRef),
+    a(State2 #vqstate { index_state = IndexState1,
+                        msg_store_clients = undefined }).
+
+%% Remove every message that is pending delivery. Returns
+%% {Len, State}, Len being the queue length before the purge; all the
+%% message counts in the returned state are reset to 0.
+purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) ->
+    %% TODO: when there are no pending acks, which is a common case,
+    %% we could simply wipe the qi instead of issuing delivers and
+    %% acks for all the messages.
+    IndexState1 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q4,
+                                       IndexState),
+    State1 = #vqstate { q1 = Q1, index_state = IndexState2 } =
+        purge_betas_and_deltas(State #vqstate { q4 = queue:new(),
+                                                index_state = IndexState1 }),
+    IndexState3 = remove_queue_entries(fun rabbit_misc:queue_fold/3, Q1,
+                                       IndexState2),
+    {Len, a(State1 #vqstate { q1 = queue:new(),
+                              index_state = IndexState3,
+                              len = 0,
+                              ram_msg_count = 0,
+                              ram_index_count = 0,
+                              persistent_count = 0 })}.
+
+%% Publish Msg through publish/4 (with both flags false) and then let
+%% reduce_memory_use/1 run. Returns the new state.
+publish(Msg, State) ->
+    {_SeqId, State1} = publish(Msg, false, false, State),
+    a(reduce_memory_use(State1)).
+
+%% Publish a message that has already been delivered; both clauses
+%% only match an empty queue (len = 0). Returns {AckTag, State}.
+%%
+%% Without an ack requirement nothing is stored and the tag is
+%% blank_ack. With one, the message is recorded in pending_ack under
+%% the next seq_id, which is returned as the ack tag; len is left
+%% unchanged while both in_counter and out_counter are incremented.
+publish_delivered(false, _Msg, State = #vqstate { len = 0 }) ->
+    {blank_ack, a(State)};
+publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent },
+                  State = #vqstate { len = 0,
+                                     next_seq_id = SeqId,
+                                     out_counter = OutCount,
+                                     in_counter = InCount,
+                                     persistent_count = PCount,
+                                     pending_ack = PA,
+                                     durable = IsDurable }) ->
+    %% a message only counts as persistent on a durable queue
+    IsPersistent1 = IsDurable andalso IsPersistent,
+    MsgStatus = (msg_status(IsPersistent1, SeqId, Msg))
+        #msg_status { is_delivered = true },
+    {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State),
+    PA1 = record_pending_ack(m(MsgStatus1), PA),
+    PCount1 = PCount + one_if(IsPersistent1),
+    {SeqId, a(State1 #vqstate { next_seq_id = SeqId + 1,
+                                out_counter = OutCount + 1,
+                                in_counter = InCount + 1,
+                                persistent_count = PCount1,
+                                pending_ack = PA1 })}.
+
+%% Take the next message off the queue.
+%%
+%% Returns {empty, State} when there is nothing to fetch, otherwise
+%% {{Msg, IsDelivered, AckTag, Remaining}, State}. IsDelivered is the
+%% flag the message carried before this fetch, AckTag is the
+%% message's seq_id when AckRequired is true and blank_ack otherwise,
+%% and Remaining is the queue length after the fetch.
+%%
+%% Messages are always taken from q4; when q4 is empty,
+%% fetch_from_q3_to_q4/1 is asked to refill it and the fetch is
+%% retried.
+fetch(AckRequired, State = #vqstate { q4 = Q4,
+                                      ram_msg_count = RamMsgCount,
+                                      out_counter = OutCount,
+                                      index_state = IndexState,
+                                      len = Len,
+                                      persistent_count = PCount,
+                                      pending_ack = PA }) ->
+    case queue:out(Q4) of
+        {empty, _Q4} ->
+            case fetch_from_q3_to_q4(State) of
+                {empty, State1} = Result -> a(State1), Result;
+                {loaded, State1}         -> fetch(AckRequired, State1)
+            end;
+        {{value, MsgStatus = #msg_status {
+                   msg = Msg, guid = Guid, seq_id = SeqId,
+                   is_persistent = IsPersistent, is_delivered = IsDelivered,
+                   msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }},
+         Q4a} ->
+
+            %% 1. Mark it delivered if necessary
+            IndexState1 = maybe_write_delivered(
+                            IndexOnDisk andalso not IsDelivered,
+                            SeqId, IndexState),
+
+            %% 2. Remove from msg_store and queue index, if necessary
+            MsgStore = find_msg_store(IsPersistent),
+            Rem = fun () -> ok = rabbit_msg_store:remove(MsgStore, [Guid]) end,
+            Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end,
+            %% Without an ack requirement the message is removed from
+            %% the store straight away (and acked in the index if it
+            %% has an entry there). With one, only a non-persistent
+            %% message that is fully on disk has its index entry acked
+            %% now; everything else is left alone.
+            IndexState2 =
+                case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of
+                    {false, true, false, _} -> Rem(), IndexState1;
+                    {false, true, true, _} -> Rem(), Ack();
+                    { true, true, true, false} -> Ack();
+                    _ -> IndexState1
+                end,
+
+            %% 3. If an ack is required, add something sensible to PA
+            {AckTag, PA1} = case AckRequired of
+                                true  -> PA2 = record_pending_ack(
+                                                 MsgStatus #msg_status {
+                                                   is_delivered = true }, PA),
+                                         {SeqId, PA2};
+                                false -> {blank_ack, PA}
+                            end,
+
+            %% a persistent message only stops being counted here when
+            %% no ack is pending for it
+            PCount1 = PCount - one_if(IsPersistent andalso not AckRequired),
+            Len1 = Len - 1,
+            {{Msg, IsDelivered, AckTag, Len1},
+             a(State #vqstate { q4 = Q4a,
+                                ram_msg_count = RamMsgCount - 1,
+                                out_counter = OutCount + 1,
+                                index_state = IndexState2,
+                                len = Len1,
+                                persistent_count = PCount1,
+                                pending_ack = PA1 })}
+    end.
+
+%% Acknowledge AckTags: delegates to ack/4 with
+%% rabbit_msg_store:remove/2 as the msg_store operation and a
+%% per-entry fun that leaves the state untouched.
+ack(AckTags, State) ->
+    a(ack(fun rabbit_msg_store:remove/2,
+          fun (_AckEntry, State1) -> State1 end,
+          AckTags, State)).
+
+%% Record Msg as a pending publish of transaction Txn. A persistent
+%% message on a durable queue is additionally handed to
+%% maybe_write_msg_to_disk/3 right away; the match asserts that it
+%% comes back with msg_on_disk = true.
+tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent },
+           State = #vqstate { durable = IsDurable,
+                              msg_store_clients = MSCState }) ->
+    Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn),
+    %% pending_messages is kept newest-first
+    store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }),
+    a(case IsPersistent andalso IsDurable of
+          true  -> MsgStatus = msg_status(true, undefined, Msg),
+                   {#msg_status { msg_on_disk = true }, MSCState1} =
+                       maybe_write_msg_to_disk(false, MsgStatus, MSCState),
+                   State #vqstate { msg_store_clients = MSCState1 };
+          false -> State
+      end).
+
+%% Record AckTags as pending acks of transaction Txn. Each call adds
+%% its AckTags list as one element, so pending_acks is a list of
+%% lists. The state is returned unchanged.
+tx_ack(Txn, AckTags, State) ->
+    Tx = #tx { pending_acks = Acks } = lookup_tx(Txn),
+    store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }),
+    State.
+
+%% Abandon transaction Txn. On a durable queue the persistent
+%% messages published in the transaction are removed from the
+%% persistent message store. Returns the flattened list of ack tags
+%% that were pending in the transaction, together with the state.
+tx_rollback(Txn, State = #vqstate { durable = IsDurable }) ->
+    #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn),
+    erase_tx(Txn),
+    ok = case IsDurable of
+             true -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE,
+                                             persistent_guids(Pubs));
+             false -> ok
+         end,
+    {lists:append(AckTags), a(State)}.
+
+%% Commit transaction Txn. Returns {AckTags, State} where AckTags is
+%% the flattened list of tags acked in the transaction. If the queue
+%% is durable and persistent messages were published, the commit is
+%% completed from the callback passed to rabbit_msg_store:sync/3;
+%% otherwise it is completed here.
+tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) ->
+    #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn),
+    erase_tx(Txn),
+    %% pending messages are stored newest first
+    PubsOrdered = lists:reverse(Pubs),
+    AckTags1 = lists:append(AckTags),
+    PersistentGuids = persistent_guids(PubsOrdered),
+    HasPersistentPubs = PersistentGuids =/= [],
+    {AckTags1,
+     a(case IsDurable andalso HasPersistentPubs of
+           true -> ok = rabbit_msg_store:sync(
+                          ?PERSISTENT_MSG_STORE, PersistentGuids,
+                          msg_store_callback(PersistentGuids,
+                                             PubsOrdered, AckTags1, Fun)),
+                   State;
+           false -> tx_commit_post_msg_store(
+                      HasPersistentPubs, PubsOrdered, AckTags1, Fun, State)
+       end)}.
+
+%% Put the messages identified by AckTags back on the queue, marked
+%% as delivered. Messages still held in the pending-ack entry are
+%% republished directly; messages that were only on disk are read
+%% back from their message store first and republished with
+%% MsgOnDisk = true. On-disk messages are released (not removed) in
+%% the message store.
+requeue(AckTags, State) ->
+    a(reduce_memory_use(
+        ack(fun rabbit_msg_store:release/2,
+            fun (#msg_status { msg = Msg }, State1) ->
+                    {_SeqId, State2} = publish(Msg, true, false, State1),
+                    State2;
+                ({IsPersistent, Guid}, State1) ->
+                    #vqstate { msg_store_clients = MSCState } = State1,
+                    {{ok, Msg = #basic_message{}}, MSCState1} =
+                        read_from_msg_store(MSCState, IsPersistent, Guid),
+                    State2 = State1 #vqstate { msg_store_clients = MSCState1 },
+                    {_SeqId, State3} = publish(Msg, true, true, State2),
+                    State3
+            end,
+            AckTags, State))).
+
+%% Number of messages in the queue, as tracked in the state.
+len(#vqstate { len = Len }) -> Len.
+
+%% True when the queue holds no messages.
+is_empty(State) -> 0 == len(State).
+
+%% Set the target duration (in seconds, or 'infinity') of messages
+%% to keep in RAM. The duration is converted to a message count
+%% using the sum of the current average ingress and egress rates.
+%% Memory use is reduced straight away unless the new target is
+%% infinity or is no lower than a finite previous target.
+set_ram_duration_target(DurationTarget,
+                        State = #vqstate {
+                          rates = #rates { avg_egress = AvgEgressRate,
+                                           avg_ingress = AvgIngressRate },
+                          target_ram_msg_count = TargetRamMsgCount }) ->
+    Rate = AvgEgressRate + AvgIngressRate,
+    TargetRamMsgCount1 =
+        case DurationTarget of
+            infinity -> infinity;
+            _ -> trunc(DurationTarget * Rate) %% msgs = sec * msgs/sec
+        end,
+    State1 = State #vqstate { target_ram_msg_count = TargetRamMsgCount1,
+                              duration_target = DurationTarget },
+    a(case TargetRamMsgCount1 == infinity orelse
+          (TargetRamMsgCount =/= infinity andalso
+           TargetRamMsgCount1 >= TargetRamMsgCount) of
+          true -> State1;
+          false -> reduce_memory_use(State1)
+      end).
+
+%% Recompute the ingress/egress rates and return {Duration, State},
+%% where Duration estimates, in seconds, how much traffic the
+%% messages in RAM represent ('infinity' when both rates are zero).
+%% The in/out counters are reset and the RAM target is re-derived
+%% from the existing duration target using the new rates.
+ram_duration(State = #vqstate {
+               rates = #rates { egress = Egress,
+                                ingress = Ingress,
+                                timestamp = Timestamp } = Rates,
+               in_counter = InCount,
+               out_counter = OutCount,
+               ram_msg_count = RamMsgCount,
+               duration_target = DurationTarget,
+               ram_msg_count_prev = RamMsgCountPrev }) ->
+    Now = now(),
+    {AvgEgressRate, Egress1} = update_rate(Now, Timestamp, OutCount, Egress),
+    {AvgIngressRate, Ingress1} = update_rate(Now, Timestamp, InCount, Ingress),
+
+    Duration = %% msgs / (msgs/sec) == sec
+        case AvgEgressRate == 0 andalso AvgIngressRate == 0 of
+            true -> infinity;
+            %% (RamMsgCountPrev + RamMsgCount) / 2 is the mean of the
+            %% current and the previous RAM message counts
+            false -> (RamMsgCountPrev + RamMsgCount) /
+                         (2 * (AvgEgressRate + AvgIngressRate))
+        end,
+
+    {Duration, set_ram_duration_target(
+                 DurationTarget,
+                 State #vqstate {
+                   rates = Rates #rates {
+                             egress = Egress1,
+                             ingress = Ingress1,
+                             avg_egress = AvgEgressRate,
+                             avg_ingress = AvgIngressRate,
+                             timestamp = Now },
+                   in_counter = 0,
+                   out_counter = 0,
+                   ram_msg_count_prev = RamMsgCount })}.
+
+%% True when idle_timeout/1 would have work to do: either there are
+%% commits waiting in on_sync, or a memory reduction is needed. The
+%% latter is determined by running reduce_memory_use/4 with no-op
+%% phase-change functions and keeping only its verdict.
+needs_idle_timeout(State = #vqstate { on_sync = ?BLANK_SYNC }) ->
+    {Res, _State} = reduce_memory_use(fun (_Quota, State1) -> State1 end,
+                                      fun (_Quota, State1) -> State1 end,
+                                      fun (State1) -> State1 end,
+                                      State),
+    Res;
+needs_idle_timeout(_State) ->
+    true.
+
+%% Flush any commits waiting in on_sync, then reduce memory use.
+idle_timeout(State) -> a(reduce_memory_use(tx_commit_index(State))).
+
+%% Flush the queue index before the owning process hibernates.
+handle_pre_hibernate(State = #vqstate { index_state = IndexState }) ->
+    State #vqstate { index_state = rabbit_queue_index:flush(IndexState) }.
+
+%% Return a property list summarising the queue state: the lengths
+%% of q1..q4, the delta record, counters, and the average rates.
+%% outstanding_txns is the number of commit funs waiting in on_sync.
+status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4,
+                  len = Len,
+                  pending_ack = PA,
+                  on_sync = #sync { funs = From },
+                  target_ram_msg_count = TargetRamMsgCount,
+                  ram_msg_count = RamMsgCount,
+                  ram_index_count = RamIndexCount,
+                  next_seq_id = NextSeqId,
+                  persistent_count = PersistentCount,
+                  rates = #rates {
+                    avg_egress = AvgEgressRate,
+                    avg_ingress = AvgIngressRate } }) ->
+    [ {q1 , queue:len(Q1)},
+      {q2 , bpqueue:len(Q2)},
+      {delta , Delta},
+      {q3 , bpqueue:len(Q3)},
+      {q4 , queue:len(Q4)},
+      {len , Len},
+      {pending_acks , dict:size(PA)},
+      {outstanding_txns , length(From)},
+      {target_ram_msg_count , TargetRamMsgCount},
+      {ram_msg_count , RamMsgCount},
+      {ram_index_count , RamIndexCount},
+      {next_seq_id , NextSeqId},
+      {persistent_count , PersistentCount},
+      {avg_egress_rate , AvgEgressRate},
+      {avg_ingress_rate , AvgIngressRate} ].
+
+%%----------------------------------------------------------------------------
+%% Minor helpers
+%%----------------------------------------------------------------------------
+
+%% Assert the structural invariants of the queue state and return
+%% the state unchanged. Any violated invariant crashes the caller
+%% with a badmatch.
+a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4,
+                     len = Len,
+                     persistent_count = PersistentCount,
+                     ram_msg_count = RamMsgCount,
+                     ram_index_count = RamIndexCount }) ->
+    E1 = queue:is_empty(Q1),
+    E2 = bpqueue:is_empty(Q2),
+    ED = Delta#delta.count == 0,
+    E3 = bpqueue:is_empty(Q3),
+    E4 = queue:is_empty(Q4),
+    LZ = Len == 0,
+
+    %% q1 may only hold messages while q3 is non-empty
+    true = E1 or not E3,
+    %% q2 may only hold messages while delta is non-empty
+    true = E2 or not ED,
+    %% delta may only be non-empty while q3 is non-empty
+    true = ED or not E3,
+    %% the queue is empty exactly when both q3 and q4 are empty
+    true = LZ == (E3 and E4),
+
+    true = Len >= 0,
+    true = PersistentCount >= 0,
+    true = RamMsgCount >= 0,
+    true = RamIndexCount >= 0,
+
+    State.
+
+%% Check the invariants of a single message status record and hand
+%% it back untouched: persistent messages have their index entry on
+%% disk, an index entry on disk implies the message is on disk too,
+%% and a message that is not on disk must be held in memory.
+m(MsgStatus = #msg_status {}) ->
+    #msg_status { msg = Payload,
+                  is_persistent = Persistent,
+                  msg_on_disk = StoredOnDisk,
+                  index_on_disk = IndexedOnDisk } = MsgStatus,
+    true = IndexedOnDisk or (not Persistent),
+    true = StoredOnDisk or (not IndexedOnDisk),
+    true = StoredOnDisk or (Payload =/= undefined),
+
+    MsgStatus.
+
+%% Map a boolean onto 0 or 1, for use in counter arithmetic.
+one_if(false) -> 0;
+one_if(true ) -> 1.
+
+%% Prepend Elem to List only when the flag is true.
+cons_if(false, _Elem, List) -> List;
+cons_if(true, Elem, List) -> [Elem | List].
+
+%% Build the status record of a message that has not been delivered
+%% and has neither its body nor its index entry on disk.
+msg_status(IsPersistent, SeqId, Msg = #basic_message {}) ->
+    #msg_status { seq_id = SeqId,
+                  guid = Msg #basic_message.guid,
+                  msg = Msg,
+                  is_persistent = IsPersistent,
+                  is_delivered = false,
+                  msg_on_disk = false,
+                  index_on_disk = false }.
+
+%% Select the message store that holds messages of the given
+%% persistence.
+find_msg_store(false) -> ?TRANSIENT_MSG_STORE;
+find_msg_store(true) -> ?PERSISTENT_MSG_STORE.
+
+%% Run Fun(MsgStore, ClientState) against the persistent (true) or
+%% transient (false) store's client state, and return the result
+%% along with the pair of client states, the used one updated.
+with_msg_store_state({Persistent, {Transient, TRef}}, false, Fun) ->
+    {Result, Transient1} = Fun(?TRANSIENT_MSG_STORE, Transient),
+    {Result, {Persistent, {Transient1, TRef}}};
+with_msg_store_state({{Persistent, PRef}, Transient}, true, Fun) ->
+    {Result, Persistent1} = Fun(?PERSISTENT_MSG_STORE, Persistent),
+    {Result, {{Persistent1, PRef}, Transient}}.
+
+%% Read the message with the given guid from the store appropriate
+%% to its persistence, threading the client states through.
+read_from_msg_store(MSCState, IsPersistent, Guid) ->
+    Reader = fun (MsgStore, ClientState) ->
+                     rabbit_msg_store:read(MsgStore, Guid, ClientState)
+             end,
+    with_msg_store_state(MSCState, IsPersistent, Reader).
+
+%% Record a delivery for SeqId in the queue index when asked to.
+maybe_write_delivered(true, SeqId, IndexState) ->
+    rabbit_queue_index:deliver([SeqId], IndexState);
+maybe_write_delivered(false, _SeqId, IndexState) ->
+    IndexState.
+
+%% Fetch the record of transaction Txn from the process dictionary,
+%% or an empty record if the transaction is not known yet.
+lookup_tx(Txn) ->
+    Key = {txn, Txn},
+    case get(Key) of
+        undefined -> #tx { pending_messages = [],
+                           pending_acks = [] };
+        Known -> Known
+    end.
+
+%% Save the record of transaction Txn in the process dictionary.
+store_tx(Txn, Record) ->
+    Key = {txn, Txn},
+    put(Key, Record).
+
+%% Forget transaction Txn.
+erase_tx(Txn) ->
+    Key = {txn, Txn},
+    erase(Key).
+
+%% Guids of those messages in the list that are flagged persistent,
+%% in the order in which they appear.
+persistent_guids(Pubs) ->
+    [Id || #basic_message { guid = Id, is_persistent = true } <- Pubs].
+
+%% Turn a list of {Guid, SeqId, IsPersistent, IsDelivered} index
+%% entries into a bpqueue of betas. Transient entries with a
+%% sequence id below TransientThreshold are left out; instead they
+%% are acked in the index, after being marked delivered if they were
+%% not already. Returns {Betas, IndexState}.
+betas_from_index_entries(List, TransientThreshold, IndexState) ->
+    {Filtered, Delivers, Acks} =
+        lists:foldr(
+          fun ({Guid, SeqId, IsPersistent, IsDelivered},
+               {Filtered1, Delivers1, Acks1}) ->
+                  case SeqId < TransientThreshold andalso not IsPersistent of
+                      true -> {Filtered1,
+                               cons_if(not IsDelivered, SeqId, Delivers1),
+                               [SeqId | Acks1]};
+                      false -> {[m(#msg_status { msg = undefined,
+                                                 guid = Guid,
+                                                 seq_id = SeqId,
+                                                 is_persistent = IsPersistent,
+                                                 is_delivered = IsDelivered,
+                                                 msg_on_disk = true,
+                                                 index_on_disk = true
+                                               }) | Filtered1],
+                                Delivers1,
+                                Acks1}
+                  end
+          end, {[], [], []}, List),
+    %% every surviving entry has its index on disk, hence the single
+    %% 'true' prefix
+    {bpqueue:from_list([{true, Filtered}]),
+     rabbit_queue_index:ack(Acks,
+                            rabbit_queue_index:deliver(Delivers, IndexState))}.
+
+%% the first arg is the older delta
+%% A blank delta combined with another delta yields that other
+%% delta; two non-blank deltas yield a delta spanning from the start
+%% of the older to the end of the newer, with their counts summed.
+combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) ->
+    ?BLANK_DELTA;
+combine_deltas(?BLANK_DELTA_PATTERN(X), #delta { start_seq_id = Start,
+                                                 count = Count,
+                                                 end_seq_id = End } = B) ->
+    true = Start + Count =< End, %% ASSERTION
+    B;
+combine_deltas(#delta { start_seq_id = Start,
+                        count = Count,
+                        end_seq_id = End } = A, ?BLANK_DELTA_PATTERN(Y)) ->
+    true = Start + Count =< End, %% ASSERTION
+    A;
+combine_deltas(#delta { start_seq_id = StartLow,
+                        count = CountLow,
+                        end_seq_id = EndLow },
+               #delta { start_seq_id = StartHigh,
+                        count = CountHigh,
+                        end_seq_id = EndHigh }) ->
+    Count = CountLow + CountHigh,
+    true = (StartLow =< StartHigh) %% ASSERTIONS
+        andalso ((StartLow + CountLow) =< EndLow)
+        andalso ((StartHigh + CountHigh) =< EndHigh)
+        andalso ((StartLow + Count) =< EndHigh),
+    #delta { start_seq_id = StartLow, count = Count, end_seq_id = EndHigh }.
+
+%% Fold Fun over the values of a bpqueue, ignoring their prefixes.
+beta_fold(Fun, Init, Q) ->
+    bpqueue:foldr(fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end, Init, Q).
+
+%% Compute a rate in messages per second (timer:now_diff/2 yields
+%% microseconds) and return it with the {Timestamp, Count} pair to
+%% remember for the next call. OThen and OCount are the pair that
+%% was remembered by the previous call.
+update_rate(Now, Then, Count, {OThen, OCount}) ->
+    %% avg over the current period and the previous
+    {1000000.0 * (Count + OCount) / timer:now_diff(Now, OThen), {Then, Count}}.
+
+%%----------------------------------------------------------------------------
+%% Internal major helpers for Public API
+%%----------------------------------------------------------------------------
+
+%% Build the callback given to rabbit_msg_store:sync/3 by tx_commit/3.
+%% When called, it spawns a process that asks the queue process
+%% (the process that built the callback) to continue the commit with
+%% tx_commit_post_msg_store/5.
+%% NOTE(review): the first fun given to rabbit_misc:with_exit_handler/2
+%% presumably runs if that request exits, removing the persistent
+%% messages from the store again - confirm against rabbit_misc.
+msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) ->
+    Self = self(),
+    F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue(
+                    Self, fun (StateN) -> tx_commit_post_msg_store(
+                                            true, Pubs, AckTags, Fun, StateN)
+                          end)
+        end,
+    fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler(
+                                     fun () -> rabbit_msg_store:remove(
+                                                 ?PERSISTENT_MSG_STORE,
+                                                 PersistentGuids)
+                                     end, F)
+                    end)
+    end.
+
+%% Continue a commit after the message store part is done. If the
+%% queue is durable and the transaction involves persistent
+%% publishes or acks of persistent messages, the transaction is
+%% added to on_sync, to be completed by a later tx_commit_index/1.
+%% Otherwise it is completed right away, without disturbing whatever
+%% is already waiting in on_sync.
+tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun,
+                         State = #vqstate {
+                           on_sync = OnSync = #sync {
+                                       acks_persistent = SPAcks,
+                                       acks_all = SAcks,
+                                       pubs = SPubs,
+                                       funs = SFuns },
+                           pending_ack = PA,
+                           durable = IsDurable }) ->
+    PersistentAcks =
+        case IsDurable of
+            %% a pending ack holding a whole #msg_status{} is never
+            %% persistent (see accumulate_ack/3)
+            true -> [AckTag || AckTag <- AckTags,
+                               case dict:fetch(AckTag, PA) of
+                                   #msg_status {} -> false;
+                                   {IsPersistent, _Guid} -> IsPersistent
+                               end];
+            false -> []
+        end,
+    case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of
+        true -> State #vqstate { on_sync = #sync {
+                                   acks_persistent = [PersistentAcks | SPAcks],
+                                   acks_all = [AckTags | SAcks],
+                                   pubs = [Pubs | SPubs],
+                                   funs = [Fun | SFuns] }};
+        %% commit just this transaction via a temporary sync record,
+        %% then put the original on_sync back
+        false -> State1 = tx_commit_index(
+                            State #vqstate { on_sync = #sync {
+                                               acks_persistent = [],
+                                               acks_all = [AckTags],
+                                               pubs = [Pubs],
+                                               funs = [Fun] } }),
+                 State1 #vqstate { on_sync = OnSync }
+    end.
+
+%% Complete all commits waiting in on_sync: apply their acks,
+%% publish their messages in commit order, sync the queue index for
+%% the persistent sequence ids involved, run the commit funs (oldest
+%% first) and finally reduce memory use. A no-op when on_sync is
+%% blank.
+tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) ->
+    State;
+tx_commit_index(State = #vqstate { on_sync = #sync {
+                                     acks_persistent = SPAcks,
+                                     acks_all = SAcks,
+                                     pubs = SPubs,
+                                     funs = SFuns },
+                                   durable = IsDurable }) ->
+    PAcks = lists:append(SPAcks),
+    Acks = lists:append(SAcks),
+    %% transactions were accumulated newest first
+    Pubs = lists:append(lists:reverse(SPubs)),
+    {SeqIds, State1 = #vqstate { index_state = IndexState }} =
+        lists:foldl(
+          fun (Msg = #basic_message { is_persistent = IsPersistent },
+               {SeqIdsAcc, State2}) ->
+                  IsPersistent1 = IsDurable andalso IsPersistent,
+                  %% IsPersistent1 is passed as MsgOnDisk: such
+                  %% messages were written by tx_publish/3
+                  {SeqId, State3} = publish(Msg, false, IsPersistent1, State2),
+                  {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3}
+          end, {PAcks, ack(Acks, State)}, Pubs),
+    IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState),
+    [ Fun() || Fun <- lists:reverse(SFuns) ],
+    reduce_memory_use(
+      State1 #vqstate { index_state = IndexState1, on_sync = ?BLANK_SYNC }).
+
+%% Remove all messages held in q3 and in delta. Each round empties
+%% q3 and then pulls the next batch of delta entries into q3, until
+%% q3 stays empty.
+purge_betas_and_deltas(State = #vqstate { q3 = Q3,
+                                          index_state = IndexState }) ->
+    case bpqueue:is_empty(Q3) of
+        true -> State;
+        false -> IndexState1 = remove_queue_entries(fun beta_fold/3, Q3,
+                                                    IndexState),
+                 purge_betas_and_deltas(
+                   maybe_deltas_to_betas(
+                     State #vqstate { q3 = bpqueue:new(),
+                                      index_state = IndexState1 }))
+    end.
+
+%% Remove every entry of Q (traversed with Fold) from disk: on-disk
+%% messages are removed from their message stores, and on-disk index
+%% entries are marked delivered where necessary and then acked.
+%% Returns the updated index state; Q itself is not modified.
+remove_queue_entries(Fold, Q, IndexState) ->
+    {GuidsByStore, Delivers, Acks} =
+        Fold(fun remove_queue_entries1/2, {orddict:new(), [], []}, Q),
+    ok = orddict:fold(fun (MsgStore, Guids, ok) ->
+                              rabbit_msg_store:remove(MsgStore, Guids)
+                      end, ok, GuidsByStore),
+    rabbit_queue_index:ack(Acks,
+                           rabbit_queue_index:deliver(Delivers, IndexState)).
+
+%% Fold step of remove_queue_entries/3: collect the guid (per
+%% message store) of a message that is on disk, and the sequence id
+%% of an index entry that needs a deliver and/or an ack.
+remove_queue_entries1(
+  #msg_status { guid = Guid, seq_id = SeqId,
+                is_delivered = IsDelivered, msg_on_disk = MsgOnDisk,
+                index_on_disk = IndexOnDisk, is_persistent = IsPersistent },
+  {GuidsByStore, Delivers, Acks}) ->
+    {case MsgOnDisk of
+         true -> rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid,
+                                          GuidsByStore);
+         false -> GuidsByStore
+     end,
+     cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers),
+     cons_if(IndexOnDisk, SeqId, Acks)}.
+
+%%----------------------------------------------------------------------------
+%% Internal gubbins for publishing
+%%----------------------------------------------------------------------------
+
+%% Add Msg to the queue under the next sequence id and return
+%% {SeqId, State}. IsDelivered and MsgOnDisk seed the corresponding
+%% flags of the message status. The message goes onto q4 when q3 is
+%% empty and onto q1 otherwise.
+publish(Msg = #basic_message { is_persistent = IsPersistent },
+        IsDelivered, MsgOnDisk,
+        State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4,
+                           next_seq_id = SeqId,
+                           len = Len,
+                           in_counter = InCount,
+                           persistent_count = PCount,
+                           durable = IsDurable,
+                           ram_msg_count = RamMsgCount }) ->
+    %% a message is only treated as persistent if the queue is durable
+    IsPersistent1 = IsDurable andalso IsPersistent,
+    MsgStatus = (msg_status(IsPersistent1, SeqId, Msg))
+        #msg_status { is_delivered = IsDelivered, msg_on_disk = MsgOnDisk },
+    %% nothing is forced: only persistent messages get written
+    {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State),
+    State2 = case bpqueue:is_empty(Q3) of
+                 false -> State1 #vqstate { q1 = queue:in(m(MsgStatus1), Q1) };
+                 true -> State1 #vqstate { q4 = queue:in(m(MsgStatus1), Q4) }
+             end,
+    PCount1 = PCount + one_if(IsPersistent1),
+    {SeqId, State2 #vqstate { next_seq_id = SeqId + 1,
+                              len = Len + 1,
+                              in_counter = InCount + 1,
+                              persistent_count = PCount1,
+                              ram_msg_count = RamMsgCount + 1}}.
+
+%% Write the message body to its message store if it is not there
+%% yet and either Force is true or the message is persistent. The
+%% copy that is written has its decoded content cleared. Returns the
+%% message status (with msg_on_disk set if a write happened) and the
+%% updated message store client states.
+maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status {
+                                  msg_on_disk = true }, MSCState) ->
+    {MsgStatus, MSCState};
+maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
+                                 msg = Msg, guid = Guid,
+                                 is_persistent = IsPersistent }, MSCState)
+  when Force orelse IsPersistent ->
+    {ok, MSCState1} =
+        with_msg_store_state(
+          MSCState, IsPersistent,
+          fun (MsgStore, MSCState2) ->
+                  Msg1 = Msg #basic_message {
+                           %% don't persist any recoverable decoded properties
+                           content = rabbit_binary_parser:clear_decoded_content(
+                                       Msg #basic_message.content)},
+                  rabbit_msg_store:write(MsgStore, Guid, Msg1, MSCState2)
+          end),
+    {MsgStatus #msg_status { msg_on_disk = true }, MSCState1};
+maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) ->
+    {MsgStatus, MSCState}.
+
+%% Publish the message's entry to the queue index if it is not there
+%% yet and either Force is true or the message is persistent. The
+%% message body must already be on disk. A message that has already
+%% been delivered is also marked as delivered in the index. Returns
+%% the message status and the updated index state.
+maybe_write_index_to_disk(_Force, MsgStatus = #msg_status {
+                                    index_on_disk = true }, IndexState) ->
+    true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION
+    {MsgStatus, IndexState};
+maybe_write_index_to_disk(Force, MsgStatus = #msg_status {
+                                   guid = Guid, seq_id = SeqId,
+                                   is_persistent = IsPersistent,
+                                   is_delivered = IsDelivered }, IndexState)
+  when Force orelse IsPersistent ->
+    true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION
+    IndexState1 = rabbit_queue_index:publish(Guid, SeqId, IsPersistent,
+                                             IndexState),
+    {MsgStatus #msg_status { index_on_disk = true },
+     maybe_write_delivered(IsDelivered, SeqId, IndexState1)};
+maybe_write_index_to_disk(_Force, MsgStatus, IndexState) ->
+    {MsgStatus, IndexState}.
+
+%% Write the message body and then its index entry, each subject to
+%% its own force flag (see the two functions above), and return the
+%% resulting message status and queue state.
+maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus,
+                    State = #vqstate { index_state = IndexState,
+                                       msg_store_clients = MSCState }) ->
+    {MsgStatus1, MSCState1} = maybe_write_msg_to_disk(
+                                ForceMsg, MsgStatus, MSCState),
+    {MsgStatus2, IndexState1} = maybe_write_index_to_disk(
+                                  ForceIndex, MsgStatus1, IndexState),
+    {MsgStatus2, State #vqstate { index_state = IndexState1,
+                                  msg_store_clients = MSCState1 }}.
+
+%%----------------------------------------------------------------------------
+%% Internal gubbins for acks
+%%----------------------------------------------------------------------------
+
+%% Remember a delivered message under its sequence id until it is
+%% acked. For a message that is on disk only {IsPersistent, Guid} is
+%% kept; otherwise the whole message status is kept.
+record_pending_ack(#msg_status { guid = Guid, seq_id = SeqId,
+                                 is_persistent = IsPersistent,
+                                 msg_on_disk = MsgOnDisk } = MsgStatus, PA) ->
+    AckEntry = case MsgOnDisk of
+                   true -> {IsPersistent, Guid};
+                   false -> MsgStatus
+               end,
+    dict:store(SeqId, AckEntry, PA).
+
+%% Discard all pending acks. With KeepPersistent = true only the
+%% messages in the transient store are removed from disk; with
+%% KeepPersistent = false the collected sequence ids are acked in the
+%% queue index and the messages are removed from both stores.
+remove_pending_ack(KeepPersistent,
+                   State = #vqstate { pending_ack = PA,
+                                      index_state = IndexState }) ->
+    {SeqIds, GuidsByStore} = dict:fold(fun accumulate_ack/3,
+                                       {[], orddict:new()}, PA),
+    State1 = State #vqstate { pending_ack = dict:new() },
+    case KeepPersistent of
+        true -> case orddict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of
+                    error -> State1;
+                    {ok, Guids} -> ok = rabbit_msg_store:remove(
+                                          ?TRANSIENT_MSG_STORE, Guids),
+                                   State1
+                end;
+        false -> IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState),
+                 ok = orddict:fold(
+                        fun (MsgStore, Guids, ok) ->
+                                rabbit_msg_store:remove(MsgStore, Guids)
+                        end, ok, GuidsByStore),
+                 State1 #vqstate { index_state = IndexState1 }
+    end.
+
+%% Process the pending acks named by AckTags. Each entry is erased
+%% from pending_ack and passed to Fun(AckEntry, State), which returns
+%% a new state. Afterwards the collected sequence ids are acked in
+%% the queue index, MsgStoreFun(MsgStore, Guids) is called once per
+%% message store, and persistent_count is reduced by the number of
+%% persistent messages involved. Crashes if a tag is not pending.
+ack(_MsgStoreFun, _Fun, [], State) ->
+    State;
+ack(MsgStoreFun, Fun, AckTags, State) ->
+    {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state = IndexState,
+                                                 persistent_count = PCount }} =
+        lists:foldl(
+          fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) ->
+                  {ok, AckEntry} = dict:find(SeqId, PA),
+                  {accumulate_ack(SeqId, AckEntry, Acc),
+                   Fun(AckEntry, State2 #vqstate {
+                                   pending_ack = dict:erase(SeqId, PA) })}
+          end, {{[], orddict:new()}, State}, AckTags),
+    IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState),
+    ok = orddict:fold(fun (MsgStore, Guids, ok) ->
+                              MsgStoreFun(MsgStore, Guids)
+                      end, ok, GuidsByStore),
+    PCount1 = PCount - case orddict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of
+                           error -> 0;
+                           {ok, Guids} -> length(Guids)
+                       end,
+    State1 #vqstate { index_state = IndexState1,
+                      persistent_count = PCount1 }.
+
+%% Fold step over pending ack entries. An entry that still holds the
+%% whole message status (nothing on disk) contributes nothing. An
+%% {IsPersistent, Guid} entry contributes its guid to the list for
+%% its message store and, if persistent, its sequence id to the list
+%% of index entries to ack.
+accumulate_ack(_SeqId, #msg_status { is_persistent = false, %% ASSERTIONS
+                                     msg_on_disk = false,
+                                     index_on_disk = false }, Acc) ->
+    Acc;
+accumulate_ack(SeqId, {IsPersistent, Guid}, {SeqIdsAcc, Dict}) ->
+    {cons_if(IsPersistent, SeqId, SeqIdsAcc),
+     rabbit_misc:orddict_cons(find_msg_store(IsPersistent), Guid, Dict)}.
+
+%%----------------------------------------------------------------------------
+%% Phase changes
+%%----------------------------------------------------------------------------
+
+%% Determine whether a reduction in memory use is necessary, and call
+%% functions to perform the required phase changes. The function can
+%% also be used to just do the former, by passing in dummy phase
+%% change functions.
+%%
+%% The function does not report on any needed beta->delta conversions,
+%% though the conversion function for that is called as necessary. The
+%% reason is twofold. Firstly, this is safe because the conversion is
+%% only ever necessary just after a transition to a
+%% target_ram_msg_count of zero or after an incremental alpha->beta
+%% conversion. In the former case the conversion is performed straight
+%% away (i.e. any betas present at the time are converted to deltas),
+%% and in the latter case the need for a conversion is flagged up
+%% anyway. Secondly, this is necessary because we do not have a
+%% precise and cheap predicate for determining whether a beta->delta
+%% conversion is necessary - due to the complexities of retaining up
+%% to one segment's worth of messages in q3 - and thus would risk
+%% perpetually reporting the need for a conversion when no such
+%% conversion is needed. That in turn could cause an infinite loop.
+%%
+%% Returns {Reduce, State}, where Reduce is true if an alpha->beta
+%% conversion or a full batch of index writes was called for.
+reduce_memory_use(AlphaBetaFun, BetaGammaFun, BetaDeltaFun, State) ->
+    {Reduce, State1} = case chunk_size(State #vqstate.ram_msg_count,
+                                       State #vqstate.target_ram_msg_count) of
+                           0 -> {false, State};
+                           S1 -> {true, AlphaBetaFun(S1, State)}
+                       end,
+    case State1 #vqstate.target_ram_msg_count of
+        infinity -> {Reduce, State1};
+        0 -> {Reduce, BetaDeltaFun(State1)};
+        _ -> case chunk_size(State1 #vqstate.ram_index_count,
+                             permitted_ram_index_count(State1)) of
+                 %% index entries are only written out in full batches
+                 ?IO_BATCH_SIZE = S2 -> {true, BetaGammaFun(S2, State1)};
+                 _ -> {Reduce, State1}
+             end
+    end.
+
+%% Reduce memory use with the real phase change functions and return
+%% just the resulting state.
+reduce_memory_use(State) ->
+    {_, State1} = reduce_memory_use(fun push_alphas_to_betas/2,
+                                    fun limit_ram_index/2,
+                                    fun push_betas_to_deltas/1,
+                                    State),
+    State1.
+
+%% Write up to Quota index entries to disk for betas whose index
+%% entry is still only in RAM, taking them from q2 first and then
+%% from q3. ram_index_count is reduced by the number written.
+limit_ram_index(Quota, State = #vqstate { q2 = Q2, q3 = Q3,
+                                          index_state = IndexState,
+                                          ram_index_count = RamIndexCount }) ->
+    {Q2a, {Quota1, IndexState1}} = limit_ram_index(
+                                     fun bpqueue:map_fold_filter_r/4,
+                                     Q2, {Quota, IndexState}),
+    %% TODO: we shouldn't be writing index entries for messages that
+    %% can never end up in delta due them residing in the only segment
+    %% held by q3.
+    {Q3a, {Quota2, IndexState2}} = limit_ram_index(
+                                     fun bpqueue:map_fold_filter_r/4,
+                                     Q3, {Quota1, IndexState1}),
+    State #vqstate { q2 = Q2a, q3 = Q3a,
+                     index_state = IndexState2,
+                     ram_index_count = RamIndexCount - (Quota - Quota2) }.
+
+%% Apply MapFoldFilterFun to Q in order to write index entries to
+%% disk until the quota runs out. Returns {Q1, {QuotaLeft,
+%% IndexState1}}; with a quota of zero Q is returned untouched.
+%% NOTE(review): the 'not' filter presumably selects the entries
+%% whose bpqueue prefix (index_on_disk) is false - confirm against
+%% bpqueue.
+limit_ram_index(_MapFoldFilterFun, Q, {0, _IndexState} = Exhausted) ->
+    {Q, Exhausted};
+limit_ram_index(MapFoldFilterFun, Q, {Quota, IndexState}) ->
+    Select = fun erlang:'not'/1,
+    Write =
+        fun (MsgStatus, {0, _IndexStateN}) ->
+                false = MsgStatus #msg_status.index_on_disk, %% ASSERTION
+                stop;
+            (MsgStatus, {Remaining, IndexStateN}) when Remaining > 0 ->
+                false = MsgStatus #msg_status.index_on_disk, %% ASSERTION
+                {MsgStatus1, IndexStateN1} =
+                    maybe_write_index_to_disk(true, MsgStatus, IndexStateN),
+                {true, m(MsgStatus1), {Remaining - 1, IndexStateN1}}
+        end,
+    MapFoldFilterFun(Select, Write, {Quota, IndexState}, Q).
+
+%% Number of beta index entries that may be kept in RAM only. An
+%% empty queue has no limit. Otherwise the limit is the number of
+%% betas less the truncated quotient of that number squared and the
+%% number of messages that are not in delta.
+permitted_ram_index_count(#vqstate { len = 0 }) ->
+    infinity;
+permitted_ram_index_count(#vqstate { len = Len,
+                                     q2 = Q2,
+                                     q3 = Q3,
+                                     delta = #delta { count = DeltaCount } }) ->
+    Betas = bpqueue:len(Q2) + bpqueue:len(Q3),
+    NotInDelta = Len - DeltaCount,
+    Betas - trunc(Betas * Betas / NotInDelta).
+
+%% Size of the next batch of work: zero when Current does not exceed
+%% Permitted (or there is no limit), otherwise the excess capped at
+%% ?IO_BATCH_SIZE.
+chunk_size(_Current, infinity) ->
+    0;
+chunk_size(Current, Permitted) when Permitted >= Current ->
+    0;
+chunk_size(Current, Permitted) ->
+    Excess = Current - Permitted,
+    lists:min([Excess, ?IO_BATCH_SIZE]).
+
+%% Move the message at the head of q3 to the tail of q4, reading its
+%% body back from the message store. Returns {empty, State} when q3
+%% is empty and {loaded, State} otherwise. If this empties q3, q3 is
+%% refilled from delta or, when delta is empty as well, q1 is joined
+%% onto q4.
+fetch_from_q3_to_q4(State = #vqstate {
+                      q1 = Q1,
+                      q2 = Q2,
+                      delta = #delta { count = DeltaCount },
+                      q3 = Q3,
+                      q4 = Q4,
+                      ram_msg_count = RamMsgCount,
+                      ram_index_count = RamIndexCount,
+                      msg_store_clients = MSCState }) ->
+    case bpqueue:out(Q3) of
+        {empty, _Q3} ->
+            {empty, State};
+        {{value, IndexOnDisk, MsgStatus = #msg_status {
+                                msg = undefined, guid = Guid,
+                                is_persistent = IsPersistent }}, Q3a} ->
+            {{ok, Msg = #basic_message {}}, MSCState1} =
+                read_from_msg_store(MSCState, IsPersistent, Guid),
+            Q4a = queue:in(m(MsgStatus #msg_status { msg = Msg }), Q4),
+            %% an entry whose index was only in RAM no longer counts
+            %% towards ram_index_count once it has left the betas
+            RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk),
+            true = RamIndexCount1 >= 0, %% ASSERTION
+            State1 = State #vqstate { q3 = Q3a,
+                                      q4 = Q4a,
+                                      ram_msg_count = RamMsgCount + 1,
+                                      ram_index_count = RamIndexCount1,
+                                      msg_store_clients = MSCState1 },
+            State2 =
+                case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of
+                    {true, true} ->
+                        %% q3 is now empty, it wasn't before; delta is
+                        %% still empty. So q2 must be empty, and q1
+                        %% can now be joined onto q4
+                        true = bpqueue:is_empty(Q2), %% ASSERTION
+                        State1 #vqstate { q1 = queue:new(),
+                                          q4 = queue:join(Q4a, Q1) };
+                    {true, false} ->
+                        maybe_deltas_to_betas(State1);
+                    {false, _} ->
+                        %% q3 still isn't empty, we've not touched
+                        %% delta, so the invariants between q1, q2,
+                        %% delta and q3 are maintained
+                        State1
+                end,
+            {loaded, State2}
+    end.
+
+%% Load the next batch of delta entries (up to the next index
+%% segment boundary) into q3 as betas. Nothing happens when delta is
+%% blank, nor when q3 is non-empty while the RAM message target is
+%% zero. Entries that betas_from_index_entries/3 filters out are
+%% skipped; if a whole batch is filtered out, the following batch is
+%% tried. When the last delta entry has been loaded, q2 is joined
+%% onto q3.
+maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) ->
+    State;
+maybe_deltas_to_betas(State = #vqstate {
+                        q2 = Q2,
+                        delta = Delta,
+                        q3 = Q3,
+                        index_state = IndexState,
+                        target_ram_msg_count = TargetRamMsgCount,
+                        transient_threshold = TransientThreshold }) ->
+    case bpqueue:is_empty(Q3) orelse (TargetRamMsgCount /= 0) of
+        false ->
+            State;
+        true ->
+            #delta { start_seq_id = DeltaSeqId,
+                     count = DeltaCount,
+                     end_seq_id = DeltaSeqIdEnd } = Delta,
+            %% never read beyond the end of the delta
+            DeltaSeqId1 =
+                lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId),
+                           DeltaSeqIdEnd]),
+            {List, IndexState1} =
+                rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, IndexState),
+            {Q3a, IndexState2} = betas_from_index_entries(
+                                   List, TransientThreshold, IndexState1),
+            State1 = State #vqstate { index_state = IndexState2 },
+            case bpqueue:len(Q3a) of
+                0 ->
+                    %% we ignored every message in the segment due to
+                    %% it being transient and below the threshold.
+                    %% Carry on from State1, not State: the read and
+                    %% the delivers/acks issued for the ignored
+                    %% entries have changed the index state, and
+                    %% those changes must not be lost.
+                    maybe_deltas_to_betas(
+                      State1 #vqstate {
+                        delta = Delta #delta { start_seq_id = DeltaSeqId1 }});
+                Q3aLen ->
+                    Q3b = bpqueue:join(Q3, Q3a),
+                    case DeltaCount - Q3aLen of
+                        0 ->
+                            %% delta is now empty, but it wasn't
+                            %% before, so can now join q2 onto q3
+                            State1 #vqstate { q2 = bpqueue:new(),
+                                              delta = ?BLANK_DELTA,
+                                              q3 = bpqueue:join(Q3b, Q2) };
+                        N when N > 0 ->
+                            Delta1 = #delta { start_seq_id = DeltaSeqId1,
+                                              count = N,
+                                              end_seq_id = DeltaSeqIdEnd },
+                            State1 #vqstate { delta = Delta1,
+                                              q3 = Q3b }
+                    end
+            end
+    end.
+
+%% Convert up to Quota alphas into betas, taking them from q1 first
+%% and spending whatever quota is left on q4.
+push_alphas_to_betas(Quota, State) ->
+    { Quota1, State1} = maybe_push_q1_to_betas(Quota, State),
+    {_Quota2, State2} = maybe_push_q4_to_betas(Quota1, State1),
+    State2.
+
+%% Convert messages from the head of q1 into betas. While delta is
+%% empty they are appended to q3, otherwise to q2.
+maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) ->
+    maybe_push_alphas_to_betas(
+      fun queue:out/1,
+      fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk },
+           Q1a, State1 = #vqstate { q3 = Q3, delta = #delta { count = 0 } }) ->
+              State1 #vqstate { q1 = Q1a,
+                                q3 = bpqueue:in(IndexOnDisk, MsgStatus, Q3) };
+          (MsgStatus = #msg_status { index_on_disk = IndexOnDisk },
+           Q1a, State1 = #vqstate { q2 = Q2 }) ->
+              State1 #vqstate { q1 = Q1a,
+                                q2 = bpqueue:in(IndexOnDisk, MsgStatus, Q2) }
+      end, Quota, Q1, State).
+
+%% Convert messages from the tail of q4 into betas, pushing them
+%% onto the head of q3.
+maybe_push_q4_to_betas(Quota, State = #vqstate { q4 = Q4 }) ->
+    maybe_push_alphas_to_betas(
+      fun queue:out_r/1,
+      fun (MsgStatus = #msg_status { index_on_disk = IndexOnDisk },
+           Q4a, State1 = #vqstate { q3 = Q3 }) ->
+              State1 #vqstate { q3 = bpqueue:in_r(IndexOnDisk, MsgStatus, Q3),
+                                q4 = Q4a }
+      end, Quota, Q4, State).
+
+%% Common loop of the two functions above. Generator takes the next
+%% message off Q; Consumer(MsgStatus, RestOfQ, State) stores the
+%% resulting beta in the state. The loop stops when the quota is
+%% used up, when the RAM message count is within its target, or when
+%% Q is empty. Returns {QuotaLeft, State}.
+maybe_push_alphas_to_betas(_Generator, _Consumer, Quota, _Q,
+                           State = #vqstate {
+                             ram_msg_count = RamMsgCount,
+                             target_ram_msg_count = TargetRamMsgCount })
+  when Quota =:= 0 orelse
+       TargetRamMsgCount =:= infinity orelse TargetRamMsgCount >= RamMsgCount ->
+    {Quota, State};
+maybe_push_alphas_to_betas(Generator, Consumer, Quota, Q, State) ->
+    case Generator(Q) of
+        {empty, _Q} ->
+            {Quota, State};
+        {{value, MsgStatus}, Qa} ->
+            %% force the message body to disk; the index entry is
+            %% only written if the message is persistent
+            {MsgStatus1 = #msg_status { msg_on_disk = true,
+                                        index_on_disk = IndexOnDisk },
+             State1 = #vqstate { ram_msg_count = RamMsgCount,
+                                 ram_index_count = RamIndexCount }} =
+                maybe_write_to_disk(true, false, MsgStatus, State),
+            %% drop the in-memory copy of the message body
+            MsgStatus2 = m(MsgStatus1 #msg_status { msg = undefined }),
+            RamIndexCount1 = RamIndexCount + one_if(not IndexOnDisk),
+            State2 = State1 #vqstate { ram_msg_count = RamMsgCount - 1,
+                                       ram_index_count = RamIndexCount1 },
+            maybe_push_alphas_to_betas(Generator, Consumer, Quota - 1, Qa,
+                                       Consumer(MsgStatus2, Qa, State2))
+    end.
+
+%% Convert betas into deltas. Everything in q2 is converted, taking
+%% entries from its head. From q3, taking entries from its tail,
+%% only those at or beyond the segment boundary that follows q3's
+%% first entry are converted. The resulting deltas are combined with
+%% the existing one.
+push_betas_to_deltas(State = #vqstate { q2 = Q2,
+                                        delta = Delta,
+                                        q3 = Q3,
+                                        index_state = IndexState,
+                                        ram_index_count = RamIndexCount }) ->
+    {Delta2, Q2a, RamIndexCount2, IndexState2} =
+        push_betas_to_deltas(fun (Q2MinSeqId) -> Q2MinSeqId end,
+                             fun bpqueue:out/1, Q2,
+                             RamIndexCount, IndexState),
+    {Delta3, Q3a, RamIndexCount3, IndexState3} =
+        push_betas_to_deltas(fun rabbit_queue_index:next_segment_boundary/1,
+                             fun bpqueue:out_r/1, Q3,
+                             RamIndexCount2, IndexState2),
+    %% combine_deltas/2 takes the older delta first
+    Delta4 = combine_deltas(Delta3, combine_deltas(Delta, Delta2)),
+    State #vqstate { q2 = Q2a,
+                     delta = Delta4,
+                     q3 = Q3a,
+                     index_state = IndexState3,
+                     ram_index_count = RamIndexCount3 }.
+
+%% Convert the entries of Q whose sequence id is at or above
+%% LimitFun(MinSeqId) into a delta, where MinSeqId is the sequence id
+%% at the head of Q. Returns {Delta, Q1, RamIndexCount, IndexState};
+%% the delta is blank when Q is empty or has no entry at or above
+%% the limit.
+push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) ->
+    case bpqueue:out(Q) of
+        {empty, _Q} ->
+            {?BLANK_DELTA, Q, RamIndexCount, IndexState};
+        {{value, _IndexOnDisk1, #msg_status { seq_id = MinSeqId }}, _Qa} ->
+            {{value, _IndexOnDisk2, #msg_status { seq_id = MaxSeqId }}, _Qb} =
+                bpqueue:out_r(Q),
+            Limit = LimitFun(MinSeqId),
+            case MaxSeqId < Limit of
+                true -> {?BLANK_DELTA, Q, RamIndexCount, IndexState};
+                false -> {Len, Qc, RamIndexCount1, IndexState1} =
+                             push_betas_to_deltas(Generator, Limit, Q, 0,
+                                                  RamIndexCount, IndexState),
+                         {#delta { start_seq_id = Limit,
+                                   count = Len,
+                                   end_seq_id = MaxSeqId + 1 },
+                          Qc, RamIndexCount1, IndexState1}
+            end
+    end.
+
+%% Worker loop for push_betas_to_deltas/5. Takes entries off Q with
+%% Generator until Q is empty or an entry with a sequence id below
+%% Limit is found. The index entry of each entry taken is written to
+%% disk if it was not there already, in which case RamIndexCount is
+%% decremented. Returns {Count, Q1, RamIndexCount, IndexState}, where
+%% Count is the number of entries taken on top of the initial Count.
+push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) ->
+    case Generator(Q) of
+        {empty, _Q} ->
+            {Count, Q, RamIndexCount, IndexState};
+        {{value, _IndexOnDisk, #msg_status { seq_id = SeqId }}, _Qa}
+          when SeqId < Limit ->
+            {Count, Q, RamIndexCount, IndexState};
+        {{value, IndexOnDisk, MsgStatus}, Qa} ->
+            {RamIndexCount1, IndexState1} =
+                case IndexOnDisk of
+                    true -> {RamIndexCount, IndexState};
+                    %% the match asserts that the write took place
+                    false -> {#msg_status { index_on_disk = true },
+                              IndexState2} =
+                                 maybe_write_index_to_disk(true, MsgStatus,
+                                                           IndexState),
+                             {RamIndexCount - 1, IndexState2}
+                end,
+            push_betas_to_deltas(
+              Generator, Limit, Qa, Count + 1, RamIndexCount1, IndexState1)
+    end.
diff --git a/src/rabbit_writer.erl b/src/rabbit_writer.erl index 3d10dc12..f90ee734 100644 --- a/src/rabbit_writer.erl +++ b/src/rabbit_writer.erl @@ -33,14 +33,14 @@ -include("rabbit.hrl"). -include("rabbit_framing.hrl"). --export([start/3, start_link/3, shutdown/1, mainloop/1]). +-export([start/4, start_link/4, shutdown/1, mainloop/1]). -export([send_command/2, send_command/3, send_command_and_signal_back/3, send_command_and_signal_back/4, send_command_and_notify/5]). --export([internal_send_command/3, internal_send_command/5]). +-export([internal_send_command/4, internal_send_command/6]). -import(gen_tcp). --record(wstate, {sock, channel, frame_max}). +-record(wstate, {sock, channel, frame_max, protocol}). -define(HIBERNATE_AFTER, 5000). @@ -48,34 +48,53 @@ -ifdef(use_specs). --spec(start/3 :: (socket(), channel_number(), non_neg_integer()) -> pid()). --spec(start_link/3 :: (socket(), channel_number(), non_neg_integer()) -> pid()). --spec(send_command/2 :: (pid(), amqp_method_record()) -> 'ok'). --spec(send_command/3 :: (pid(), amqp_method_record(), content()) -> 'ok'). --spec(send_command_and_signal_back/3 :: (pid(), amqp_method(), pid()) -> 'ok').
+-spec(start/4 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), rabbit_types:protocol()) + -> rabbit_types:ok(pid())). +-spec(start_link/4 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + non_neg_integer(), rabbit_types:protocol()) + -> rabbit_types:ok(pid())). +-spec(send_command/2 :: + (pid(), rabbit_framing:amqp_method_record()) -> 'ok'). +-spec(send_command/3 :: + (pid(), rabbit_framing:amqp_method_record(), rabbit_types:content()) + -> 'ok'). +-spec(send_command_and_signal_back/3 :: + (pid(), rabbit_framing:amqp_method(), pid()) -> 'ok'). -spec(send_command_and_signal_back/4 :: - (pid(), amqp_method(), content(), pid()) -> 'ok'). + (pid(), rabbit_framing:amqp_method(), rabbit_types:content(), pid()) + -> 'ok'). -spec(send_command_and_notify/5 :: - (pid(), pid(), pid(), amqp_method_record(), content()) -> 'ok'). --spec(internal_send_command/3 :: - (socket(), channel_number(), amqp_method_record()) -> 'ok'). --spec(internal_send_command/5 :: - (socket(), channel_number(), amqp_method_record(), - content(), non_neg_integer()) -> 'ok'). + (pid(), pid(), pid(), rabbit_framing:amqp_method_record(), + rabbit_types:content()) + -> 'ok'). +-spec(internal_send_command/4 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + rabbit_framing:amqp_method_record(), rabbit_types:protocol()) + -> 'ok'). +-spec(internal_send_command/6 :: + (rabbit_net:socket(), rabbit_channel:channel_number(), + rabbit_framing:amqp_method_record(), rabbit_types:content(), + non_neg_integer(), rabbit_types:protocol()) + -> 'ok'). -endif. %%---------------------------------------------------------------------------- -start(Sock, Channel, FrameMax) -> - spawn(?MODULE, mainloop, [#wstate{sock = Sock, - channel = Channel, - frame_max = FrameMax}]). 
- -start_link(Sock, Channel, FrameMax) -> - spawn_link(?MODULE, mainloop, [#wstate{sock = Sock, +start(Sock, Channel, FrameMax, Protocol) -> + {ok, spawn(?MODULE, mainloop, [#wstate{sock = Sock, channel = Channel, - frame_max = FrameMax}]). + frame_max = FrameMax, + protocol = Protocol}])}. + +start_link(Sock, Channel, FrameMax, Protocol) -> + {ok, spawn_link(?MODULE, mainloop, [#wstate{sock = Sock, + channel = Channel, + frame_max = FrameMax, + protocol = Protocol}])}. mainloop(State) -> receive @@ -85,35 +104,40 @@ mainloop(State) -> end. handle_message({send_command, MethodRecord}, - State = #wstate{sock = Sock, channel = Channel}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord), + State = #wstate{sock = Sock, channel = Channel, + protocol = Protocol}) -> + ok = internal_send_command_async(Sock, Channel, MethodRecord, Protocol), State; handle_message({send_command, MethodRecord, Content}, State = #wstate{sock = Sock, channel = Channel, - frame_max = FrameMax}) -> + frame_max = FrameMax, + protocol = Protocol}) -> ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), + Content, FrameMax, Protocol), State; handle_message({send_command_and_signal_back, MethodRecord, Parent}, - State = #wstate{sock = Sock, channel = Channel}) -> - ok = internal_send_command_async(Sock, Channel, MethodRecord), + State = #wstate{sock = Sock, channel = Channel, + protocol = Protocol}) -> + ok = internal_send_command_async(Sock, Channel, MethodRecord, Protocol), Parent ! rabbit_writer_send_command_signal, State; handle_message({send_command_and_signal_back, MethodRecord, Content, Parent}, State = #wstate{sock = Sock, channel = Channel, - frame_max = FrameMax}) -> + frame_max = FrameMax, + protocol = Protocol}) -> ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), + Content, FrameMax, Protocol), Parent ! 
rabbit_writer_send_command_signal, State; handle_message({send_command_and_notify, QPid, ChPid, MethodRecord, Content}, State = #wstate{sock = Sock, channel = Channel, - frame_max = FrameMax}) -> + frame_max = FrameMax, + protocol = Protocol}) -> ok = internal_send_command_async(Sock, Channel, MethodRecord, - Content, FrameMax), + Content, FrameMax, Protocol), rabbit_amqqueue:notify_sent(QPid, ChPid), State; handle_message({inet_reply, _, ok}, State) -> @@ -149,34 +173,37 @@ send_command_and_notify(W, Q, ChPid, MethodRecord, Content) -> shutdown(W) -> W ! shutdown, + rabbit_misc:unlink_and_capture_exit(W), ok. %--------------------------------------------------------------------------- -assemble_frames(Channel, MethodRecord) -> +assemble_frames(Channel, MethodRecord, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, none), - rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord). + rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord, + Protocol). -assemble_frames(Channel, MethodRecord, Content, FrameMax) -> +assemble_frames(Channel, MethodRecord, Content, FrameMax, Protocol) -> ?LOGMESSAGE(out, Channel, MethodRecord, Content), MethodName = rabbit_misc:method_record_type(MethodRecord), - true = rabbit_framing:method_has_content(MethodName), % assertion + true = Protocol:method_has_content(MethodName), % assertion MethodFrame = rabbit_binary_generator:build_simple_method_frame( - Channel, MethodRecord), + Channel, MethodRecord, Protocol), ContentFrames = rabbit_binary_generator:build_simple_content_frames( - Channel, Content, FrameMax), + Channel, Content, FrameMax, Protocol), [MethodFrame | ContentFrames]. tcp_send(Sock, Data) -> rabbit_misc:throw_on_error(inet_error, fun () -> rabbit_net:send(Sock, Data) end). -internal_send_command(Sock, Channel, MethodRecord) -> - ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord)). 
+internal_send_command(Sock, Channel, MethodRecord, Protocol) -> + ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, Protocol)). -internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) -> +internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax, + Protocol) -> ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, - Content, FrameMax)). + Content, FrameMax, Protocol)). %% gen_tcp:send/2 does a selective receive of {inet_reply, Sock, %% Status} to obtain the result. That is bad when it is called from @@ -196,13 +223,14 @@ internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) -> %% Also note that the port has bounded buffers and port_command blocks %% when these are full. So the fact that we process the result %% asynchronously does not impact flow control. -internal_send_command_async(Sock, Channel, MethodRecord) -> - true = port_cmd(Sock, assemble_frames(Channel, MethodRecord)), +internal_send_command_async(Sock, Channel, MethodRecord, Protocol) -> + true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, Protocol)), ok. -internal_send_command_async(Sock, Channel, MethodRecord, Content, FrameMax) -> +internal_send_command_async(Sock, Channel, MethodRecord, Content, FrameMax, + Protocol) -> true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, - Content, FrameMax)), + Content, FrameMax, Protocol)), ok. port_cmd(Sock, Data) -> diff --git a/src/supervisor2.erl b/src/supervisor2.erl index 0b1d7265..fb4c9b02 100644 --- a/src/supervisor2.erl +++ b/src/supervisor2.erl @@ -4,27 +4,50 @@ %% 1) the module name is supervisor2 %% %% 2) there is a new strategy called -%% simple_one_for_one_terminate. This is exactly the same as for -%% simple_one_for_one, except that children *are* explicitly -%% terminated as per the shutdown component of the child_spec. +%% simple_one_for_one_terminate. 
This is exactly the same as for +%% simple_one_for_one, except that children *are* explicitly +%% terminated as per the shutdown component of the child_spec. %% -%% All modifications are (C) 2010 LShift Ltd. +%% 3) child specifications can contain, as the restart type, a tuple +%% {permanent, Delay} | {transient, Delay} where Delay >= 0. The +%% delay, in seconds, indicates what should happen if a child, upon +%% being restarted, exceeds the MaxT and MaxR parameters. Thus, if +%% a child exits, it is restarted as normal. If it exits +%% sufficiently quickly and often to exceed the boundaries set by +%% the MaxT and MaxR parameters, and a Delay is specified, then +%% rather than stopping the supervisor, the supervisor instead +%% continues and tries to start up the child again, Delay seconds +%% later. +%% +%% Note that you can never restart more frequently than the MaxT +%% and MaxR parameters allow: i.e. you must wait until *both* the +%% Delay has passed *and* the MaxT and MaxR parameters allow the +%% child to be restarted. +%% +%% Also note that the Delay is a *minimum*. There is no guarantee +%% that the child will be restarted within that time, especially if +%% other processes are dying and being restarted at the same time - +%% essentially we have to wait for the delay to have passed and for +%% the MaxT and MaxR parameters to permit the child to be +%% restarted. This may require waiting for longer than Delay. +%% +%% All modifications are (C) 2010 Rabbit Technologies Ltd. %% %% %CopyrightBegin% -%% +%% %% Copyright Ericsson AB 1996-2009. All Rights Reserved. -%% +%% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. 
-%% +%% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. -%% +%% %% %CopyrightEnd% %% -module(supervisor2). @@ -43,6 +66,7 @@ %% Internal exports -export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). -export([handle_cast/2]). +-export([delayed_restart/2]). -define(DICT, dict). @@ -119,6 +143,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> end; check_childspecs(X) -> {error, {badarg, X}}. +delayed_restart(Supervisor, RestartDetails) -> + gen_server:cast(Supervisor, {delayed_restart, RestartDetails}). + %%% --------------------------------------------------- %%% %%% Initialize the supervisor. @@ -315,6 +342,20 @@ handle_call(which_children, _From, State) -> {reply, Resp, State}. +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when ?is_simple(State) -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; +handle_cast({delayed_restart, {RestartType, Reason, Child}}, State) + when not (?is_simple(State)) -> + case get_child(Child#child.name, State) of + {value, Child} -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; + _ -> + {noreply, State} + end; + %%% Hopefully cause a function-clause as there is no API function %%% that utilizes cast. handle_cast(null, State) -> @@ -480,6 +521,16 @@ restart_child(Pid, Reason, State) -> {ok, State} end. 
+do_restart({RestartType, Delay}, Reason, Child, State) -> + case restart1(Child, State) of + {ok, NState} -> + {ok, NState}; + {terminate, NState} -> + {ok, _TRef} = timer:apply_after( + trunc(Delay*1000), ?MODULE, delayed_restart, + [self(), {{RestartType, Delay}, Reason, Child}]), + {ok, NState} + end; do_restart(permanent, Reason, Child, State) -> report_error(child_terminated, Reason, Child, State#state.name), restart(Child, State); @@ -500,14 +551,27 @@ do_restart(temporary, Reason, Child, State) -> restart(Child, State) -> case add_restart(State) of {ok, NState} -> - restart(NState#state.strategy, Child, NState); + restart(NState#state.strategy, Child, NState, fun restart/2); {terminate, NState} -> report_error(shutdown, reached_max_restart_intensity, Child, State#state.name), {shutdown, remove_child(Child, NState)} end. -restart(Strategy, Child, State) +restart1(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState, fun restart1/2); + {terminate, _NState} -> + %% we've reached the max restart intensity, but the + %% add_restart will have added to the restarts + %% field. Given we don't want to die here, we need to go + %% back to the old restarts field otherwise we'll never + %% attempt to restart later. + {terminate, State} + end. 
+ +restart(Strategy, Child, State, Restart) when Strategy =:= simple_one_for_one orelse Strategy =:= simple_one_for_one_terminate -> #child{mfa = {M, F, A}} = Child, @@ -521,9 +585,9 @@ restart(Strategy, Child, State) {ok, NState}; {error, Error} -> report_error(start_error, Error, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(one_for_one, Child, State) -> +restart(one_for_one, Child, State, Restart) -> case do_start_child(State#state.name, Child) of {ok, Pid} -> NState = replace_child(Child#child{pid = Pid}, State), @@ -533,25 +597,25 @@ restart(one_for_one, Child, State) -> {ok, NState}; {error, Reason} -> report_error(start_error, Reason, Child, State#state.name), - restart(Child, State) + Restart(Child, State) end; -restart(rest_for_one, Child, State) -> +restart(rest_for_one, Child, State, Restart) -> {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), ChAfter2 = terminate_children(ChAfter, State#state.name), case start_children(ChAfter2, State#state.name) of {ok, ChAfter3} -> {ok, State#state{children = ChAfter3 ++ ChBefore}}; {error, ChAfter3} -> - restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + Restart(Child, State#state{children = ChAfter3 ++ ChBefore}) end; -restart(one_for_all, Child, State) -> +restart(one_for_all, Child, State, Restart) -> Children1 = del_child(Child#child.pid, State#state.children), Children2 = terminate_children(Children1, State#state.name), case start_children(Children2, State#state.name) of {ok, NChs} -> {ok, State#state{children = NChs}}; {error, NChs} -> - restart(Child, State#state{children = NChs}) + Restart(Child, State#state{children = NChs}) end. %%----------------------------------------------------------------- @@ -769,7 +833,9 @@ supname(N,_) -> N. 
%%% {Name, Func, RestartType, Shutdown, ChildType, Modules} %%% where Name is an atom %%% Func is {Mod, Fun, Args} == {atom, atom, list} -%%% RestartType is permanent | temporary | transient +%%% RestartType is permanent | temporary | transient | +%%% {permanent, Delay} | +%%% {transient, Delay} where Delay >= 0 %%% Shutdown = integer() | infinity | brutal_kill %%% ChildType = supervisor | worker %%% Modules = [atom()] | dynamic @@ -815,10 +881,17 @@ validFunc({M, F, A}) when is_atom(M), is_list(A) -> true; validFunc(Func) -> throw({invalid_mfa, Func}). -validRestartType(permanent) -> true; -validRestartType(temporary) -> true; -validRestartType(transient) -> true; -validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}). +validRestartType(permanent) -> true; +validRestartType(temporary) -> true; +validRestartType(transient) -> true; +validRestartType({permanent, Delay}) -> validDelay(Delay); +validRestartType({transient, Delay}) -> validDelay(Delay); +validRestartType(RestartType) -> throw({invalid_restart_type, + RestartType}). + +validDelay(Delay) when is_number(Delay), + Delay >= 0 -> true; +validDelay(What) -> throw({invalid_delay, What}). validShutdown(Shutdown, _) when is_integer(Shutdown), Shutdown > 0 -> true; diff --git a/src/test_sup.erl b/src/test_sup.erl new file mode 100644 index 00000000..f41793bc --- /dev/null +++ b/src/test_sup.erl @@ -0,0 +1,94 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License at +%% http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +%% License for the specific language governing rights and limitations +%% under the License. +%% +%% The Original Code is RabbitMQ. 
+%% +%% The Initial Developers of the Original Code are LShift Ltd, +%% Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd, +%% Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd +%% are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial +%% Technologies LLC, and Rabbit Technologies Ltd. +%% +%% Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift +%% Ltd. Portions created by Cohesive Financial Technologies LLC are +%% Copyright (C) 2007-2010 Cohesive Financial Technologies +%% LLC. Portions created by Rabbit Technologies Ltd are Copyright +%% (C) 2007-2010 Rabbit Technologies Ltd. +%% +%% All Rights Reserved. +%% +%% Contributor(s): ______________________________________. +%% + +-module(test_sup). + +-behaviour(supervisor2). + +-export([test_supervisor_delayed_restart/0, + init/1, start_child/0]). + +test_supervisor_delayed_restart() -> + passed = with_sup(simple_one_for_one_terminate, + fun (SupPid) -> + {ok, _ChildPid} = + supervisor2:start_child(SupPid, []), + test_supervisor_delayed_restart(SupPid) + end), + passed = with_sup(one_for_one, fun test_supervisor_delayed_restart/1). + +test_supervisor_delayed_restart(SupPid) -> + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + ok = ping_child(SupPid), + ok = exit_child(SupPid), + timer:sleep(10), + timeout = ping_child(SupPid), + timer:sleep(1010), + ok = ping_child(SupPid), + passed. + +with_sup(RestartStrategy, Fun) -> + {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]), + Res = Fun(SupPid), + exit(SupPid, shutdown), + rabbit_misc:unlink_and_capture_exit(SupPid), + Res. + +init([RestartStrategy]) -> + {ok, {{RestartStrategy, 1, 1}, + [{test, {test_sup, start_child, []}, {permanent, 1}, + 16#ffffffff, worker, [test_sup]}]}}. + +start_child() -> + {ok, proc_lib:spawn_link(fun run_child/0)}. + +ping_child(SupPid) -> + Ref = make_ref(), + get_child_pid(SupPid) ! 
{ping, Ref, self()}, + receive {pong, Ref} -> ok + after 1000 -> timeout + end. + +exit_child(SupPid) -> + true = exit(get_child_pid(SupPid), abnormal), + ok. + +get_child_pid(SupPid) -> + [{_Id, ChildPid, worker, [test_sup]}] = + supervisor2:which_children(SupPid), + ChildPid. + +run_child() -> + receive {ping, Ref, Pid} -> Pid ! {pong, Ref}, + run_child() + end. diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl index cd03fcc6..bbc3a8c0 100644 --- a/src/vm_memory_monitor.erl +++ b/src/vm_memory_monitor.erl @@ -72,8 +72,10 @@ -ifdef(use_specs). --spec(start_link/1 :: (float()) -> - ('ignore' | {'error', any()} | {'ok', pid()})). +-spec(start_link/1 :: + (float()) -> 'ignore' | + rabbit_types:error(any()) | + rabbit_types:ok(pid())). -spec(update/0 :: () -> 'ok'). -spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')). -spec(get_vm_limit/0 :: () -> (non_neg_integer() | 'unknown')). diff --git a/src/worker_pool.erl b/src/worker_pool.erl index 97e07545..01ce3535 100644 --- a/src/worker_pool.erl +++ b/src/worker_pool.erl @@ -52,7 +52,7 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -spec(submit/1 :: (fun (() -> A) | {atom(), atom(), [any()]}) -> A). -spec(submit_async/1 :: (fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok'). diff --git a/src/worker_pool_sup.erl b/src/worker_pool_sup.erl index 4ded63a8..afa21164 100644 --- a/src/worker_pool_sup.erl +++ b/src/worker_pool_sup.erl @@ -41,9 +41,9 @@ -ifdef(use_specs). --spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}). --spec(start_link/1 :: - (non_neg_integer()) -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())). +-spec(start_link/1 :: (non_neg_integer()) -> + 'ignore' | rabbit_types:ok_or_error2(pid(), any())). -endif. 
diff --git a/src/worker_pool_worker.erl b/src/worker_pool_worker.erl index 57901fd5..a61e4cc3 100644 --- a/src/worker_pool_worker.erl +++ b/src/worker_pool_worker.erl @@ -44,7 +44,8 @@ -ifdef(use_specs). --spec(start_link/1 :: (any()) -> {'ok', pid()} | 'ignore' | {'error', any()}). +-spec(start_link/1 :: + (any()) -> {'ok', pid()} | 'ignore' | rabbit_types:error(any())). -spec(submit/2 :: (pid(), fun (() -> A) | {atom(), atom(), [any()]}) -> A). -spec(submit_async/2 :: (pid(), fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok'). |