91 files changed, 8644 insertions, 1597 deletions
diff --git a/.hgignore b/.hgignore
index caaa3ace..03b60914 100644
--- a/.hgignore
+++ b/.hgignore
@@ -10,7 +10,8 @@ syntax: regexp
 ^cover/
 ^dist/
 ^include/rabbit_framing\.hrl$
-^src/rabbit_framing\.erl$
+^include/rabbit_framing_spec\.hrl$
+^src/rabbit_framing_amqp.*\.erl$
 ^src/.*\_usage.erl$
 ^rabbit\.plt$
 ^basic.plt$
diff --git a/Makefile b/Makefile
index ad75bcb9..1767808c 100644
--- a/Makefile
+++ b/Makefile
@@ -12,10 +12,10 @@ EBIN_DIR=ebin
 INCLUDE_DIR=include
 DOCS_DIR=docs
 INCLUDES=$(wildcard $(INCLUDE_DIR)/*.hrl) $(INCLUDE_DIR)/rabbit_framing.hrl
-SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing.erl $(USAGES_ERL)
+SOURCES=$(wildcard $(SOURCE_DIR)/*.erl) $(SOURCE_DIR)/rabbit_framing_amqp_0_9_1.erl $(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl $(USAGES_ERL)
 BEAM_TARGETS=$(patsubst $(SOURCE_DIR)/%.erl, $(EBIN_DIR)/%.beam, $(SOURCES))
 TARGETS=$(EBIN_DIR)/rabbit.app $(INCLUDE_DIR)/rabbit_framing.hrl $(BEAM_TARGETS)
-WEB_URL=http://stage.rabbitmq.com/
+WEB_URL=http://www.rabbitmq.com/
 MANPAGES=$(patsubst %.xml, %.gz, $(wildcard $(DOCS_DIR)/*.[0-9].xml))
 WEB_MANPAGES=$(patsubst %.xml, %.man.xml, $(wildcard $(DOCS_DIR)/*.[0-9].xml) $(DOCS_DIR)/rabbitmq-service.xml)
 USAGES_XML=$(DOCS_DIR)/rabbitmqctl.1.xml $(DOCS_DIR)/rabbitmq-multi.1.xml
@@ -40,11 +40,11 @@ BASIC_PLT=basic.plt
 RABBIT_PLT=rabbit.plt
 
 ifndef USE_SPECS
-# our type specs rely on features / bug fixes in dialyzer that are
-# only available in R13B01 upwards (R13B01 is eshell 5.7.2)
+# our type specs rely on features and bug fixes in dialyzer that are
+# only available in R14A upwards, i.e. erts newer than 5.7.5 (R13B04 is erts 5.7.5)
 #
 # NB: the test assumes that version number will only contain single digits
-USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.1" ]; then echo "true"; else echo "false"; fi)
+USE_SPECS=$(shell if [ $$(erl -noshell -eval 'io:format(erlang:system_info(version)), halt().') \> "5.7.5" ]; then echo "true"; else echo "false"; fi)
 endif
 
 #other args: +native +"{hipe,[o3,verbose]}" -Ddebug=true +debug_info +no_strict_record_tests
@@ -56,7 +56,8 @@ TARGET_SRC_DIR=dist/$(TARBALL_NAME)
 
 SIBLING_CODEGEN_DIR=../rabbitmq-codegen/
 AMQP_CODEGEN_DIR=$(shell [ -d $(SIBLING_CODEGEN_DIR) ] && echo $(SIBLING_CODEGEN_DIR) || echo codegen)
-AMQP_SPEC_JSON_PATH=$(AMQP_CODEGEN_DIR)/amqp-0.8.json
+AMQP_SPEC_JSON_FILES_0_9_1=$(AMQP_CODEGEN_DIR)/amqp-rabbitmq-0.9.1.json
+AMQP_SPEC_JSON_FILES_0_8=$(AMQP_CODEGEN_DIR)/amqp-rabbitmq-0.8.json
 
 ERL_CALL=erl_call -sname $(RABBITMQ_NODENAME) -e
 
@@ -70,6 +71,24 @@ define usage_dep
   $(call usage_xml_to_erl, $(1)): $(1) $(DOCS_DIR)/usage.xsl
 endef
 
+ifneq "$(SBIN_DIR)" ""
+ifneq "$(TARGET_DIR)" ""
+SCRIPTS_REL_PATH=$(shell ./calculate-relative $(TARGET_DIR)/sbin $(SBIN_DIR))
+endif
+endif
+
+# Versions prior to this are not supported
+NEED_MAKE := 3.80
+ifneq "$(NEED_MAKE)" "$(firstword $(sort $(NEED_MAKE) $(MAKE_VERSION)))"
+$(error Versions of make prior to $(NEED_MAKE) are not supported)
+endif
+
+# .DEFAULT_GOAL introduced in 3.81
+DEFAULT_GOAL_MAKE := 3.81
+ifneq "$(DEFAULT_GOAL_MAKE)" "$(firstword $(sort $(DEFAULT_GOAL_MAKE) $(MAKE_VERSION)))"
+.DEFAULT_GOAL=all
+endif
+
 all: $(TARGETS)
 
 $(DEPS_FILE): $(SOURCES) $(INCLUDES)
@@ -81,11 +100,14 @@ $(EBIN_DIR)/rabbit.app: $(EBIN_DIR)/rabbit_app.in $(BEAM_TARGETS) generate_app
 $(EBIN_DIR)/%.beam:
 	erlc $(ERLC_OPTS) -pa $(EBIN_DIR) $<
 
-$(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_PATH)
-	$(PYTHON) codegen.py header $(AMQP_SPEC_JSON_PATH) $@
+$(INCLUDE_DIR)/rabbit_framing.hrl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_9_1) $(AMQP_SPEC_JSON_FILES_0_8)
+	$(PYTHON) codegen.py --ignore-conflicts header $(AMQP_SPEC_JSON_FILES_0_9_1) $(AMQP_SPEC_JSON_FILES_0_8) $@
+
+$(SOURCE_DIR)/rabbit_framing_amqp_0_9_1.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_9_1)
+	$(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES_0_9_1) $@
 
-$(SOURCE_DIR)/rabbit_framing.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_PATH)
-	$(PYTHON) codegen.py body   $(AMQP_SPEC_JSON_PATH) $@
+$(SOURCE_DIR)/rabbit_framing_amqp_0_8.erl: codegen.py $(AMQP_CODEGEN_DIR)/amqp_codegen.py $(AMQP_SPEC_JSON_FILES_0_8)
+	$(PYTHON) codegen.py body $(AMQP_SPEC_JSON_FILES_0_8) $@
 
 dialyze: $(BEAM_TARGETS) $(BASIC_PLT)
 	$(ERL_EBIN) -eval \
@@ -110,7 +132,7 @@ $(BASIC_PLT): $(BEAM_TARGETS)
 clean:
 	rm -f $(EBIN_DIR)/*.beam
 	rm -f $(EBIN_DIR)/rabbit.app $(EBIN_DIR)/rabbit.boot $(EBIN_DIR)/rabbit.script $(EBIN_DIR)/rabbit.rel
-	rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(SOURCE_DIR)/rabbit_framing.erl codegen.pyc
+	rm -f $(INCLUDE_DIR)/rabbit_framing.hrl $(SOURCE_DIR)/rabbit_framing_amqp_*.erl codegen.pyc
 	rm -f $(DOCS_DIR)/*.[0-9].gz $(DOCS_DIR)/*.man.xml $(DOCS_DIR)/*.erl $(USAGES_ERL)
 	rm -f $(RABBIT_PLT)
 	rm -f $(DEPS_FILE)
@@ -184,7 +206,7 @@ srcdist: distclean
 		>> $(TARGET_SRC_DIR)/INSTALL
 	cp README.in $(TARGET_SRC_DIR)/README
 	elinks -dump -no-references -no-numbering $(WEB_URL)build-server.html \
-		>> $(TARGET_SRC_DIR)/BUILD
+		>> $(TARGET_SRC_DIR)/README
 	sed -i.save 's/%%VSN%%/$(VERSION)/' $(TARGET_SRC_DIR)/ebin/rabbit_app.in && rm -f $(TARGET_SRC_DIR)/ebin/rabbit_app.in.save
 
 	cp -r $(AMQP_CODEGEN_DIR)/* $(TARGET_SRC_DIR)/codegen/
@@ -205,9 +227,10 @@ distclean: clean
 
 # xmlto can not read from standard input, so we mess with a tmp file.
 %.gz: %.xml $(DOCS_DIR)/examples-to-end.xsl
-	xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \
-	xmlto man -o $(DOCS_DIR) --stringparam man.indent.verbatims=0 $<.tmp && \
-	gzip -f $(DOCS_DIR)/`basename $< .xml`
+	xmlto --version | grep -E '^xmlto version 0\.0\.([0-9]|1[1-8])$$' >/dev/null || opt='--stringparam man.indent.verbatims=0' ; \
+	    xsltproc $(DOCS_DIR)/examples-to-end.xsl $< > $<.tmp && \
+	    xmlto man -o $(DOCS_DIR) $$opt $<.tmp && \
+	    gzip -f $(DOCS_DIR)/`basename $< .xml`
 	rm -f $<.tmp
 
 # Use tmp files rather than a pipeline so that we get meaningful errors
@@ -234,13 +257,7 @@ $(SOURCE_DIR)/%_usage.erl:
 
 docs_all: $(MANPAGES) $(WEB_MANPAGES)
 
-install: SCRIPTS_REL_PATH=$(shell ./calculate-relative $(TARGET_DIR)/sbin $(SBIN_DIR))
 install: all docs_all install_dirs
-	@[ -n "$(TARGET_DIR)" ] || (echo "Please set TARGET_DIR."; false)
-	@[ -n "$(SBIN_DIR)" ] || (echo "Please set SBIN_DIR."; false)
-	@[ -n "$(MAN_DIR)" ] || (echo "Please set MAN_DIR."; false)
-
-	mkdir -p $(TARGET_DIR)
 	cp -r ebin include LICENSE LICENSE-MPL-RabbitMQ INSTALL $(TARGET_DIR)
 
 	chmod 0755 scripts/*
@@ -256,10 +273,16 @@ install: all docs_all install_dirs
 	done
 
 install_dirs:
-	mkdir -p $(SBIN_DIR)
+	@ OK=true && \
+	  { [ -n "$(TARGET_DIR)" ] || { echo "Please set TARGET_DIR."; OK=false; }; } && \
+	  { [ -n "$(SBIN_DIR)" ] || { echo "Please set SBIN_DIR."; OK=false; }; } && \
+	  { [ -n "$(MAN_DIR)" ] || { echo "Please set MAN_DIR."; OK=false; }; } && $$OK
+
 	mkdir -p $(TARGET_DIR)/sbin
+	mkdir -p $(SBIN_DIR)
+	mkdir -p $(MAN_DIR)
 
-$(foreach XML, $(USAGES_XML), $(eval $(call usage_dep, $(XML))))
+$(foreach XML,$(USAGES_XML),$(eval $(call usage_dep, $(XML))))
 
 # Note that all targets which depend on clean must have clean in their
 # name.  Also any target that doesn't depend on clean should not have
diff --git a/codegen.py b/codegen.py
index 91c70e81..230d785e 100644
--- a/codegen.py
+++ b/codegen.py
@@ -93,6 +93,27 @@ class PackedMethodBitField:
     def full(self):
         return self.count() == 8
 
+def multiLineFormat(things, prologue, separator, lineSeparator, epilogue, thingsPerLine = 4):
+    r = [prologue]
+    i = 0
+    for t in things:
+        if i != 0:
+            if i % thingsPerLine == 0:
+                r += [lineSeparator]
+            else:
+                r += [separator]
+        r += [t]
+        i += 1
+    r += [epilogue]
+    return "".join(r)
+
+def prettyType(typeName, subTypes, typesPerLine = 4):
+    """Pretty print a type signature made up of many alternative subtypes"""
+    sTs = multiLineFormat(subTypes,
+                          "( ", " | ", "\n       | ", " )",
+                          thingsPerLine = typesPerLine)
+    return "-type(%s ::\n       %s)." % (typeName, sTs)
+
 def printFileHeader():
     print """%%   Autogenerated code. Do not edit.
 %%
@@ -294,11 +315,16 @@ def genErl(spec):
     methods = spec.allMethods()
 
     printFileHeader()
-    print """-module(rabbit_framing).
--include("rabbit_framing.hrl").
-
+    module = "rabbit_framing_amqp_%d_%d" % (spec.major, spec.minor)
+    if spec.revision != 0:
+        module = "%s_%d" % (module, spec.revision)
+    if module == "rabbit_framing_amqp_8_0":
+        module = "rabbit_framing_amqp_0_8"
+    print "-module(%s)." % module
+    print """-include("rabbit_framing.hrl").
+
+-export([version/0]).
 -export([lookup_method_name/1]).
-
 -export([method_id/1]).
 -export([method_has_content/1]).
 -export([is_method_synchronous/1]).
@@ -311,10 +337,92 @@ def genErl(spec):
 -export([lookup_amqp_exception/1]).
 -export([amqp_exception/1]).
 
+"""
+    print "%% Various types"
+    print "-ifdef(use_specs)."
+
+    print """-export_type([amqp_table/0, amqp_property_type/0, amqp_method_record/0,
+              amqp_method_name/0, amqp_method/0, amqp_class_id/0,
+              amqp_value/0, amqp_array/0, amqp_exception/0, amqp_property_record/0]).
+
+-type(amqp_field_type() ::
+      'longstr' | 'signedint' | 'decimal' | 'timestamp' |
+      'table' | 'byte' | 'double' | 'float' | 'long' |
+      'short' | 'bool' | 'binary' | 'void').
+-type(amqp_property_type() ::
+      'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' |
+      'longlongint' | 'timestamp' | 'bit' | 'table').
+
+-type(amqp_table() :: [{binary(), amqp_field_type(), amqp_value()}]).
+-type(amqp_array() :: [{amqp_field_type(), amqp_value()}]).
+-type(amqp_value() :: binary() |    % longstr
+                      integer() |   % signedint
+                      {non_neg_integer(), non_neg_integer()} | % decimal
+                      amqp_table() |
+                      amqp_array() |
+                      byte() |      % byte
+                      float() |     % double
+                      integer() |   % long
+                      integer() |   % short
+                      boolean() |   % bool
+                      binary() |    % binary
+                      'undefined' | % void
+                      non_neg_integer() % timestamp
+     ).
+"""
+
+    print prettyType("amqp_method_name()",
+                     [m.erlangName() for m in methods])
+    print prettyType("amqp_method()",
+                     ["{%s, %s}" % (m.klass.index, m.index) for m in methods],
+                     6)
+    print prettyType("amqp_method_record()",
+                     ["#%s{}" % (m.erlangName()) for m in methods])
+    fieldNames = set()
+    for m in methods:
+        fieldNames.update(m.arguments)
+    fieldNames = [erlangize(f.name) for f in fieldNames]
+    print prettyType("amqp_method_field_name()",
+                     fieldNames)
+    print prettyType("amqp_property_record()",
+                     ["#'P_%s'{}" % erlangize(c.name) for c in spec.allClasses()])
+    print prettyType("amqp_exception()",
+                     ["'%s'" % erlangConstantName(c).lower() for (c, v, cls) in spec.constants])
+    print prettyType("amqp_exception_code()",
+                     ["%i" % v for (c, v, cls) in spec.constants])
+    classIds = set()
+    for m in spec.allMethods():
+        classIds.add(m.klass.index)
+    print prettyType("amqp_class_id()",
+                     ["%i" % ci for ci in classIds])
+    print "-endif. % use_specs"
+
+    print """
+%% Method signatures
+-ifdef(use_specs).
+-spec(version/0 :: () -> {non_neg_integer(), non_neg_integer(), non_neg_integer()}).
+-spec(lookup_method_name/1 :: (amqp_method()) -> amqp_method_name()).
+-spec(method_id/1 :: (amqp_method_name()) -> amqp_method()).
+-spec(method_has_content/1 :: (amqp_method_name()) -> boolean()).
+-spec(is_method_synchronous/1 :: (amqp_method_record()) -> boolean()).
+-spec(method_record/1 :: (amqp_method_name()) -> amqp_method_record()).
+-spec(method_fieldnames/1 :: (amqp_method_name()) -> [amqp_method_field_name()]).
+-spec(decode_method_fields/2 :: (amqp_method_name(), binary()) -> amqp_method_record()).
+-spec(decode_properties/2 :: (non_neg_integer(), binary()) -> amqp_property_record()).
+-spec(encode_method_fields/1 :: (amqp_method_record()) -> binary()).
+-spec(encode_properties/1 :: (amqp_method_record()) -> binary()).
+-spec(lookup_amqp_exception/1 :: (amqp_exception()) -> {boolean(), amqp_exception_code(), binary()}).
+-spec(amqp_exception/1 :: (amqp_exception_code()) -> amqp_exception()).
+-endif. % use_specs
+
 bitvalue(true) -> 1;
 bitvalue(false) -> 0;
 bitvalue(undefined) -> 0.
 """
+    version = "{%d, %d, %d}" % (spec.major, spec.minor, spec.revision)
+    if version == '{8, 0, 0}': version = '{0, 8, 0}'
+    print "version() -> %s." % (version)
+
     for m in methods: genLookupMethodName(m)
     print "lookup_method_name({_ClassId, _MethodId} = Id) -> exit({unknown_method_id, Id})."
 
@@ -373,8 +481,6 @@ def genHrl(spec):
     methods = spec.allMethods()
 
     printFileHeader()
-    print "-define(PROTOCOL_VERSION_MAJOR, %d)." % (spec.major)
-    print "-define(PROTOCOL_VERSION_MINOR, %d)." % (spec.minor)
     print "-define(PROTOCOL_PORT, %d)." % (spec.port)
 
     for (c,v,cls) in spec.constants:
@@ -388,6 +494,7 @@ def genHrl(spec):
     for c in spec.allClasses():
         print "-record('P_%s', {%s})." % (erlangize(c.name), fieldNameList(c.fields))
 
+
 def generateErl(specPath):
     genErl(AmqpSpec(specPath))
 
@@ -395,5 +502,6 @@ def generateHrl(specPath):
     genHrl(AmqpSpec(specPath))
 
 if __name__ == "__main__":
-    do_main(generateHrl, generateErl)
+    do_main_dict({"header": generateHrl,
+                  "body": generateErl})
 
diff --git a/docs/html-to-website-xml.xsl b/docs/html-to-website-xml.xsl
index f2117e26..662dbea0 100644
--- a/docs/html-to-website-xml.xsl
+++ b/docs/html-to-website-xml.xsl
@@ -58,13 +58,13 @@
 <!-- Specific instructions to revert the DocBook HTML to be more like our ad-hoc XML schema -->
 
 <xsl:template match="div[@class='refsect1'] | div[@class='refnamediv'] | div[@class='refsynopsisdiv']">
-  <doc:section name="{@title}">
+  <doc:section name="{h2}">
     <xsl:apply-templates select="node()"/>
   </doc:section>
 </xsl:template>
 
 <xsl:template match="div[@class='refsect2']">
-  <doc:subsection name="{@title}">
+  <doc:subsection name="{h3}">
     <xsl:apply-templates select="node()"/>
   </doc:subsection>
 </xsl:template>
diff --git a/docs/rabbitmqctl.1.xml b/docs/rabbitmqctl.1.xml
index 5e2668c1..a7d064f1 100644
--- a/docs/rabbitmqctl.1.xml
+++ b/docs/rabbitmqctl.1.xml
@@ -270,8 +270,8 @@
       <title>Cluster management</title>
 
       <variablelist>
-        <varlistentry>
-          <term><cmdsynopsis><command>cluster</command> <arg choice="req"><replaceable>clusternode</replaceable></arg></cmdsynopsis></term>
+        <varlistentry id="cluster">
+          <term><cmdsynopsis><command>cluster</command><arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term>
           <listitem>
             <variablelist>
               <varlistentry>
@@ -281,7 +281,8 @@
             </variablelist>
             <para>
               Instruct the node to become member of a cluster with the
-              specified nodes.
+              specified nodes.  To cluster with currently offline nodes,
+              use <link linkend="force_cluster"><command>force_cluster</command></link>.
             </para>
             <para>
               Cluster nodes can be of two types: disk or ram. Disk nodes
@@ -334,6 +335,29 @@
             </para>
           </listitem>
         </varlistentry>
+        <varlistentry id="force_cluster">
+          <term><cmdsynopsis><command>force_cluster</command><arg choice="req" role="usage-option-list"><replaceable>clusternode</replaceable> ...</arg></cmdsynopsis></term>
+          <listitem>
+            <variablelist>
+              <varlistentry>
+                <term>clusternode</term>
+                <listitem><para>Subset of the nodes of the cluster to which this node should be connected.</para></listitem>
+              </varlistentry>
+            </variablelist>
+            <para>
+              Instruct the node to become a member of a cluster with the
+              specified nodes.  This will succeed even if the specified nodes
+              are offline.  For a more detailed description, see
+              <link linkend="cluster"><command>cluster</command></link>.
+            </para>
+            <para>
+              Note that this variant of the cluster command just
+              ignores the current status of the specified nodes.
+              Clustering may still fail for a variety of other
+              reasons.
+            </para>
+          </listitem>
+        </varlistentry>
       </variablelist>
     </refsect2>
 
@@ -603,10 +627,12 @@
             <para role="example-prefix">For example:</para>
             <screen role="example">rabbitmqctl list_permissions -p /myvhost</screen>
             <para role="example">
-              This command instructs the RabbitMQ broker to list all the
-              users which have been granted access to the virtual host
-              called <command>/myvhost</command>, and the permissions they
-              have for operations on resources in that virtual host.
+              This command instructs the RabbitMQ broker to list all
+              the users which have been granted access to the virtual
+              host called <command>/myvhost</command>, and the
+              permissions they have for operations on resources in
+              that virtual host.  Note that an empty string means no
+              permissions are granted.
             </para>
           </listitem>
         </varlistentry>
@@ -862,6 +888,10 @@
                 <listitem><para>Number of channels using the connection.</para></listitem>
               </varlistentry>
               <varlistentry>
+                <term>protocol</term>
+                <listitem><para>Version of the AMQP protocol in use (currently one of <command>{0,9,1}</command> or <command>{0,8,0}</command>). Note that if a client requests an AMQP 0-9 connection, we treat it as AMQP 0-9-1.</para></listitem>
+              </varlistentry>
+              <varlistentry>
                 <term>user</term>
                 <listitem><para>Username associated with the connection.</para></listitem>
               </varlistentry>
diff --git a/ebin/rabbit_app.in b/ebin/rabbit_app.in
index bdf407eb..2cd28abb 100644
--- a/ebin/rabbit_app.in
+++ b/ebin/rabbit_app.in
@@ -11,16 +11,19 @@
                 rabbit_sup,
                 rabbit_tcp_client_sup]},
   {applications, [kernel, stdlib, sasl, mnesia, os_mon]},
-%% we also depend on ssl but it shouldn't be in here as we don't
-%% actually want to start it
+%% we also depend on crypto, public_key and ssl but they shouldn't be
+%% in here as we don't actually want to start them
   {mod, {rabbit, []}},
   {env, [{tcp_listeners, [{"0.0.0.0", 5672}]},
          {ssl_listeners, []},
          {ssl_options, []},
          {vm_memory_high_watermark, 0.4},
-         {backing_queue_module, rabbit_invariable_queue},
+         {msg_store_index_module, rabbit_msg_store_ets_index},
+         {backing_queue_module, rabbit_variable_queue},
          {persister_max_wrap_entries, 500},
          {persister_hibernate_after, 10000},
+         {msg_store_file_size_limit, 16777216},
+         {queue_index_max_journal_entries, 262144},
          {default_user, <<"guest">>},
          {default_pass, <<"guest">>},
          {default_vhost, <<"/">>},
diff --git a/include/rabbit.hrl b/include/rabbit.hrl
index 145f6104..6364d60f 100644
--- a/include/rabbit.hrl
+++ b/include/rabbit.hrl
@@ -36,7 +36,8 @@
 
 -record(vhost, {virtual_host, dummy}).
 
--record(connection, {user, timeout_sec, frame_max, vhost, client_properties}).
+-record(connection, {protocol, user, timeout_sec, frame_max, vhost,
+                     client_properties}).
 
 -record(content,
         {class_id,
@@ -44,6 +45,7 @@
          properties_bin, %% either 'none', or an encoded properties binary
          %% Note: at most one of properties and properties_bin can be
          %% 'none' at once.
+         protocol, %% The protocol under which properties_bin was encoded
          payload_fragments_rev %% list of binaries, in reverse order (!)
          }).
 
@@ -51,7 +53,8 @@
 
 -record(exchange, {name, type, durable, auto_delete, arguments}).
 
--record(amqqueue, {name, durable, auto_delete, arguments, pid}).
+-record(amqqueue, {name, durable, auto_delete, exclusive_owner = none,
+                   arguments, pid}).
 
 %% mnesia doesn't like unary records, so we add a dummy 'value' field
 -record(route, {binding, value = const}).
@@ -67,115 +70,13 @@
 
 -record(ssl_socket, {tcp, ssl}).
 -record(delivery, {mandatory, immediate, txn, sender, message}).
-
 -record(amqp_error, {name, explanation, method = none}).
 
 %%----------------------------------------------------------------------------
 
--ifdef(use_specs).
-
--include("rabbit_framing_spec.hrl").
-
--type(maybe(T) :: T | 'none').
--type(erlang_node() :: atom()).
--type(ssl_socket() :: #ssl_socket{}).
--type(socket() :: port() | ssl_socket()).
--type(thunk(T) :: fun(() -> T)).
--type(info_key() :: atom()).
--type(info() :: {info_key(), any()}).
--type(regexp() :: binary()).
--type(file_path() :: string()).
-
-%% this is really an abstract type, but dialyzer does not support them
--type(guid() :: binary()).
--type(txn() :: guid()).
--type(pkey() :: guid()).
--type(r(Kind) ::
-      #resource{virtual_host :: vhost(),
-                kind         :: Kind,
-                name         :: resource_name()}).
--type(queue_name() :: r('queue')).
--type(exchange_name() :: r('exchange')).
--type(user() ::
-      #user{username :: username(),
-            password :: password()}).
--type(permission() ::
-      #permission{configure :: regexp(),
-                  write     :: regexp(),
-                  read      :: regexp()}).
--type(amqqueue() ::
-      #amqqueue{name          :: queue_name(),
-                durable       :: boolean(),
-                auto_delete   :: boolean(),
-                arguments     :: amqp_table(),
-                pid           :: maybe(pid())}).
--type(exchange() ::
-      #exchange{name        :: exchange_name(),
-                type        :: exchange_type(),
-                durable     :: boolean(),
-                auto_delete :: boolean(),
-                arguments   :: amqp_table()}).
--type(binding() ::
-      #binding{exchange_name    :: exchange_name(),
-               queue_name       :: queue_name(),
-               key              :: binding_key()}).
-%% TODO: make this more precise by tying specific class_ids to
-%% specific properties
--type(undecoded_content() ::
-      #content{class_id              :: amqp_class_id(),
-               properties            :: 'none',
-               properties_bin        :: binary(),
-               payload_fragments_rev :: [binary()]} |
-      #content{class_id              :: amqp_class_id(),
-               properties            :: amqp_properties(),
-               properties_bin        :: 'none',
-               payload_fragments_rev :: [binary()]}).
--type(unencoded_content() :: undecoded_content()).
--type(decoded_content() ::
-      #content{class_id              :: amqp_class_id(),
-               properties            :: amqp_properties(),
-               properties_bin        :: maybe(binary()),
-               payload_fragments_rev :: [binary()]}).
--type(encoded_content() ::
-      #content{class_id              :: amqp_class_id(),
-               properties            :: maybe(amqp_properties()),
-               properties_bin        :: binary(),
-               payload_fragments_rev :: [binary()]}).
--type(content() :: undecoded_content() | decoded_content()).
--type(basic_message() ::
-      #basic_message{exchange_name  :: exchange_name(),
-                     routing_key    :: routing_key(),
-                     content        :: content(),
-                     guid           :: guid(),
-                     is_persistent  :: boolean()}).
--type(message() :: basic_message()).
--type(delivery() ::
-      #delivery{mandatory :: boolean(),
-                immediate :: boolean(),
-                txn       :: maybe(txn()),
-                sender    :: pid(),
-                message   :: message()}).
-%% this really should be an abstract type
--type(msg_id() :: non_neg_integer()).
--type(qmsg() :: {queue_name(), pid(), msg_id(), boolean(), message()}).
--type(listener() ::
-      #listener{node     :: erlang_node(),
-                protocol :: atom(),
-                host     :: string() | atom(),
-                port     :: non_neg_integer()}).
--type(not_found() :: {'error', 'not_found'}).
--type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered').
--type(amqp_error() ::
-      #amqp_error{name        :: atom(),
-                  explanation :: string(),
-                  method      :: atom()}).
-
--endif.
-
-%%----------------------------------------------------------------------------
-
 -define(COPYRIGHT_MESSAGE, "Copyright (C) 2007-2010 LShift Ltd., Cohesive Financial Technologies LLC., and Rabbit Technologies Ltd.").
 -define(INFORMATION_MESSAGE, "Licensed under the MPL.  See http://www.rabbitmq.com/").
+-define(PROTOCOL_VERSION, "AMQP 0-9-1 / 0-9 / 0-8").
 -define(ERTS_MINIMUM, "5.6.3").
 
 -define(MAX_WAIT, 16#ffffffff).
diff --git a/include/rabbit_backing_queue_spec.hrl b/include/rabbit_backing_queue_spec.hrl
index 1b536dfa..005994f0 100644
--- a/include/rabbit_backing_queue_spec.hrl
+++ b/include/rabbit_backing_queue_spec.hrl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -31,33 +31,34 @@
 
 -type(fetch_result() ::
                  %% Message,  IsDelivered,  AckTag,  Remaining_Len
-        ('empty'|{basic_message(), boolean(), ack(), non_neg_integer()})).
+        ('empty'|{rabbit_types:basic_message(), boolean(), ack(), non_neg_integer()})).
 -type(is_durable() :: boolean()).
 -type(attempt_recovery() :: boolean()).
 -type(purged_msg_count() :: non_neg_integer()).
 -type(ack_required() :: boolean()).
 
--spec(start/1 :: ([queue_name()]) -> 'ok').
--spec(init/3 :: (queue_name(), is_durable(), attempt_recovery()) -> state()).
+-spec(start/1 :: ([rabbit_amqqueue:name()]) -> 'ok').
+-spec(stop/0 :: () -> 'ok').
+-spec(init/3 :: (rabbit_amqqueue:name(), is_durable(), attempt_recovery()) -> state()).
 -spec(terminate/1 :: (state()) -> state()).
 -spec(delete_and_terminate/1 :: (state()) -> state()).
 -spec(purge/1 :: (state()) -> {purged_msg_count(), state()}).
--spec(publish/2 :: (basic_message(), state()) -> state()).
+-spec(publish/2 :: (rabbit_types:basic_message(), state()) -> state()).
 -spec(publish_delivered/3 ::
-        (ack_required(), basic_message(), state()) -> {ack(), state()}).
+        (ack_required(), rabbit_types:basic_message(), state()) -> {ack(), state()}).
 -spec(fetch/2 :: (ack_required(), state()) -> {fetch_result(), state()}).
 -spec(ack/2 :: ([ack()], state()) -> state()).
--spec(tx_publish/3 :: (txn(), basic_message(), state()) -> state()).
--spec(tx_ack/3 :: (txn(), [ack()], state()) -> state()).
--spec(tx_rollback/2 :: (txn(), state()) -> {[ack()], state()}).
--spec(tx_commit/3 :: (txn(), fun (() -> any()), state()) -> {[ack()], state()}).
+-spec(tx_publish/3 :: (rabbit_types:txn(), rabbit_types:basic_message(), state()) -> state()).
+-spec(tx_ack/3 :: (rabbit_types:txn(), [ack()], state()) -> state()).
+-spec(tx_rollback/2 :: (rabbit_types:txn(), state()) -> {[ack()], state()}).
+-spec(tx_commit/3 :: (rabbit_types:txn(), fun (() -> any()), state()) -> {[ack()], state()}).
 -spec(requeue/2 :: ([ack()], state()) -> state()).
 -spec(len/1 :: (state()) -> non_neg_integer()).
 -spec(is_empty/1 :: (state()) -> boolean()).
 -spec(set_ram_duration_target/2 ::
       (('undefined' | 'infinity' | number()), state()) -> state()).
 -spec(ram_duration/1 :: (state()) -> {number(), state()}).
--spec(needs_sync/1 :: (state()) -> boolean()).
--spec(sync/1 :: (state()) -> state()).
+-spec(needs_idle_timeout/1 :: (state()) -> boolean()).
+-spec(idle_timeout/1 :: (state()) -> state()).
 -spec(handle_pre_hibernate/1 :: (state()) -> state()).
 -spec(status/1 :: (state()) -> [{atom(), any()}]).
diff --git a/include/rabbit_exchange_type_spec.hrl b/include/rabbit_exchange_type_spec.hrl
index 9864f1eb..f05bcb84 100644
--- a/include/rabbit_exchange_type_spec.hrl
+++ b/include/rabbit_exchange_type_spec.hrl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -31,12 +31,19 @@
 -ifdef(use_specs).
 
 -spec(description/0 :: () -> [{atom(), any()}]).
--spec(publish/2 :: (exchange(), delivery()) -> {routing_result(), [pid()]}).
--spec(validate/1 :: (exchange()) -> 'ok').
--spec(create/1 :: (exchange()) -> 'ok').
--spec(recover/2 :: (exchange(), list(binding())) -> 'ok').
--spec(delete/2 :: (exchange(), list(binding())) -> 'ok').
--spec(add_binding/2 :: (exchange(), binding()) -> 'ok').
--spec(remove_bindings/2 :: (exchange(), list(binding())) -> 'ok').
+-spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery())
+                   -> {rabbit_router:routing_result(), [pid()]}).
+-spec(validate/1 :: (rabbit_types:exchange()) -> 'ok').
+-spec(create/1 :: (rabbit_types:exchange()) -> 'ok').
+-spec(recover/2 :: (rabbit_types:exchange(),
+                    [rabbit_types:binding()]) -> 'ok').
+-spec(delete/2 :: (rabbit_types:exchange(),
+                   [rabbit_types:binding()]) -> 'ok').
+-spec(add_binding/2 :: (rabbit_types:exchange(),
+                        rabbit_types:binding()) -> 'ok').
+-spec(remove_bindings/2 :: (rabbit_types:exchange(),
+                            [rabbit_types:binding()]) -> 'ok').
+-spec(assert_args_equivalence/2 :: (rabbit_types:exchange(),
+                                    rabbit_framing:amqp_table()) -> 'ok').
 
 -endif.
diff --git a/include/rabbit_framing_spec.hrl b/include/rabbit_msg_store.hrl
index 1a979899..d96fa758 100644
--- a/include/rabbit_framing_spec.hrl
+++ b/include/rabbit_msg_store.hrl
@@ -29,32 +29,13 @@
 %%   Contributor(s): ______________________________________.
 %%
 
-%% TODO: much of this should be generated
+-include("rabbit.hrl").
 
--type(amqp_field_type() ::
-      'longstr' | 'signedint' | 'decimal' | 'timestamp' |
-      'table' | 'byte' | 'double' | 'float' | 'long' |
-      'short' | 'bool' | 'binary' | 'void').
--type(amqp_property_type() ::
-      'shortstr' | 'longstr' | 'octet' | 'shortint' | 'longint' |
-      'longlongint' | 'timestamp' | 'bit' | 'table').
-%% we could make this more precise but ultimately are limited by
-%% dialyzer's lack of support for recursive types
--type(amqp_table() :: [{binary(), amqp_field_type(), any()}]).
-%% TODO: make this more precise
--type(amqp_class_id() :: non_neg_integer()).
-%% TODO: make this more precise
--type(amqp_properties() :: tuple()).
-%% TODO: make this more precise
--type(amqp_method() :: tuple()).
-%% TODO: make this more precise
--type(amqp_method_name() :: atom()).
--type(channel_number() :: non_neg_integer()).
--type(resource_name() :: binary()).
--type(routing_key() :: binary()).
--type(username() :: binary()).
--type(password() :: binary()).
--type(vhost() :: binary()).
--type(ctag() :: binary()).
--type(exchange_type() :: atom()).
--type(binding_key() :: binary()).
+-ifdef(use_specs).
+
+-type(msg() :: any()).
+
+-endif.
+
+-record(msg_location,
+        {guid, ref_count, file, offset, total_size}).
diff --git a/include/rabbit_msg_store_index.hrl b/include/rabbit_msg_store_index.hrl
new file mode 100644
index 00000000..fba0b7cd
--- /dev/null
+++ b/include/rabbit_msg_store_index.hrl
@@ -0,0 +1,59 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-include("rabbit_msg_store.hrl").
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-type(dir() :: any()).
+-type(index_state() :: any()).
+-type(keyvalue() :: any()).
+-type(fieldpos() :: non_neg_integer()).
+-type(fieldvalue() :: any()).
+
+-spec(new/1 :: (dir()) -> index_state()).
+-spec(recover/1 :: (dir()) -> rabbit_types:ok_or_error2(index_state(), any())).
+-spec(lookup/2 ::
+        (rabbit_guid:guid(), index_state()) -> ('not_found' | keyvalue())).
+-spec(insert/2 :: (keyvalue(), index_state()) -> 'ok').
+-spec(update/2 :: (keyvalue(), index_state()) -> 'ok').
+-spec(update_fields/3 :: (rabbit_guid:guid(), ({fieldpos(), fieldvalue()} |
+                                               [{fieldpos(), fieldvalue()}]),
+                          index_state()) -> 'ok').
+-spec(delete/2 :: (rabbit_guid:guid(), index_state()) -> 'ok').
+-spec(delete_by_file/2 :: (fieldvalue(), index_state()) -> 'ok').
+-spec(terminate/1 :: (index_state()) -> any()).
+
+-endif.
+
+%%----------------------------------------------------------------------------
diff --git a/packaging/RPMS/Fedora/rabbitmq-server.spec b/packaging/RPMS/Fedora/rabbitmq-server.spec
index 4a7240e0..bf03c6ac 100644
--- a/packaging/RPMS/Fedora/rabbitmq-server.spec
+++ b/packaging/RPMS/Fedora/rabbitmq-server.spec
@@ -105,6 +105,12 @@ if [ $1 = 0 ]; then
   # Leave rabbitmq user and group
 fi
 
+# Clean out plugin activation state, both on uninstall and upgrade
+rm -rf %{_rabbit_erllibdir}/priv
+for ext in rel script boot ; do
+    rm -f %{_rabbit_erllibdir}/ebin/rabbit.$ext
+done
+
 %files -f ../%{name}.files
 %defattr(-,root,root,-)
 %attr(0750, rabbitmq, rabbitmq) %dir %{_localstatedir}/lib/rabbitmq
@@ -120,6 +126,12 @@ fi
 rm -rf %{buildroot}
 
 %changelog
+* Wed Jul 14 2010 Emile Joubert <emile@rabbitmq.com> 1.8.1-1
+- New Upstream Release
+
+* Tue Jun 15 2010 Matthew Sackman <matthew@rabbitmq.com> 1.8.0-1
+- New Upstream Release
+
 * Mon Feb 15 2010 Matthew Sackman <matthew@lshift.net> 1.7.2-1
 - New Upstream Release
 
diff --git a/packaging/common/rabbitmq-server.ocf b/packaging/common/rabbitmq-server.ocf
index 97c58ea2..b969535a 100755
--- a/packaging/common/rabbitmq-server.ocf
+++ b/packaging/common/rabbitmq-server.ocf
@@ -35,21 +35,21 @@
 ##
 
 ## OCF instance parameters
-##	OCF_RESKEY_multi
-##	OCF_RESKEY_ctl
-##	OCF_RESKEY_nodename
-##	OCF_RESKEY_ip
-##	OCF_RESKEY_port
-##	OCF_RESKEY_cluster_config_file
-##	OCF_RESKEY_config_file
-##	OCF_RESKEY_log_base
-##	OCF_RESKEY_mnesia_base
-##	OCF_RESKEY_server_start_args
+##   OCF_RESKEY_multi
+##   OCF_RESKEY_ctl
+##   OCF_RESKEY_nodename
+##   OCF_RESKEY_ip
+##   OCF_RESKEY_port
+##   OCF_RESKEY_config_file
+##   OCF_RESKEY_log_base
+##   OCF_RESKEY_mnesia_base
+##   OCF_RESKEY_server_start_args
 
 #######################################################################
 # Initialization:
 
-. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs
 
 #######################################################################
 
@@ -63,7 +63,7 @@ OCF_RESKEY_log_base_default="/var/log/rabbitmq"
 : ${OCF_RESKEY_log_base=${OCF_RESKEY_log_base_default}}
 
 meta_data() {
-	cat <<END
+    cat <<END
 <?xml version="1.0"?>
 <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
 <resource-agent name="rabbitmq-server">
@@ -113,15 +113,7 @@ The IP address for rabbitmq-server to listen on
 The IP Port for rabbitmq-server to listen on
 </longdesc>
 <shortdesc lang="en">IP Port</shortdesc>
-<content type="string" default="" />
-</parameter>
-
-<parameter name="cluster_config_file" unique="0" required="0">
-<longdesc lang="en">
-Location of the cluster config file
-</longdesc>
-<shortdesc lang="en">Cluster config file path</shortdesc>
-<content type="string" default="" />
+<content type="integer" default="" />
 </parameter>
 
 <parameter name="config_file" unique="0" required="0">
@@ -161,7 +153,8 @@ Additional arguments provided to the server on startup
 <actions>
 <action name="start"        timeout="600" />
 <action name="stop"         timeout="120" />
-<action name="monitor"      timeout="20" interval="10" depth="0" start-delay="0" />
+<action name="status"       timeout="20" interval="10" />
+<action name="monitor"      timeout="20" interval="10" />
 <action name="validate-all" timeout="30" />
 <action name="meta-data"    timeout="5" />
 </actions>
@@ -170,8 +163,8 @@ END
 }
 
 rabbit_usage() {
-	cat <<END
-usage: $0 {start|stop|monitor|validate-all|meta-data}
+    cat <<END
+usage: $0 {start|stop|status|monitor|validate-all|meta-data}
 
 Expects to have a fully populated OCF RA-compliant environment set.
 END
@@ -182,7 +175,6 @@ RABBITMQ_CTL=$OCF_RESKEY_ctl
 RABBITMQ_NODENAME=$OCF_RESKEY_nodename
 RABBITMQ_NODE_IP_ADDRESS=$OCF_RESKEY_ip
 RABBITMQ_NODE_PORT=$OCF_RESKEY_port
-RABBITMQ_CLUSTER_CONFIG_FILE=$OCF_RESKEY_cluster_config_file
 RABBITMQ_CONFIG_FILE=$OCF_RESKEY_config_file
 RABBITMQ_LOG_BASE=$OCF_RESKEY_log_base
 RABBITMQ_MNESIA_BASE=$OCF_RESKEY_mnesia_base
@@ -193,7 +185,6 @@ RABBITMQ_SERVER_START_ARGS=$OCF_RESKEY_server_start_args
 export_vars() {
     [ ! -z $RABBITMQ_NODE_IP_ADDRESS ]     && export RABBITMQ_NODE_IP_ADDRESS
     [ ! -z $RABBITMQ_NODE_PORT ]           && export RABBITMQ_NODE_PORT
-    [ ! -z $RABBITMQ_CLUSTER_CONFIG_FILE ] && export RABBITMQ_CLUSTER_CONFIG_FILE
     [ ! -z $RABBITMQ_CONFIG_FILE ]         && export RABBITMQ_CONFIG_FILE
     [ ! -z $RABBITMQ_LOG_BASE ]            && export RABBITMQ_LOG_BASE
     [ ! -z $RABBITMQ_MNESIA_BASE ]         && export RABBITMQ_MNESIA_BASE
@@ -202,35 +193,30 @@ export_vars() {
 
 rabbit_validate_partial() {
     if [ ! -x $RABBITMQ_MULTI ]; then
-	ocf_log err "rabbitmq-server multi $RABBITMQ_MULTI does not exist or is not executable";
-	return $OCF_ERR_ARGS;
+        ocf_log err "rabbitmq-server multi $RABBITMQ_MULTI does not exist or is not executable";
+        exit $OCF_ERR_INSTALLED;
     fi
 
     if [ ! -x $RABBITMQ_CTL ]; then
-	ocf_log err "rabbitmq-server ctl $RABBITMQ_CTL does not exist or is not executable";
-	return $OCF_ERR_ARGS;
+        ocf_log err "rabbitmq-server ctl $RABBITMQ_CTL does not exist or is not executable";
+        exit $OCF_ERR_INSTALLED;
     fi
 }
 
 rabbit_validate_full() {
-    if [ ! -z $RABBITMQ_CLUSTER_CONFIG_FILE ] && [ ! -e $RABBITMQ_CLUSTER_CONFIG_FILE ]; then
-	ocf_log err "rabbitmq-server cluster_config_file $RABBITMQ_CLUSTER_CONFIG_FILE does not exist or is not a file";
-	return $OCF_ERR_ARGS;
-    fi
-
     if [ ! -z $RABBITMQ_CONFIG_FILE ] && [ ! -e $RABBITMQ_CONFIG_FILE ]; then
-	ocf_log err "rabbitmq-server config_file $RABBITMQ_CONFIG_FILE does not exist or is not a file";
-	return $OCF_ERR_ARGS;
+        ocf_log err "rabbitmq-server config_file $RABBITMQ_CONFIG_FILE does not exist or is not a file";
+        exit $OCF_ERR_INSTALLED;
     fi
 
     if [ ! -z $RABBITMQ_LOG_BASE ] && [ ! -d $RABBITMQ_LOG_BASE ]; then
-	ocf_log err "rabbitmq-server log_base $RABBITMQ_LOG_BASE does not exist or is not a directory";
-	return $OCF_ERR_ARGS;
+        ocf_log err "rabbitmq-server log_base $RABBITMQ_LOG_BASE does not exist or is not a directory";
+        exit $OCF_ERR_INSTALLED;
     fi
 
     if [ ! -z $RABBITMQ_MNESIA_BASE ] && [ ! -d $RABBITMQ_MNESIA_BASE ]; then
-	ocf_log err "rabbitmq-server mnesia_base $RABBITMQ_MNESIA_BASE does not exist or is not a directory";
-	return $OCF_ERR_ARGS;
+        ocf_log err "rabbitmq-server mnesia_base $RABBITMQ_MNESIA_BASE does not exist or is not a directory";
+        exit $OCF_ERR_INSTALLED;
     fi
 
     rabbit_validate_partial
@@ -243,25 +229,26 @@ rabbit_status() {
     $RABBITMQ_CTL $NODENAME_ARG status > /dev/null 2> /dev/null
     rc=$?
     case "$rc" in
-	0)
-	    return $OCF_SUCCESS
-	    ;;
-	2)
-	    return $OCF_NOT_RUNNING
-	    ;;
-	*)
-	    ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG status: $rc"
-	    return $OCF_ERR_GENERIC
+        0)
+            ocf_log debug "RabbitMQ server is running normally"
+            return $OCF_SUCCESS
+            ;;
+        2)
+            ocf_log debug "RabbitMQ server is not running"
+            return $OCF_NOT_RUNNING
+            ;;
+        *)
+            ocf_log err "Unexpected return from rabbitmqctl $NODENAME_ARG status: $rc"
+            exit $OCF_ERR_GENERIC
     esac
 }
 
 rabbit_start() {
     local rc
 
-    rabbit_validate_full
-    rc=$?
-    if [ "$rc" != $OCF_SUCCESS ]; then
-        return $rc
+    if rabbit_status; then
+        ocf_log info "Resource already running."
+        return $OCF_SUCCESS
     fi
 
     export_vars
@@ -270,24 +257,23 @@ rabbit_start() {
     rc=$?
 
     if [ "$rc" != 0 ]; then
-	ocf_log err "rabbitmq-server start command failed: $RABBITMQ_MULTI start_all 1, $rc"
-  	return $rc
+        ocf_log err "rabbitmq-server start command failed: $RABBITMQ_MULTI start_all 1, $rc"
+        return $rc
     fi
 
     # Spin waiting for the server to come up.
     # Let the CRM/LRM time us out if required
     start_wait=1
     while [ $start_wait = 1 ]; do
-	rabbit_status
-	rc=$?
-	if [ "$rc" = $OCF_SUCCESS ]; then
-	    start_wait=0
-
-	elif [ "$rc" != $OCF_NOT_RUNNING ]; then
-	    ocf_log info "rabbitmq-server start failed: $rc"
-	    return $OCF_ERR_GENERIC
-	fi
-        sleep 2
+        rabbit_status
+        rc=$?
+        if [ "$rc" = $OCF_SUCCESS ]; then
+            start_wait=0
+        elif [ "$rc" != $OCF_NOT_RUNNING ]; then
+            ocf_log info "rabbitmq-server start failed: $rc"
+            exit $OCF_ERR_GENERIC
+        fi
+        sleep 1
     done
 
     return $OCF_SUCCESS
@@ -295,28 +281,34 @@ rabbit_start() {
 
 rabbit_stop() {
     local rc
+
+    if ! rabbit_status; then
+        ocf_log info "Resource not running."
+        return $OCF_SUCCESS
+    fi
+
     $RABBITMQ_MULTI stop_all &
     rc=$?
 
     if [ "$rc" != 0 ]; then
-	ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_MULTI stop_all, $rc"
-  	return $rc
+        ocf_log err "rabbitmq-server stop command failed: $RABBITMQ_MULTI stop_all, $rc"
+        return $rc
     fi
 
     # Spin waiting for the server to shut down.
     # Let the CRM/LRM time us out if required
     stop_wait=1
     while [ $stop_wait = 1 ]; do
-	rabbit_status
-	rc=$?
-	if [ "$rc" = $OCF_NOT_RUNNING ]; then
-	    stop_wait=0
+        rabbit_status
+        rc=$?
+        if [ "$rc" = $OCF_NOT_RUNNING ]; then
+            stop_wait=0
             break
-	elif [ "$rc" != $OCF_SUCCESS ]; then
-	    ocf_log info "rabbitmq-server stop failed: $rc"
-	    return $OCF_ERR_GENERIC
-	fi
-        sleep 2
+        elif [ "$rc" != $OCF_SUCCESS ]; then
+            ocf_log info "rabbitmq-server stop failed: $rc"
+            exit $OCF_ERR_GENERIC
+        fi
+        sleep 1
     done
 
     return $OCF_SUCCESS
@@ -329,34 +321,38 @@ rabbit_monitor() {
 
 case $__OCF_ACTION in
     meta-data)
-	meta_data
-	exit $OCF_SUCCESS
-	;;
+        meta_data
+        exit $OCF_SUCCESS
+        ;;
     usage|help)
-	rabbit_usage
-	exit $OCF_SUCCESS
-	;;
+        rabbit_usage
+        exit $OCF_SUCCESS
+        ;;
 esac
 
-rabbit_validate_partial || exit
+if ocf_is_probe; then
+    rabbit_validate_partial
+else
+    rabbit_validate_full
+fi
 
 case $__OCF_ACTION in
     start)
-	rabbit_start
+        rabbit_start
         ;;
     stop)
-	rabbit_stop
+        rabbit_stop
         ;;
-    monitor)
-	rabbit_monitor
+    status|monitor)
+        rabbit_monitor
         ;;
     validate-all)
         exit $OCF_SUCCESS
-	;;
+        ;;
     *)
-	rabbit_usage
-	exit $OCF_ERR_UNIMPLEMENTED
-	;;
+        rabbit_usage
+        exit $OCF_ERR_UNIMPLEMENTED
+        ;;
 esac
 
-exit $?
-\ No newline at end of file
+exit $?
diff --git a/packaging/debs/Debian/debian/changelog b/packaging/debs/Debian/debian/changelog
index 63b50749..0dccf938 100644
--- a/packaging/debs/Debian/debian/changelog
+++ b/packaging/debs/Debian/debian/changelog
@@ -1,3 +1,15 @@
+rabbitmq-server (1.8.1-1) lucid; urgency=low
+
+  * New Upstream Release
+
+ -- Emile Joubert <emile@rabbitmq.com>  Wed, 14 Jul 2010 15:05:24 +0100
+
+rabbitmq-server (1.8.0-1) intrepid; urgency=low
+
+  * New Upstream Release
+
+ -- Matthew Sackman <matthew@rabbitmq.com>  Tue, 15 Jun 2010 12:48:48 +0100
+
 rabbitmq-server (1.7.2-1) intrepid; urgency=low
 
   * New Upstream Release
diff --git a/packaging/debs/Debian/debian/postrm.in b/packaging/debs/Debian/debian/postrm.in
index bfcf1f53..5290de9b 100644
--- a/packaging/debs/Debian/debian/postrm.in
+++ b/packaging/debs/Debian/debian/postrm.in
@@ -18,6 +18,13 @@ set -e
 # for details, see http://www.debian.org/doc/debian-policy/ or
 # the debian-policy package
 
+remove_plugin_traces() {
+    # Remove traces of plugins
+    rm -rf @RABBIT_LIB@/priv @RABBIT_LIB@/plugins
+    for ext in rel script boot ; do
+        rm -f @RABBIT_LIB@/ebin/rabbit.$ext
+    done
+}
 
 case "$1" in
     purge)
@@ -34,11 +41,7 @@ case "$1" in
         if [ -d /etc/rabbitmq ]; then
                 rm -r /etc/rabbitmq
         fi
-        # Remove traces of plugins
-        rm -rf @RABBIT_LIB@/priv @RABBIT_LIB@/plugins
-        for ext in rel script boot ; do
-                rm -f @RABBIT_LIB@/ebin/rabbit.$ext
-        done
+	remove_plugin_traces
         if getent passwd rabbitmq >/dev/null; then
                 # Stop epmd if run by the rabbitmq user
                 pkill -u rabbitmq epmd || :
@@ -50,7 +53,11 @@ case "$1" in
         fi
     ;;
 
-    remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear)
+    remove|upgrade)
+	remove_plugin_traces
+    ;;
+
+    failed-upgrade|abort-install|abort-upgrade|disappear)
     ;;
 
     *)
diff --git a/packaging/debs/Debian/debian/rules b/packaging/debs/Debian/debian/rules
index 1238b142..6b6df33b 100644
--- a/packaging/debs/Debian/debian/rules
+++ b/packaging/debs/Debian/debian/rules
@@ -13,7 +13,7 @@ DOCDIR=$(DEB_DESTDIR)usr/share/doc/rabbitmq-server/
 
 install/rabbitmq-server::
 	mkdir -p $(DOCDIR)
-	rm $(RABBIT_LIB)LICENSE*
+	rm $(RABBIT_LIB)LICENSE* $(RABBIT_LIB)INSTALL*
 	for script in rabbitmqctl rabbitmq-server rabbitmq-multi; do \
 		install -p -D -m 0755 debian/rabbitmq-script-wrapper $(DEB_DESTDIR)usr/sbin/$$script; \
 	done
diff --git a/packaging/macports/Makefile b/packaging/macports/Makefile
index 0ef7dd5e..3a22eef0 100644
--- a/packaging/macports/Makefile
+++ b/packaging/macports/Makefile
@@ -31,15 +31,22 @@ $(DEST)/Portfile: Portfile.in
 	    -f checksums.sed <$^ >$@
 	rm checksums.sed
 
+# The purpose of the intricate substitution below is to set up
+# environment vars similar to the ones that su sets up on Linux.  On
+# OS X, we have to use the -m option to su in order to be able to set
+# the shell (which for the rabbitmq user would otherwise be
+# /dev/null).  But the -m option means that *all* environment vars
+# get preserved.  Erlang needs vars such as HOME to be set.  So we
+# have to set them explicitly.
 macports: dirs $(DEST)/Portfile
 	for f in rabbitmq-asroot-script-wrapper rabbitmq-script-wrapper ; do \
 	  cp $(COMMON_DIR)/$$f $(DEST)/files ; \
 	done
-	sed -i -e 's|@SU_RABBITMQ_SH_C@|SHELL=/bin/sh su -m rabbitmq -c|' \
+	sed -i -e 's|@SU_RABBITMQ_SH_C@|SHELL=/bin/sh HOME=/var/lib/rabbitmq USER=rabbitmq LOGNAME=rabbitmq PATH="$$(eval `PATH=MACPORTS_PREFIX/bin /usr/libexec/path_helper -s`; echo $$PATH)" su -m rabbitmq -c|' \
 	    $(DEST)/files/rabbitmq-script-wrapper
 	cp patch-org.macports.rabbitmq-server.plist.diff $(DEST)/files
 	if [ -n "$(MACPORTS_USERHOST)" ] ; then \
-	  tar cf - -C $(MACPORTS_DIR) . | ssh $(SSH_OPTS) lshift@macrabbit ' \
+	  tar cf - -C $(MACPORTS_DIR) . | ssh $(SSH_OPTS) $(MACPORTS_USERHOST) ' \
 	    d="/tmp/mkportindex.$$$$" ; \
 	    mkdir $$d \
 	      && cd $$d \
@@ -52,4 +59,4 @@ macports: dirs $(DEST)/Portfile
 	fi
 
 clean:
-	rm -rf $(DEST) checksums.sed
+	rm -rf $(MACPORTS_DIR) checksums.sed
diff --git a/packaging/macports/Portfile.in b/packaging/macports/Portfile.in
index 62cdad8e..f30460d3 100644
--- a/packaging/macports/Portfile.in
+++ b/packaging/macports/Portfile.in
@@ -4,9 +4,8 @@
 PortSystem 1.0
 name		rabbitmq-server
 version		@VERSION@
-revision	1
 categories	net 
-maintainers	rabbitmq.com:tonyg
+maintainers	paperplanes.de:meyer rabbitmq.com:tonyg openmaintainer
 platforms	darwin
 description	The RabbitMQ AMQP Server
 long_description	\
@@ -23,8 +22,8 @@ checksums \
     sha1 @sha1@ \
     rmd160 @rmd160@
 
-depends_build   port:erlang port:xmlto port:libxslt
-depends_run     port:erlang
+depends_lib	port:erlang
+depends_build	port:xmlto port:libxslt
 
 platform darwin 7 {
     depends_build-append port:py25-simplejson
@@ -76,26 +75,18 @@ post-destroot {
 
     reinplace -E "s:(/etc/rabbitmq/rabbitmq.conf):${prefix}\\1:g" \
         ${realsbin}/rabbitmq-env
-    reinplace -E "s:(CLUSTER_CONFIG_FILE)=/:\\1=${prefix}/:" \
-        ${realsbin}/rabbitmq-multi \
-        ${realsbin}/rabbitmq-server \
-        ${realsbin}/rabbitmqctl
-    reinplace -E "s:(LOG_BASE)=/:\\1=${prefix}/:" \
-        ${realsbin}/rabbitmq-multi \
-        ${realsbin}/rabbitmq-server \
-        ${realsbin}/rabbitmqctl
-    reinplace -E "s:(MNESIA_BASE)=/:\\1=${prefix}/:" \
-        ${realsbin}/rabbitmq-multi \
-        ${realsbin}/rabbitmq-server \
-        ${realsbin}/rabbitmqctl
-    reinplace -E "s:(PIDS_FILE)=/:\\1=${prefix}/:" \
-        ${realsbin}/rabbitmq-multi \
-        ${realsbin}/rabbitmq-server \
-        ${realsbin}/rabbitmqctl
+    foreach var {CONFIG_FILE CLUSTER_CONFIG_FILE LOG_BASE MNESIA_BASE PIDS_FILE} {
+        reinplace -E "s:^($var)=/:\\1=${prefix}/:" \
+            ${realsbin}/rabbitmq-multi \
+            ${realsbin}/rabbitmq-server \
+            ${realsbin}/rabbitmqctl
+    }
 
     xinstall -m 555 ${filespath}/rabbitmq-script-wrapper \
                 ${wrappersbin}/rabbitmq-multi
 
+    reinplace -E "s:MACPORTS_PREFIX/bin:${prefix}/bin:" \
+                ${wrappersbin}/rabbitmq-multi
     reinplace -E "s:/usr/lib/rabbitmq/bin/:${prefix}/lib/rabbitmq/bin/:" \
                 ${wrappersbin}/rabbitmq-multi
     reinplace -E "s:/var/lib/rabbitmq:${prefix}/var/lib/rabbitmq:" \
diff --git a/packaging/macports/make-port-diff.sh b/packaging/macports/make-port-diff.sh
new file mode 100755
index 00000000..3eb1b9f5
--- /dev/null
+++ b/packaging/macports/make-port-diff.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# This script grabs the latest rabbitmq-server bits from the main
+# macports subversion repo, and from the rabbitmq.com macports repo,
+# and produces a diff from the former to the latter for submission
+# through the macports trac. The diff is written to stdout.
+
+set -e
+
+# Work in a private scratch directory and remove it on every exit
+# path, including failures caught by 'set -e'.
+dir=$(mktemp -d "/tmp/$(basename "$0").XXXXXX")
+trap 'cd / && rm -rf "$dir"' EXIT
+mkdir -p "$dir/macports" "$dir/rabbitmq"
+
+# Get the files from the macports subversion repo. Only stdout is
+# discarded, so svn errors stay on stderr and out of the diff.
+cd "$dir/macports"
+svn checkout http://svn.macports.org/repository/macports/trunk/dports/net/rabbitmq-server/ >/dev/null
+
+# Collapse any "# $...$" keyword line (e.g. an expanded svn $Id: ... $) to "# $Id$"
+sed -i -e 's|^# \$.*$|# $Id$|' rabbitmq-server/Portfile
+
+# Get the files from the rabbitmq.com macports repo
+cd ../rabbitmq
+curl -s http://www.rabbitmq.com/releases/macports/net/rabbitmq-server.tgz | tar xzf -
+
+cd ..
+# diff exits with 1 when the trees differ, which is the expected
+# outcome here; only a status above 1 is a real failure.
+diff -Naur --exclude=.svn macports rabbitmq || [ $? -eq 1 ]
diff --git a/scripts/rabbitmq-multi b/scripts/rabbitmq-multi
index 8341d35c..59050692 100755
--- a/scripts/rabbitmq-multi
+++ b/scripts/rabbitmq-multi
@@ -29,7 +29,8 @@
 ##
 ##   Contributor(s): ______________________________________.
 ##
-NODENAME=rabbit
+[ "x" = "x$HOSTNAME" ] && HOSTNAME=`env hostname -s`
+NODENAME=rabbit@${HOSTNAME%%.*}
 SCRIPT_HOME=$(dirname $0)
 PIDS_FILE=/var/lib/rabbitmq/pids
 MULTI_ERL_ARGS=
diff --git a/scripts/rabbitmq-multi.bat b/scripts/rabbitmq-multi.bat
index a4b7f2e9..a4f8c8b4 100644
--- a/scripts/rabbitmq-multi.bat
+++ b/scripts/rabbitmq-multi.bat
@@ -42,8 +42,12 @@ if "!RABBITMQ_BASE!"=="" (
     set RABBITMQ_BASE=!APPDATA!\RabbitMQ
 )
 
+if "!COMPUTERNAME!"=="" (
+    set COMPUTERNAME=localhost
+)
+
 if "!RABBITMQ_NODENAME!"=="" (
-    set RABBITMQ_NODENAME=rabbit
+    set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
 )
 
 if "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server
index aaa131ee..e2028728 100755
--- a/scripts/rabbitmq-server
+++ b/scripts/rabbitmq-server
@@ -30,7 +30,8 @@
 ##   Contributor(s): ______________________________________.
 ##
 
-NODENAME=rabbit
+[ "x" = "x$HOSTNAME" ] && HOSTNAME=`env hostname -s`
+NODENAME=rabbit@${HOSTNAME%%.*}
 SERVER_ERL_ARGS="+K true +A30 +P 1048576 \
 -kernel inet_default_listen_options [{nodelay,true}] \
 -kernel inet_default_connect_options [{nodelay,true}]"
@@ -81,12 +82,6 @@ fi
 [ -f  "${RABBITMQ_LOGS}" ] && cat "${RABBITMQ_LOGS}" >> "${RABBITMQ_LOGS}${RABBITMQ_BACKUP_EXTENSION}"
 [ -f  "${RABBITMQ_SASL_LOGS}" ] && cat "${RABBITMQ_SASL_LOGS}" >> "${RABBITMQ_SASL_LOGS}${RABBITMQ_BACKUP_EXTENSION}"
 
-if [ -f "$RABBITMQ_CLUSTER_CONFIG_FILE" ]; then
-    RABBITMQ_CLUSTER_CONFIG_OPTION="-rabbit cluster_config \"$RABBITMQ_CLUSTER_CONFIG_FILE\""
-else
-    RABBITMQ_CLUSTER_CONFIG_OPTION=""
-fi
-
 RABBITMQ_START_RABBIT=
 [ "x" = "x$RABBITMQ_ALLOW_INPUT" ] && RABBITMQ_START_RABBIT='-noinput' 
 
@@ -138,6 +133,5 @@ exec erl \
     -os_mon start_disksup false \
     -os_mon start_memsup false \
     -mnesia dir "\"${RABBITMQ_MNESIA_DIR}\"" \
-    ${RABBITMQ_CLUSTER_CONFIG_OPTION} \
     ${RABBITMQ_SERVER_START_ARGS} \
     "$@"
diff --git a/scripts/rabbitmq-server.bat b/scripts/rabbitmq-server.bat
index b3046594..86fbc463 100644
--- a/scripts/rabbitmq-server.bat
+++ b/scripts/rabbitmq-server.bat
@@ -42,8 +42,12 @@ if "!RABBITMQ_BASE!"=="" (
     set RABBITMQ_BASE=!APPDATA!\RabbitMQ
 )
 
+if "!COMPUTERNAME!"=="" (
+    set COMPUTERNAME=localhost
+)
+
 if "!RABBITMQ_NODENAME!"=="" (
-    set RABBITMQ_NODENAME=rabbit
+    set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
 )
 
 if "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
@@ -99,14 +103,6 @@ if exist "!SASL_LOGS!" (
 rem End of log management
 
 
-if "!RABBITMQ_CLUSTER_CONFIG_FILE!"=="" (
-    set RABBITMQ_CLUSTER_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq_cluster.config
-)
-set CLUSTER_CONFIG=
-if not exist "!RABBITMQ_CLUSTER_CONFIG_FILE!" GOTO L1
-set CLUSTER_CONFIG=-rabbit cluster_config \""!RABBITMQ_CLUSTER_CONFIG_FILE:\=/!"\"
-:L1
-
 if "!RABBITMQ_MNESIA_DIR!"=="" (
     set RABBITMQ_MNESIA_DIR=!RABBITMQ_MNESIA_BASE!/!RABBITMQ_NODENAME!-mnesia
 )
diff --git a/scripts/rabbitmq-service.bat b/scripts/rabbitmq-service.bat
index a25e236a..2217a451 100644
--- a/scripts/rabbitmq-service.bat
+++ b/scripts/rabbitmq-service.bat
@@ -48,8 +48,12 @@ if "!RABBITMQ_BASE!"=="" (
     set RABBITMQ_BASE=!APPDATA!\!RABBITMQ_SERVICENAME!
 )
 
+if "!COMPUTERNAME!"=="" (
+    set COMPUTERNAME=localhost
+)
+
 if "!RABBITMQ_NODENAME!"=="" (
-    set RABBITMQ_NODENAME=rabbit
+    set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
 )
 
 if "!RABBITMQ_NODE_IP_ADDRESS!"=="" (
@@ -132,14 +136,6 @@ if exist "!SASL_LOGS!" (
 rem End of log management
 
 
-if "!RABBITMQ_CLUSTER_CONFIG_FILE!"=="" (
-    set RABBITMQ_CLUSTER_CONFIG_FILE=!RABBITMQ_BASE!\rabbitmq_cluster.config
-)
-set CLUSTER_CONFIG=
-if not exist "!RABBITMQ_CLUSTER_CONFIG_FILE!" GOTO L1
-set CLUSTER_CONFIG=-rabbit cluster_config \""!RABBITMQ_CLUSTER_CONFIG_FILE:\=/!"\"
-:L1
-
 if "!RABBITMQ_MNESIA_DIR!"=="" (
     set RABBITMQ_MNESIA_DIR=!RABBITMQ_MNESIA_BASE!/!RABBITMQ_NODENAME!-mnesia
 )
diff --git a/scripts/rabbitmqctl b/scripts/rabbitmqctl
index cfb775eb..92e5312b 100755
--- a/scripts/rabbitmqctl
+++ b/scripts/rabbitmqctl
@@ -30,7 +30,8 @@
 ##   Contributor(s): ______________________________________.
 ##
 
-NODENAME=rabbit
+[ "x" = "x$HOSTNAME" ] && HOSTNAME=`env hostname -s`
+NODENAME=rabbit@${HOSTNAME%%.*}
 
 . `dirname $0`/rabbitmq-env
 
diff --git a/scripts/rabbitmqctl.bat b/scripts/rabbitmqctl.bat
index 55572451..563b9e58 100644
--- a/scripts/rabbitmqctl.bat
+++ b/scripts/rabbitmqctl.bat
@@ -38,8 +38,12 @@ set TDP0=%~dp0
 set STAR=%*
 setlocal enabledelayedexpansion
 
+if "!COMPUTERNAME!"=="" (
+    set COMPUTERNAME=localhost
+)
+
 if "!RABBITMQ_NODENAME!"=="" (
-    set RABBITMQ_NODENAME=rabbit
+    set RABBITMQ_NODENAME=rabbit@!COMPUTERNAME!
 )
 
 if not exist "!ERLANG_HOME!\bin\erl.exe" (
diff --git a/src/bpqueue.erl b/src/bpqueue.erl
new file mode 100644
index 00000000..49874aa6
--- /dev/null
+++ b/src/bpqueue.erl
@@ -0,0 +1,286 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(bpqueue).
+
+%% Block-prefixed queue. From the perspective of the queue interface
+%% the data structure acts like a regular queue in which each value
+%% is paired with a prefix.
+%%
+%% It is implemented as a queue of {Prefix, InnerQueue} blocks, which
+%% is more space and time efficient than pairing every value with its
+%% prefix, whilst supporting the normal queue interface. Prefixes need
+%% not be unique, but it is guaranteed that no two consecutive blocks
+%% have the same prefix. len/1 returns the flattened length of the
+%% queue and is O(1).
+
+-export([new/0, is_empty/1, len/1, in/3, in_r/3, out/1, out_r/1, join/2,
+         foldl/3, foldr/3, from_list/1, to_list/1, map_fold_filter_l/4,
+         map_fold_filter_r/4]).
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-export_type([bpqueue/0]).
+
+-type(bpqueue() :: {non_neg_integer(), queue()}).
+-type(prefix() :: any()).
+-type(value() :: any()).
+-type(result() :: ({'empty', bpqueue()} |
+                   {{'value', prefix(), value()}, bpqueue()})).
+
+-spec(new/0 :: () -> bpqueue()).
+-spec(is_empty/1 :: (bpqueue()) -> boolean()).
+-spec(len/1 :: (bpqueue()) -> non_neg_integer()).
+-spec(in/3 :: (prefix(), value(), bpqueue()) -> bpqueue()).
+-spec(in_r/3 :: (prefix(), value(), bpqueue()) -> bpqueue()).
+-spec(out/1 :: (bpqueue()) -> result()).
+-spec(out_r/1 :: (bpqueue()) -> result()).
+-spec(join/2 :: (bpqueue(), bpqueue()) -> bpqueue()).
+-spec(foldl/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B).
+-spec(foldr/3 :: (fun ((prefix(), value(), B) -> B), B, bpqueue()) -> B).
+-spec(from_list/1 :: ([{prefix(), [value()]}]) -> bpqueue()).
+-spec(to_list/1 :: (bpqueue()) -> [{prefix(), [value()]}]).
+-spec(map_fold_filter_l/4 :: ((fun ((prefix()) -> boolean())),
+                              (fun ((value(), B) ->
+                                           ({prefix(), value(), B} | 'stop'))),
+                              B,
+                              bpqueue()) ->
+             {bpqueue(), B}).
+-spec(map_fold_filter_r/4 :: ((fun ((prefix()) -> boolean())),
+                              (fun ((value(), B) ->
+                                           ({prefix(), value(), B} | 'stop'))),
+                              B,
+                              bpqueue()) ->
+             {bpqueue(), B}).
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+%% An empty bpqueue: flattened length 0 and no blocks.
+new() -> {0, queue:new()}.
+%% True only for a {0, _} tuple; any other term yields false.
+is_empty(BPQ) -> case BPQ of {0, _} -> true; _ -> false end.
+%% Flattened length, read straight from the stored counter.
+len({Len, _Blocks}) -> Len.
+
+%% Append Value, tagged with Prefix, at the rear of the bpqueue.
+in(Prefix, Value, {0, Blocks}) ->
+    {1, queue:in({Prefix, queue:from_list([Value])}, Blocks)};
+in(Prefix, Value, BPQ) ->
+    in1({fun queue:in/2, fun queue:out_r/1}, Prefix, Value, BPQ).
+%% Prepend Value at the front; an empty bpqueue is handled by in/3.
+in_r(Prefix, Value, {0, _Blocks} = BPQ) -> in(Prefix, Value, BPQ);
+in_r(Prefix, Value, BPQ) ->
+    in1({fun queue:in_r/2, fun queue:out/1}, Prefix, Value, BPQ).
+
+in1({In, Out}, Prefix, Value, {Len, Q}) ->
+    %% Extend the edge block if it carries Prefix, else add a new one.
+    {Len + 1,
+     case Out(Q) of
+         {{value, {Prefix, Edge}}, Rest} -> In({Prefix, In(Value, Edge)}, Rest);
+         {{value, {_, _}}, _} -> In({Prefix, queue:in(Value, queue:new())}, Q)
+     end}.
+
+%% Append a whole inner queue of values under Prefix at the rear.
+%% An empty Queue leaves the bpqueue untouched.
+in_q(Prefix, Queue, {0, Q} = BPQ) ->
+    case queue:len(Queue) of
+        0   -> BPQ;
+        Len -> {Len, queue:in({Prefix, Queue}, Q)}
+    end;
+in_q(Prefix, Queue, BPQ) ->
+    Funs = {fun queue:in/2, fun queue:out_r/1, fun queue:join/2},
+    in_q1(Funs, Prefix, Queue, BPQ).
+
+%% As in_q/3, but at the front; Join is flipped so Queue lands first.
+in_q_r(Prefix, Queue, {0, _Q} = BPQ) -> in_q(Prefix, Queue, BPQ);
+in_q_r(Prefix, Queue, BPQ) ->
+    Flip = fun (T, H) -> queue:join(H, T) end,
+    in_q1({fun queue:in_r/2, fun queue:out/1, Flip}, Prefix, Queue, BPQ).
+
+%% Merge Queue into a matching edge block, else add it as a new block.
+in_q1({In, Out, Join}, Prefix, Queue, {Len, Q} = BPQ) ->
+    case queue:len(Queue) of
+        0 -> BPQ;
+        N -> {Len + N, case Out(Q) of
+                           {{value, {Prefix, Edge}}, Rest} ->
+                               In({Prefix, Join(Edge, Queue)}, Rest);
+                           {{value, {_, _}}, _} -> In({Prefix, Queue}, Q)
+                       end}
+    end.
+
+out(BPQ = {0, _}) -> {empty, BPQ};
+out(BPQ)          -> out1({fun queue:in_r/2, fun queue:out/1}, BPQ).
+%% out_r/1 mirrors out/1, taking the value from the rear instead.
+out_r(BPQ = {0, _}) -> {empty, BPQ};
+out_r(BPQ)          -> out1({fun queue:in/2, fun queue:out_r/1}, BPQ).
+
+%% Take one value from the edge block, dropping the block once drained.
+out1({In, Out}, {Len, Blocks}) ->
+    {{value, {Prefix, Inner}}, Rest} = Out(Blocks),
+    {{value, Value}, Inner1} = Out(Inner),
+    Kept = case queue:is_empty(Inner1) of
+               true -> Rest; false -> In({Prefix, Inner1}, Rest)
+           end,
+    {{value, Prefix, Value}, {Len - 1, Kept}}.
+
+%% Concatenate two bpqueues. If the last block of the first and the
+%% first block of the second share a prefix they are fused into one,
+%% so that no two consecutive blocks end up with the same prefix.
+join({0, _}, BPQ) -> BPQ;
+join(BPQ, {0, _}) -> BPQ;
+join({NHead, QHead}, {NTail, QTail}) ->
+    {{value, {Prefix, LastQ}}, QHead1} = queue:out_r(QHead),
+    Joined = case queue:out(QTail) of
+                 {{value, {Prefix, FirstQ}}, QTail1} ->
+                     Fused = {Prefix, queue:join(LastQ, FirstQ)},
+                     queue:join(queue:in(Fused, QHead1), QTail1);
+                 {{value, {_, _}}, _} ->
+                     queue:join(QHead, QTail)
+             end,
+    {NHead + NTail, Joined}.
+
+%% Fold Fun(Prefix, Value, Acc) over every value: foldl/3 walks front
+%% to rear, foldr/3 rear to front.
+foldl(_Fun, Init, {0, _}) -> Init;
+foldl(Fun, Init, {_, Q})  -> fold1(fun queue:out/1, Fun, Init, Q).
+foldr(_Fun, Init, {0, _}) -> Init;
+foldr(Fun, Init, {_, Q})  -> fold1(fun queue:out_r/1, Fun, Init, Q).
+
+%% Outer loop: take blocks one at a time with Out and fold each one.
+fold1(Out, Fun, Acc, Blocks) ->
+    case Out(Blocks) of
+        {empty, _} -> Acc;
+        {{value, {Prefix, Inner}}, Rest} ->
+            fold1(Out, Fun, fold1(Out, Fun, Prefix, Acc, Inner), Rest)
+    end.
+%% Inner loop: apply Fun to each value of a single block.
+fold1(Out, Fun, Prefix, Acc, Inner) ->
+    case Out(Inner) of
+        {empty, _} -> Acc;
+        {{value, Value}, Rest} ->
+            fold1(Out, Fun, Prefix, Fun(Prefix, Value, Acc), Rest)
+    end.
+
+%% Build a bpqueue from [{Prefix, [Value]}]. Entries with no values
+%% are skipped and adjacent entries with equal prefixes are merged.
+from_list(List) ->
+    %% The fold is seeded with an empty 'undefined' block, which ends
+    %% up first after the reverse and is discarded if still empty.
+    Step = fun ({_, []}, Acc) -> Acc;
+               ({P, Vs}, {P, InnerQ, Done, N}) ->
+                   {P, queue:join(InnerQ, queue:from_list(Vs)), Done,
+                    N + length(Vs)};
+               ({P1, Vs}, {P, InnerQ, Done, N}) ->
+                   {P1, queue:from_list(Vs), [{P, InnerQ} | Done],
+                    N + length(Vs)}
+           end,
+    {LastP, LastQ, Done1, Len} =
+        lists:foldl(Step, {undefined, queue:new(), [], 0}, List),
+    [{undefined, Seed} | Rest] = All = lists:reverse([{LastP, LastQ} | Done1]),
+    Blocks = case queue:is_empty(Seed) of true -> Rest; false -> All end,
+    {Len, queue:from_list(Blocks)}.
+
+%% Flatten to [{Prefix, [Value]}], one entry per block, front to rear.
+to_list({0, _}) -> [];
+to_list({_, Q}) -> [{P, queue:to_list(I)} || {P, I} <- queue:to_list(Q)].
+
+%% map_fold_filter_[lr](FilterFun, Fun, Init, BPQ) -> {BPQ, Init}
+%% where FilterFun(Prefix) -> boolean()
+%%       Fun(Value, Init) -> {Prefix, Value, Init} | stop
+%%
+%% The filter fun allows you to skip very quickly over blocks that
+%% you're not interested in. Such blocks appear in the resulting bpq
+%% without modification. The Fun is then used both to map the value,
+%% which also allows you to change the prefix (and thus block) of the
+%% value, and also to modify the Init/Acc (just like a fold).  If the
+%% Fun returns 'stop' then it is not applied to any further items.
+map_fold_filter_l(_PFilter, _Fun, Init, {0, _} = BPQ) -> {BPQ, Init};
+map_fold_filter_l(PFilter, Fun, Init, {N, Q}) ->
+    %% Walk front to rear, rebuilding the result by appending.
+    Funs = {fun queue:out/1, fun queue:in/2, fun in_q/3, fun join/2},
+    map_fold_filter1(Funs, N, PFilter, Fun, Init, Q, new()).
+
+map_fold_filter_r(_PFilter, _Fun, Init, {0, _} = BPQ) -> {BPQ, Init};
+map_fold_filter_r(PFilter, Fun, Init, {N, Q}) ->
+    %% Walk rear to front, rebuilding the result by prepending; join
+    %% is flipped so the unvisited remainder ends up in front.
+    Funs = {fun queue:out_r/1, fun queue:in_r/2, fun in_q_r/3,
+            fun (T, H) -> join(H, T) end},
+    map_fold_filter1(Funs, N, PFilter, Fun, Init, Q, new()).
+
+map_fold_filter1(Funs = {Out, _In, InQ, Join}, Len, PFilter, Fun,
+                 Init, Q, QNew) ->
+    case Out(Q) of
+        {empty, _Q} ->                  % no blocks left: done
+            {QNew, Init};
+        {{value, {Prefix, InnerQ}}, Q1} ->
+            case PFilter(Prefix) of
+                true ->                 % block selected: map its values
+                    {Init1, QNew1, Cont} =
+                        map_fold_filter2(Funs, Fun, Prefix, Prefix,
+                                         Init, InnerQ, QNew, queue:new()),
+                    case Cont of        % false once Fun has returned 'stop'
+                        false -> {Join(QNew1, {Len - len(QNew1), Q1}), Init1};
+                        true  -> map_fold_filter1(Funs, Len, PFilter, Fun,
+                                                  Init1, Q1, QNew1)
+                    end;
+                false ->                % block skipped: copy it over as is
+                    map_fold_filter1(Funs, Len, PFilter, Fun,
+                                     Init, Q1, InQ(Prefix, InnerQ, QNew))
+            end
+    end.
+
+map_fold_filter2(Funs = {Out, In, InQ, _Join}, Fun, OrigPrefix, Prefix,
+                 Init, InnerQ, QNew, InnerQNew) ->
+    case Out(InnerQ) of
+        {empty, _Q} ->                  % block done: flush, carry on
+            {Init, InQ(OrigPrefix, InnerQ,
+                       InQ(Prefix, InnerQNew, QNew)), true};
+        {{value, Value}, InnerQ1} ->
+            case Fun(Value, Init) of
+                stop ->                 % flush; the rest keeps OrigPrefix
+                    {Init, InQ(OrigPrefix, InnerQ,
+                               InQ(Prefix, InnerQNew, QNew)), false};
+                {Prefix1, Value1, Init1} ->
+                    {Prefix2, QNew1, InnerQNew1} =
+                        case Prefix1 =:= Prefix of
+                            true  -> {Prefix, QNew, In(Value1, InnerQNew)};
+                            false -> {Prefix1, InQ(Prefix, InnerQNew, QNew),
+                                      In(Value1, queue:new())}
+                        end,
+                    map_fold_filter2(Funs, Fun, OrigPrefix, Prefix2,
+                                     Init1, InnerQ1, QNew1, InnerQNew1)
+            end
+    end.
diff --git a/src/delegate.erl b/src/delegate.erl
index 12eb814f..3f57953b 100644
--- a/src/delegate.erl
+++ b/src/delegate.erl
@@ -44,9 +44,10 @@
 
 -ifdef(use_specs).
 
--spec(start_link/1 :: (non_neg_integer()) -> {'ok', pid()}).
--spec(invoke_no_result/2 :: (pid() | [pid()], fun((pid()) -> any())) -> 'ok').
--spec(invoke/2 :: (pid() | [pid()], fun((pid()) -> A)) -> A).
+-spec(start_link/1 :: (non_neg_integer()) -> rabbit_types:ok(pid())).
+-spec(invoke_no_result/2 ::
+        (pid() | [pid()], fun ((pid()) -> any())) -> 'ok').
+-spec(invoke/2 :: (pid() | [pid()], fun ((pid()) -> A)) -> A).
 
 -spec(process_count/0 :: () -> non_neg_integer()).
 
@@ -63,7 +64,7 @@ start_link(Hash) ->
     gen_server2:start_link({local, server(Hash)}, ?MODULE, [], []).
 
 invoke(Pid, Fun) when is_pid(Pid) ->
-    [Res] = invoke_per_node([{node(Pid), [Pid]}], Fun),
+    [Res] = invoke_per_node(split_delegate_per_node([Pid]), Fun),
     case Res of
         {ok, Result, _} ->
             Result;
@@ -73,7 +74,7 @@ invoke(Pid, Fun) when is_pid(Pid) ->
 
 invoke(Pids, Fun) when is_list(Pids) ->
     lists:foldl(
-        fun({Status, Result, Pid}, {Good, Bad}) ->
+        fun ({Status, Result, Pid}, {Good, Bad}) ->
             case Status of
                 ok    -> {[{Pid, Result}|Good], Bad};
                 error -> {Good, [{Pid, Result}|Bad]}
@@ -83,7 +84,7 @@ invoke(Pids, Fun) when is_list(Pids) ->
         invoke_per_node(split_delegate_per_node(Pids), Fun)).
 
 invoke_no_result(Pid, Fun) when is_pid(Pid) ->
-    invoke_no_result_per_node([{node(Pid), [Pid]}], Fun),
+    invoke_no_result_per_node(split_delegate_per_node([Pid]), Fun),
     ok;
 
 invoke_no_result(Pids, Fun) when is_list(Pids) ->
@@ -99,42 +100,47 @@ internal_cast(Node, Thunk) when is_atom(Node) ->
     gen_server2:cast({remote_server(Node), Node}, {thunk, Thunk}).
 
 split_delegate_per_node(Pids) ->
-    orddict:to_list(
-      lists:foldl(
-        fun (Pid, D) ->
-                orddict:update(node(Pid),
-                               fun (Pids1) -> [Pid | Pids1] end,
-                               [Pid], D)
-        end,
-        orddict:new(), Pids)).
+    LocalNode = node(),
+    {Local, Remote} =
+        lists:foldl(
+          fun (Pid, {L, D}) ->
+                  Node = node(Pid),
+                  case Node of
+                      LocalNode -> {[Pid|L], D};
+                      _         -> {L, orddict:append(Node, Pid, D)}
+                  end
+          end,
+          {[], orddict:new()}, Pids),
+    {Local, orddict:to_list(Remote)}.
 
-invoke_per_node([{Node, Pids}], Fun) when Node == node() ->
-    safe_invoke(Pids, Fun);
 invoke_per_node(NodePids, Fun) ->
     lists:append(delegate_per_node(NodePids, Fun, fun internal_call/2)).
 
-invoke_no_result_per_node([{Node, Pids}], Fun) when Node == node() ->
-    %% This is not actually async! However, in practice Fun will
-    %% always be something that does a gen_server:cast or similar, so
-    %% I don't think it's a problem unless someone misuses this
-    %% function. Making this *actually* async would be painful as we
-    %% can't spawn at this point or we break effect ordering.
-    safe_invoke(Pids, Fun);
 invoke_no_result_per_node(NodePids, Fun) ->
     delegate_per_node(NodePids, Fun, fun internal_cast/2),
     ok.
 
-delegate_per_node(NodePids, Fun, DelegateFun) ->
+delegate_per_node({LocalPids, NodePids}, Fun, DelegateFun) ->
+    %% In the case where DelegateFun is internal_cast, the safe_invoke
+    %% is not actually async! However, in practice Fun will always be
+    %% something that does a gen_server:cast or similar, so I don't
+    %% think it's a problem unless someone misuses this
+    %% function. Making this *actually* async would be painful as we
+    %% can't spawn at this point or we break effect ordering.
+    [safe_invoke(LocalPids, Fun)|
+     delegate_per_remote_node(NodePids, Fun, DelegateFun)].
+
+delegate_per_remote_node(NodePids, Fun, DelegateFun) ->
     Self = self(),
     %% Note that this is unsafe if the Fun requires reentrancy to the
     %% local_server. I.e. if self() == local_server(Node) then we'll
     %% block forever.
     [gen_server2:cast(
        local_server(Node),
-       {thunk, fun() ->
+       {thunk, fun () ->
                        Self ! {result,
                                DelegateFun(
-                                 Node, fun() -> safe_invoke(Pids, Fun) end)}
+                                 Node, fun () -> safe_invoke(Pids, Fun) end)}
                end}) || {Node, Pids} <- NodePids],
     [receive {result, Result} -> Result end || _ <- NodePids].
 
diff --git a/src/delegate_sup.erl b/src/delegate_sup.erl
index 1c1d62a9..39ef3f85 100644
--- a/src/delegate_sup.erl
+++ b/src/delegate_sup.erl
@@ -43,7 +43,7 @@
 
 -ifdef(use_specs).
 
--spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}).
+-spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), any()) | 'ignore').
 
 -endif.
 
diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl
index 0f648dcd..e209ee6b 100644
--- a/src/file_handle_cache.erl
+++ b/src/file_handle_cache.erl
@@ -182,18 +182,18 @@
 -ifdef(use_specs).
 
 -type(ref() :: any()).
--type(error() :: {'error', any()}).
--type(ok_or_error() :: ('ok' | error())).
--type(val_or_error(T) :: ({'ok', T} | error())).
+-type(ok_or_error() :: rabbit_types:ok_or_error(any())).
+-type(val_or_error(T) :: rabbit_types:ok_or_error2(T, any())).
 -type(position() :: ('bof' | 'eof' | non_neg_integer() |
-                     {('bof' |'eof'), non_neg_integer()} | {'cur', integer()})).
+                     {('bof' |'eof'), non_neg_integer()} |
+                     {'cur', integer()})).
 -type(offset() :: non_neg_integer()).
 
 -spec(register_callback/3 :: (atom(), atom(), [any()]) -> 'ok').
 -spec(open/3 ::
-      (string(), [any()],
-       [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) ->
-             val_or_error(ref())).
+        (string(), [any()],
+         [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}])
+        -> val_or_error(ref())).
 -spec(close/1 :: (ref()) -> ok_or_error()).
 -spec(read/2 :: (ref(), non_neg_integer()) ->
              val_or_error([char()] | binary()) | 'eof').
diff --git a/src/gatherer.erl b/src/gatherer.erl
new file mode 100644
index 00000000..31dda16e
--- /dev/null
+++ b/src/gatherer.erl
@@ -0,0 +1,145 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(gatherer).
+
+-behaviour(gen_server2).
+
+-export([start_link/0, stop/1, fork/1, finish/1, in/2, out/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), any()) | 'ignore').
+-spec(stop/1 :: (pid()) -> 'ok').
+-spec(fork/1 :: (pid()) -> 'ok').
+-spec(finish/1 :: (pid()) -> 'ok').
+-spec(in/2 :: (pid(), any()) -> 'ok').
+-spec(out/1 :: (pid()) -> {'value', any()} | 'empty').
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+-define(HIBERNATE_AFTER_MIN, 1000).
+-define(DESIRED_HIBERNATE, 10000).
+
+%%----------------------------------------------------------------------------
+
+-record(gstate, { forks, values, blocked }).
+
+%%----------------------------------------------------------------------------
+
+%% Start an unregistered gatherer process.
+start_link() -> gen_server2:start_link(?MODULE, [], [{timeout, infinity}]).
+
+%% Synchronously stop the gatherer.
+stop(Pid) -> gen_server2:call(Pid, stop, infinity).
+
+%% Synchronously increment the count of outstanding forks.
+fork(Pid) -> gen_server2:call(Pid, fork, infinity).
+
+%% Asynchronously decrement the count of outstanding forks.
+finish(Pid) -> gen_server2:cast(Pid, finish).
+
+%% Asynchronously hand Value to the gatherer.
+in(Pid, Value) -> gen_server2:cast(Pid, {in, Value}).
+
+%% Next value, or 'empty' once no values and no forks remain.
+out(Pid) -> gen_server2:call(Pid, out, infinity).
+
+%%----------------------------------------------------------------------------
+
+init([]) ->
+    Idle = #gstate { forks = 0, values = queue:new(), blocked = queue:new() },
+    {ok, Idle, hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+handle_call(stop, _From, State) ->
+    {stop, normal, ok, State};
+
+%% fork: one more outstanding fork to wait for.
+handle_call(fork, _From, State = #gstate { forks = Forks }) ->
+    {reply, ok, State #gstate { forks = Forks + 1 }, hibernate};
+
+%% out: hand over the oldest buffered value. With none buffered, reply
+%% 'empty' if no forks are outstanding, else park the caller.
+handle_call(out, From, State = #gstate { forks = Forks, values = Values,
+                                         blocked = Blocked }) ->
+    case {queue:out(Values), Forks} of
+        {{{value, _} = Reply, Values1}, _} ->
+            {reply, Reply, State #gstate { values = Values1 }, hibernate};
+        {{empty, _}, 0} ->
+            {reply, empty, State, hibernate};
+        {{empty, _}, _} ->
+            Blocked1 = queue:in(From, Blocked),
+            {noreply, State #gstate { blocked = Blocked1 }, hibernate}
+    end;
+
+handle_call(Msg, _From, State) ->
+    {stop, {unexpected_call, Msg}, State}.
+
+%% finish: one fork is done; the last one releases all parked callers.
+handle_cast(finish, State = #gstate { forks = Forks, blocked = Blocked }) ->
+    State1 = State #gstate { forks = Forks - 1 },
+    case State1 #gstate.forks of
+        0 -> lists:foreach(fun (From) -> gen_server2:reply(From, empty) end,
+                           queue:to_list(Blocked)),
+             {noreply, State1 #gstate { blocked = queue:new() }, hibernate};
+        _ -> {noreply, State1, hibernate}
+    end;
+
+%% in: hand Value to the longest-parked caller, or buffer it if none.
+handle_cast({in, Value}, State = #gstate { values = Vs, blocked = Bs }) ->
+    State1 = case queue:out(Bs) of
+                 {empty, _} ->
+                     State #gstate { values = queue:in(Value, Vs) };
+                 {{value, From}, Bs1} ->
+                     gen_server2:reply(From, {value, Value}),
+                     State #gstate { blocked = Bs1 }
+             end,
+    {noreply, State1, hibernate};
+
+handle_cast(Msg, State) ->
+    {stop, {unexpected_cast, Msg}, State}.
+
+%% No raw messages are expected; any that arrives stops the server.
+handle_info(Msg, State) -> {stop, {unexpected_info, Msg}, State}.
+
+%% The state is carried over unchanged on a code change.
+code_change(_OldVsn, State, _Extra) -> {ok, State}.
+
+%% Nothing is cleaned up here; the state is simply returned.
+terminate(_Reason, State) -> State.
diff --git a/src/gen_server2.erl b/src/gen_server2.erl
index 5b899cdb..49ae63c1 100644
--- a/src/gen_server2.erl
+++ b/src/gen_server2.erl
@@ -186,7 +186,7 @@
 -ifdef(use_specs).
 
 -spec(handle_common_termination/6 ::
-      (any(), any(), any(), atom(), any(), any()) -> no_return()). 
+      (any(), any(), any(), atom(), any(), any()) -> no_return()).
 
 -spec(hibernate/7 ::
       (pid(), any(), any(), atom(), any(), queue(), any()) -> no_return()).
@@ -639,7 +639,7 @@ do_multi_call(Nodes, Name, Req, Timeout) ->
     Caller = self(),
     Receiver =
 	spawn(
-	  fun() ->
+	  fun () ->
 		  %% Middleman process. Should be unsensitive to regular
 		  %% exit signals. The sychronization is needed in case
 		  %% the receiver would exit before the caller started
diff --git a/src/pg_local.erl b/src/pg_local.erl
index 1501331d..f5ded123 100644
--- a/src/pg_local.erl
+++ b/src/pg_local.erl
@@ -36,8 +36,8 @@
 
 -export([join/2, leave/2, get_members/1]).
 -export([sync/0]). %% intended for testing only; not part of official API
--export([start/0,start_link/0,init/1,handle_call/3,handle_cast/2,handle_info/2,
-         terminate/2]).
+-export([start/0, start_link/0, init/1, handle_call/3, handle_cast/2,
+         handle_info/2, terminate/2]).
 
 %%----------------------------------------------------------------------------
 
@@ -45,8 +45,8 @@
 
 -type(name() :: term()).
 
--spec(start_link/0 :: () -> {'ok', pid()} | {'error', term()}).
--spec(start/0 :: () -> {'ok', pid()} | {'error', term()}).
+-spec(start_link/0 :: () -> rabbit_types:ok_or_error2(pid(), term())).
+-spec(start/0 :: () -> rabbit_types:ok_or_error2(pid(), term())).
 -spec(join/2 :: (name(), pid()) -> 'ok').
 -spec(leave/2 :: (name(), pid()) -> 'ok').
 -spec(get_members/1 :: (name()) -> [pid()]).
diff --git a/src/rabbit.erl b/src/rabbit.erl
index 67f8df94..ada2c38e 100644
--- a/src/rabbit.erl
+++ b/src/rabbit.erl
@@ -33,7 +33,8 @@
 
 -behaviour(application).
 
--export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0, rotate_logs/1]).
+-export([prepare/0, start/0, stop/0, stop_and_halt/0, status/0,
+         rotate_logs/1]).
 
 -export([start/2, stop/1]).
 
@@ -183,18 +184,19 @@
 
 -ifdef(use_specs).
 
--type(log_location() :: 'tty' | 'undefined' | string()).
 -type(file_suffix() :: binary()).
+%% this really should be an abstract type
+-type(log_location() :: 'tty' | 'undefined' | file:filename()).
 
 -spec(prepare/0 :: () -> 'ok').
 -spec(start/0 :: () -> 'ok').
 -spec(stop/0 :: () -> 'ok').
 -spec(stop_and_halt/0 :: () -> 'ok').
--spec(rotate_logs/1 :: (file_suffix()) -> 'ok' | {'error', any()}).
--spec(status/0 :: () ->
-             [{running_applications, [{atom(), string(), string()}]} |
-              {nodes, [erlang_node()]} |
-              {running_nodes, [erlang_node()]}]).
+-spec(rotate_logs/1 :: (file_suffix()) -> rabbit_types:ok_or_error(any())).
+-spec(status/0 ::
+        () -> [{running_applications, [{atom(), string(), string()}]} |
+               {nodes, [{rabbit_mnesia:node_type(), [node()]}]} |
+               {running_nodes, [node()]}]).
 -spec(log_location/1 :: ('sasl' | 'kernel') -> log_location()).
 
 -endif.
@@ -299,6 +301,18 @@ run_boot_step({StepName, Attributes}) ->
             ok
     end.
 
+%% Attributes of Module, or [] (with a warning) if it cannot be loaded.
+module_attributes(Module) ->
+    case catch Module:module_info(attributes) of
+        {'EXIT', {undef, [F | _]}}
+          when element(1, F) =:= Module, element(2, F) =:= module_info ->
+            io:format("WARNING: module ~p not found, so not scanned for boot steps.~n",
+                      [Module]),
+            [];
+        {'EXIT', Reason} -> exit(Reason);
+        V                -> V
+    end.
+
 boot_steps() ->
     AllApps = [App || {App, _, _} <- application:loaded_applications()],
     Modules = lists:usort(
@@ -310,7 +324,7 @@ boot_steps() ->
         lists:flatmap(fun (Module) ->
                               [{StepName, Attributes}
                                || {rabbit_boot_step, [{StepName, Attributes}]}
-                                      <- Module:module_info(attributes)]
+                                      <- module_attributes(Module)]
                       end, Modules),
     sort_boot_steps(UnsortedSteps).
 
@@ -412,9 +426,9 @@ print_banner() ->
               "| ~s  +---+   |~n"
               "|                   |~n"
               "+-------------------+~n"
-              "AMQP ~p-~p~n~s~n~s~n~n",
+              "~s~n~s~n~s~n~n",
               [Product, string:right([$v|Version], ProductLen),
-               ?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR,
+               ?PROTOCOL_VERSION,
                ?COPYRIGHT_MESSAGE, ?INFORMATION_MESSAGE]),
     Settings = [{"node",           node()},
                 {"app descriptor", app_location()},
diff --git a/src/rabbit_access_control.erl b/src/rabbit_access_control.erl
index a445f441..3aaf5928 100644
--- a/src/rabbit_access_control.erl
+++ b/src/rabbit_access_control.erl
@@ -45,28 +45,38 @@
 
 -ifdef(use_specs).
 
+-export_type([username/0, password/0]).
+
 -type(permission_atom() :: 'configure' | 'read' | 'write').
+-type(username() :: binary()).
+-type(password() :: binary()).
+-type(regexp() :: binary()).
 
--spec(check_login/2 :: (binary(), binary()) -> user()).
--spec(user_pass_login/2 :: (username(), password()) -> user()).
--spec(check_vhost_access/2 :: (user(), vhost()) -> 'ok').
+-spec(check_login/2 :: (binary(), binary()) -> rabbit_types:user()).
+-spec(user_pass_login/2 :: (username(), password()) -> rabbit_types:user()).
+-spec(check_vhost_access/2 ::
+        (rabbit_types:user(), rabbit_types:vhost()) -> 'ok').
 -spec(check_resource_access/3 ::
-      (username(), r(atom()), permission_atom()) -> 'ok').
+        (username(), rabbit_types:r(atom()), permission_atom()) -> 'ok').
 -spec(add_user/2 :: (username(), password()) -> 'ok').
 -spec(delete_user/1 :: (username()) -> 'ok').
 -spec(change_password/2 :: (username(), password()) -> 'ok').
 -spec(list_users/0 :: () -> [username()]).
--spec(lookup_user/1 :: (username()) -> {'ok', user()} | not_found()).
--spec(add_vhost/1 :: (vhost()) -> 'ok').
--spec(delete_vhost/1 :: (vhost()) -> 'ok').
--spec(list_vhosts/0 :: () -> [vhost()]).
--spec(set_permissions/5 ::
-      (username(), vhost(), regexp(), regexp(), regexp()) -> 'ok').
--spec(clear_permissions/2 :: (username(), vhost()) -> 'ok').
+-spec(lookup_user/1 ::
+        (username()) -> rabbit_types:ok(rabbit_types:user())
+                            | rabbit_types:error('not_found')).
+-spec(add_vhost/1 :: (rabbit_types:vhost()) -> 'ok').
+-spec(delete_vhost/1 :: (rabbit_types:vhost()) -> 'ok').
+-spec(list_vhosts/0 :: () -> [rabbit_types:vhost()]).
+-spec(set_permissions/5 ::(username(), rabbit_types:vhost(), regexp(),
+                           regexp(), regexp()) -> 'ok').
+-spec(clear_permissions/2 :: (username(), rabbit_types:vhost()) -> 'ok').
 -spec(list_vhost_permissions/1 ::
-      (vhost()) -> [{username(), regexp(), regexp(), regexp()}]).
+        (rabbit_types:vhost())
+        -> [{username(), regexp(), regexp(), regexp()}]).
 -spec(list_user_permissions/1 ::
-      (username()) -> [{vhost(), regexp(), regexp(), regexp()}]).
+        (username())
+        -> [{rabbit_types:vhost(), regexp(), regexp(), regexp()}]).
 
 -endif.
 
@@ -162,11 +172,14 @@ check_resource_access(Username,
               [] ->
                   false;
               [#user_permission{permission = P}] ->
-                  case regexp:match(
-                         binary_to_list(Name),
-                         binary_to_list(element(permission_index(Permission), P))) of
-                      {match, _, _} -> true;
-                      nomatch       -> false
+                  PermRegexp = case element(permission_index(Permission), P) of
+                                   %% <<"^$">> breaks Emacs' erlang mode
+                                   <<"">> -> <<$^, $$>>;
+                                   RE     -> RE
+                               end,
+                  case re:run(Name, PermRegexp, [{capture, none}]) of
+                      match    -> true;
+                      nomatch  -> false
                   end
           end,
     if Res  -> ok;
@@ -291,7 +304,7 @@ list_vhosts() ->
 
 validate_regexp(RegexpBin) ->
     Regexp = binary_to_list(RegexpBin),
-    case regexp:parse(Regexp) of
+    case re:compile(Regexp) of
         {ok, _}         -> ok;
         {error, Reason} -> throw({error, {invalid_regexp, Regexp, Reason}})
     end.
diff --git a/src/rabbit_alarm.erl b/src/rabbit_alarm.erl
index 7e96d9a3..53c713e6 100644
--- a/src/rabbit_alarm.erl
+++ b/src/rabbit_alarm.erl
@@ -47,7 +47,7 @@
 -type(mfa_tuple() :: {atom(), atom(), list()}).
 -spec(start/0 :: () -> 'ok').
 -spec(stop/0 :: () -> 'ok').
--spec(register/2 :: (pid(), mfa_tuple()) -> 'ok').
+-spec(register/2 :: (pid(), mfa_tuple()) -> boolean()).
 
 -endif.
 
@@ -67,9 +67,9 @@ stop() ->
     ok = alarm_handler:delete_alarm_handler(?MODULE).
 
 register(Pid, HighMemMFA) ->
-    ok = gen_event:call(alarm_handler, ?MODULE,
-                        {register, Pid, HighMemMFA},
-                        infinity).
+    gen_event:call(alarm_handler, ?MODULE,
+                   {register, Pid, HighMemMFA},
+                   infinity).
 
 %%----------------------------------------------------------------------------
 
@@ -84,7 +84,8 @@ handle_call({register, Pid, {M, F, A} = HighMemMFA},
              false -> ok
          end,
     NewAlertees = dict:store(Pid, HighMemMFA, Alertess),
-    {ok, ok, State#alarms{alertees = NewAlertees}};
+    {ok, State#alarms.vm_memory_high_watermark,
+     State#alarms{alertees = NewAlertees}};
 
 handle_call(_Request, State) ->
     {ok, not_understood, State}.
diff --git a/src/rabbit_amqqueue.erl b/src/rabbit_amqqueue.erl
index 7b88c45d..870c119a 100644
--- a/src/rabbit_amqqueue.erl
+++ b/src/rabbit_amqqueue.erl
@@ -31,18 +31,18 @@
 
 -module(rabbit_amqqueue).
 
--export([start/0, declare/4, delete/3, purge/1]).
+-export([start/0, stop/0, declare/5, delete/3, purge/1]).
 -export([internal_declare/2, internal_delete/1,
          maybe_run_queue_via_backing_queue/2,
          update_ram_duration/1, set_ram_duration_target/2,
-         set_maximum_since_use/2]).
+         set_maximum_since_use/2, maybe_expire/1]).
 -export([pseudo_queue/2]).
--export([lookup/1, with/2, with_or_die/2,
-         stat/1, stat_all/0, deliver/2, requeue/3, ack/4]).
+-export([lookup/1, with/2, with_or_die/2, assert_equivalence/5,
+         check_exclusive_access/2, with_exclusive_access_or_die/3,
+         stat/1, deliver/2, requeue/3, ack/4, reject/4]).
 -export([list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2]).
 -export([consumers/1, consumers_all/1]).
--export([claim_queue/2]).
--export([basic_get/3, basic_consume/8, basic_cancel/4]).
+-export([basic_get/3, basic_consume/7, basic_cancel/4]).
 -export([notify_sent/2, unblock/2, flush_all/2]).
 -export([commit_all/3, rollback_all/3, notify_down_all/2, limit_all/3]).
 -export([on_node_down/1]).
@@ -55,68 +55,103 @@
 -include("rabbit.hrl").
 -include_lib("stdlib/include/qlc.hrl").
 
+-define(EXPIRES_TYPE, long).
+
 %%----------------------------------------------------------------------------
 
 -ifdef(use_specs).
 
--type(qstats() :: {'ok', queue_name(), non_neg_integer(), non_neg_integer()}).
--type(qlen() :: {'ok', non_neg_integer()}).
--type(qfun(A) :: fun ((amqqueue()) -> A)).
+-export_type([name/0, qmsg/0]).
+
+-type(name() :: rabbit_types:r('queue')).
+
+-type(qlen() :: rabbit_types:ok(non_neg_integer())).
+-type(qfun(A) :: fun ((rabbit_types:amqqueue()) -> A)).
+-type(qmsg() :: {name(), pid(), msg_id(), boolean(), rabbit_types:message()}).
+-type(msg_id() :: non_neg_integer()).
 -type(ok_or_errors() ::
       'ok' | {'error', [{'error' | 'exit' | 'throw', any()}]}).
 
 -spec(start/0 :: () -> 'ok').
--spec(declare/4 :: (queue_name(), boolean(), boolean(), amqp_table()) ->
-             amqqueue()).
--spec(lookup/1 :: (queue_name()) -> {'ok', amqqueue()} | not_found()).
--spec(with/2 :: (queue_name(), qfun(A)) -> A | not_found()).
--spec(with_or_die/2 :: (queue_name(), qfun(A)) -> A).
--spec(list/1 :: (vhost()) -> [amqqueue()]).
--spec(info_keys/0 :: () -> [info_key()]).
--spec(info/1 :: (amqqueue()) -> [info()]).
--spec(info/2 :: (amqqueue(), [info_key()]) -> [info()]).
--spec(info_all/1 :: (vhost()) -> [[info()]]).
--spec(info_all/2 :: (vhost(), [info_key()]) -> [[info()]]).
--spec(consumers/1 :: (amqqueue()) -> [{pid(), ctag(), boolean()}]).
+-spec(stop/0 :: () -> 'ok').
+-spec(declare/5 ::
+        (name(), boolean(), boolean(),
+         rabbit_framing:amqp_table(), rabbit_types:maybe(pid()))
+        -> {'new' | 'existing', rabbit_types:amqqueue()}).
+-spec(lookup/1 ::
+        (name()) -> rabbit_types:ok(rabbit_types:amqqueue()) |
+                    rabbit_types:error('not_found')).
+-spec(with/2 :: (name(), qfun(A)) -> A | rabbit_types:error('not_found')).
+-spec(with_or_die/2 :: (name(), qfun(A)) -> A).
+-spec(assert_equivalence/5 ::
+        (rabbit_types:amqqueue(), boolean(), boolean(),
+         rabbit_framing:amqp_table(), rabbit_types:maybe(pid()))
+        -> 'ok' | no_return()).
+-spec(check_exclusive_access/2 :: (rabbit_types:amqqueue(), pid()) -> 'ok').
+-spec(with_exclusive_access_or_die/3 :: (name(), pid(), qfun(A)) -> A).
+-spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:amqqueue()]).
+-spec(info_keys/0 :: () -> [rabbit_types:info_key()]).
+-spec(info/1 :: (rabbit_types:amqqueue()) -> [rabbit_types:info()]).
+-spec(info/2 ::
+        (rabbit_types:amqqueue(), [rabbit_types:info_key()])
+        -> [rabbit_types:info()]).
+-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]).
+-spec(info_all/2 :: (rabbit_types:vhost(), [rabbit_types:info_key()])
+                    -> [[rabbit_types:info()]]).
+-spec(consumers/1 ::
+        (rabbit_types:amqqueue())
+        -> [{pid(), rabbit_types:ctag(), boolean()}]).
 -spec(consumers_all/1 ::
-      (vhost()) -> [{queue_name(), pid(), ctag(), boolean()}]).
--spec(stat/1 :: (amqqueue()) -> qstats()).
--spec(stat_all/0 :: () -> [qstats()]).
+        (rabbit_types:vhost())
+        -> [{name(), pid(), rabbit_types:ctag(), boolean()}]).
+-spec(stat/1 ::
+        (rabbit_types:amqqueue())
+        -> {'ok', non_neg_integer(), non_neg_integer()}).
 -spec(delete/3 ::
-      (amqqueue(), 'false', 'false') -> qlen();
-      (amqqueue(), 'true' , 'false') -> qlen() | {'error', 'in_use'};
-      (amqqueue(), 'false', 'true' ) -> qlen() | {'error', 'not_empty'};
-      (amqqueue(), 'true' , 'true' ) -> qlen() |
-                                            {'error', 'in_use'} |
-                                            {'error', 'not_empty'}).
--spec(purge/1 :: (amqqueue()) -> qlen()).
--spec(deliver/2 :: (pid(), delivery()) -> boolean()).
+      (rabbit_types:amqqueue(), 'false', 'false')
+        -> qlen();
+      (rabbit_types:amqqueue(), 'true' , 'false')
+        -> qlen() | rabbit_types:error('in_use');
+      (rabbit_types:amqqueue(), 'false', 'true' )
+        -> qlen() | rabbit_types:error('not_empty');
+      (rabbit_types:amqqueue(), 'true' , 'true' )
+        -> qlen() |
+           rabbit_types:error('in_use') |
+           rabbit_types:error('not_empty')).
+-spec(purge/1 :: (rabbit_types:amqqueue()) -> qlen()).
+-spec(deliver/2 :: (pid(), rabbit_types:delivery()) -> boolean()).
 -spec(requeue/3 :: (pid(), [msg_id()],  pid()) -> 'ok').
--spec(ack/4 :: (pid(), maybe(txn()), [msg_id()], pid()) -> 'ok').
--spec(commit_all/3 :: ([pid()], txn(), pid()) -> ok_or_errors()).
--spec(rollback_all/3 :: ([pid()], txn(), pid()) -> 'ok').
+-spec(ack/4 ::
+        (pid(), rabbit_types:maybe(rabbit_types:txn()), [msg_id()], pid())
+        -> 'ok').
+-spec(reject/4 :: (pid(), [msg_id()], boolean(), pid()) -> 'ok').
+-spec(commit_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> ok_or_errors()).
+-spec(rollback_all/3 :: ([pid()], rabbit_types:txn(), pid()) -> 'ok').
 -spec(notify_down_all/2 :: ([pid()], pid()) -> ok_or_errors()).
 -spec(limit_all/3 :: ([pid()], pid(), pid() | 'undefined') -> ok_or_errors()).
--spec(claim_queue/2 :: (amqqueue(), pid()) -> 'ok' | 'locked').
--spec(basic_get/3 :: (amqqueue(), pid(), boolean()) ->
+-spec(basic_get/3 :: (rabbit_types:amqqueue(), pid(), boolean()) ->
              {'ok', non_neg_integer(), qmsg()} | 'empty').
--spec(basic_consume/8 ::
-      (amqqueue(), boolean(), pid(), pid(), pid() | 'undefined', ctag(),
-       boolean(), any()) ->
-             'ok' | {'error', 'queue_owned_by_another_connection' |
-                     'exclusive_consume_unavailable'}).
--spec(basic_cancel/4 :: (amqqueue(), pid(), ctag(), any()) -> 'ok').
+-spec(basic_consume/7 ::
+      (rabbit_types:amqqueue(), boolean(), pid(), pid() | 'undefined',
+       rabbit_types:ctag(), boolean(), any())
+        -> rabbit_types:ok_or_error('exclusive_consume_unavailable')).
+-spec(basic_cancel/4 ::
+        (rabbit_types:amqqueue(), pid(), rabbit_types:ctag(), any()) -> 'ok').
 -spec(notify_sent/2 :: (pid(), pid()) -> 'ok').
 -spec(unblock/2 :: (pid(), pid()) -> 'ok').
 -spec(flush_all/2 :: ([pid()], pid()) -> 'ok').
--spec(internal_declare/2 :: (amqqueue(), boolean()) -> amqqueue() | 'not_found').
--spec(internal_delete/1 :: (queue_name()) -> 'ok' | not_found()).
--spec(maybe_run_queue_via_backing_queue/2 :: (pid(), (fun ((A) -> A))) -> 'ok').
+-spec(internal_declare/2 ::
+        (rabbit_types:amqqueue(), boolean())
+        -> rabbit_types:amqqueue() | 'not_found').
+-spec(internal_delete/1 :: (name()) -> rabbit_types:ok_or_error('not_found')).
+-spec(maybe_run_queue_via_backing_queue/2 ::
+        (pid(), (fun ((A) -> A))) -> 'ok').
 -spec(update_ram_duration/1 :: (pid()) -> 'ok').
--spec(set_ram_duration_target/2 :: (pid(), number()) -> 'ok').
+-spec(set_ram_duration_target/2 :: (pid(), number() | 'infinity') -> 'ok').
 -spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok').
--spec(on_node_down/1 :: (erlang_node()) -> 'ok').
--spec(pseudo_queue/2 :: (binary(), pid()) -> amqqueue()).
+-spec(maybe_expire/1 :: (pid()) -> 'ok').
+-spec(on_node_down/1 :: (node()) -> 'ok').
+-spec(pseudo_queue/2 :: (binary(), pid()) -> rabbit_types:amqqueue()).
 
 -endif.
 
@@ -124,7 +159,7 @@
 
 start() ->
     DurableQueues = find_durable_queues(),
-    {ok, BQ} = application:get_env(backing_queue_module),
+    {ok, BQ} = application:get_env(rabbit, backing_queue_module),
     ok = BQ:start([QName || #amqqueue{name = QName} <- DurableQueues]),
     {ok,_} = supervisor:start_child(
                rabbit_sup,
@@ -134,6 +169,12 @@ start() ->
     _RealDurableQueues = recover_durable_queues(DurableQueues),
     ok.
 
+stop() ->
+    ok = supervisor:terminate_child(rabbit_sup, rabbit_amqqueue_sup),
+    ok = supervisor:delete_child(rabbit_sup, rabbit_amqqueue_sup),
+    {ok, BQ} = application:get_env(rabbit, backing_queue_module),
+    ok = BQ:stop().
+
 find_durable_queues() ->
     Node = node(),
     %% TODO: use dirty ops instead
@@ -148,11 +189,13 @@ recover_durable_queues(DurableQueues) ->
     Qs = [start_queue_process(Q) || Q <- DurableQueues],
     [Q || Q <- Qs, gen_server2:call(Q#amqqueue.pid, {init, true}) == Q].
 
-declare(QueueName, Durable, AutoDelete, Args) ->
+declare(QueueName, Durable, AutoDelete, Args, Owner) ->
+    ok = check_declare_arguments(QueueName, Args),
     Q = start_queue_process(#amqqueue{name = QueueName,
                                       durable = Durable,
                                       auto_delete = AutoDelete,
                                       arguments = Args,
+                                      exclusive_owner = Owner,
                                       pid = none}),
     case gen_server2:call(Q#amqqueue.pid, {init, false}) of
         not_found -> rabbit_misc:not_found(QueueName);
@@ -197,7 +240,8 @@ start_queue_process(Q) ->
 add_default_binding(#amqqueue{name = QueueName}) ->
     Exchange = rabbit_misc:r(QueueName, exchange, <<>>),
     RoutingKey = QueueName#resource.name,
-    rabbit_exchange:add_binding(Exchange, QueueName, RoutingKey, []),
+    rabbit_exchange:add_binding(Exchange, QueueName, RoutingKey, [],
+                                fun (_X, _Q) -> ok end),
     ok.
 
 lookup(Name) ->
@@ -214,6 +258,59 @@ with(Name, F) ->
 with_or_die(Name, F) ->
     with(Name, F, fun () -> rabbit_misc:not_found(Name) end).
 
+assert_equivalence(#amqqueue{durable     = Durable,
+                             auto_delete = AutoDelete} = Q,
+                   Durable, AutoDelete, RequiredArgs, Owner) ->
+    assert_args_equivalence(Q, RequiredArgs),
+    check_exclusive_access(Q, Owner, strict);
+assert_equivalence(#amqqueue{name = QueueName},
+                   _Durable, _AutoDelete, _RequiredArgs, _Owner) ->
+    rabbit_misc:protocol_error(
+      not_allowed, "parameters for ~s not equivalent",
+      [rabbit_misc:rs(QueueName)]).
+
+check_exclusive_access(Q, Owner) -> check_exclusive_access(Q, Owner, lax).
+
+check_exclusive_access(#amqqueue{exclusive_owner = Owner}, Owner, _MatchType) ->
+    ok;
+check_exclusive_access(#amqqueue{exclusive_owner = none}, _ReaderPid, lax) ->
+    ok;
+check_exclusive_access(#amqqueue{name = QueueName}, _ReaderPid, _MatchType) ->
+    rabbit_misc:protocol_error(
+      resource_locked,
+      "cannot obtain exclusive access to locked ~s",
+      [rabbit_misc:rs(QueueName)]).
+
+with_exclusive_access_or_die(Name, ReaderPid, F) ->
+    with_or_die(Name,
+                fun (Q) -> check_exclusive_access(Q, ReaderPid), F(Q) end).
+
+assert_args_equivalence(#amqqueue{name = QueueName, arguments = Args},
+                       RequiredArgs) ->
+    rabbit_misc:assert_args_equivalence(Args, RequiredArgs, QueueName,
+                                        [<<"x-expires">>]).
+
+check_declare_arguments(QueueName, Args) ->
+    [case Fun(rabbit_misc:table_lookup(Args, Key)) of
+         ok             -> ok;
+         {error, Error} -> rabbit_misc:protocol_error(
+                             precondition_failed,
+                             "Invalid arguments in declaration of queue ~s: "
+                             "~w (on argument: ~w)",
+                             [rabbit_misc:rs(QueueName), Error, Key])
+     end || {Key, Fun} <- [{<<"x-expires">>, fun check_expires_argument/1}]],
+    ok.
+
+check_expires_argument(undefined) ->
+    ok;
+check_expires_argument({?EXPIRES_TYPE, Expires})
+  when is_integer(Expires) andalso Expires > 0 ->
+    ok;
+check_expires_argument({?EXPIRES_TYPE, _Expires}) ->
+    {error, expires_zero_or_less};
+check_expires_argument(_) ->
+    {error, expires_not_of_type_long}.
+
 list(VHostPath) ->
     mnesia:dirty_match_object(
       rabbit_queue,
@@ -248,9 +345,6 @@ consumers_all(VHostPath) ->
 
 stat(#amqqueue{pid = QPid}) -> delegate_call(QPid, stat, infinity).
 
-stat_all() ->
-    lists:map(fun stat/1, rabbit_misc:dirty_read_all(rabbit_queue)).
-
 delete(#amqqueue{ pid = QPid }, IfUnused, IfEmpty) ->
     delegate_call(QPid, {delete, IfUnused, IfEmpty}, infinity).
 
@@ -269,11 +363,14 @@ deliver(QPid, #delivery{txn = Txn, sender = ChPid, message = Message}) ->
     true.
 
 requeue(QPid, MsgIds, ChPid) ->
-    delegate_cast(QPid, {requeue, MsgIds, ChPid}).
+    delegate_call(QPid, {requeue, MsgIds, ChPid}, infinity).
 
 ack(QPid, Txn, MsgIds, ChPid) ->
     delegate_pcast(QPid, 7, {ack, Txn, MsgIds, ChPid}).
 
+reject(QPid, MsgIds, Requeue, ChPid) ->
+    delegate_pcast(QPid, 7, {reject, MsgIds, Requeue, ChPid}).
+
 commit_all(QPids, Txn, ChPid) ->
     safe_delegate_call_ok(
       fun (QPid) -> exit({queue_disappeared, QPid}) end,
@@ -298,15 +395,12 @@ limit_all(QPids, ChPid, LimiterPid) ->
                      gen_server2:cast(QPid, {limit, ChPid, LimiterPid})
              end).
 
-claim_queue(#amqqueue{pid = QPid}, ReaderPid) ->
-    delegate_call(QPid, {claim_queue, ReaderPid}, infinity).
-
 basic_get(#amqqueue{pid = QPid}, ChPid, NoAck) ->
     delegate_call(QPid, {basic_get, ChPid, NoAck}, infinity).
 
-basic_consume(#amqqueue{pid = QPid}, NoAck, ReaderPid, ChPid, LimiterPid,
+basic_consume(#amqqueue{pid = QPid}, NoAck, ChPid, LimiterPid,
               ConsumerTag, ExclusiveConsume, OkMsg) ->
-    delegate_call(QPid, {basic_consume, NoAck, ReaderPid, ChPid,
+    delegate_call(QPid, {basic_consume, NoAck, ChPid,
                          LimiterPid, ConsumerTag, ExclusiveConsume, OkMsg},
                   infinity).
 
@@ -324,19 +418,21 @@ flush_all(QPids, ChPid) ->
     delegate:invoke_no_result(
       QPids, fun (QPid) -> gen_server2:cast(QPid, {flush, ChPid}) end).
 
+internal_delete1(QueueName) ->
+    ok = mnesia:delete({rabbit_queue, QueueName}),
+    ok = mnesia:delete({rabbit_durable_queue, QueueName}),
+    %% rabbit_exchange decides what still needs to be executed
+    %% once the transaction has finished, so hand its result back
+    %% to the caller rather than acting on it in here.
+    rabbit_exchange:delete_queue_bindings(QueueName).
+
 internal_delete(QueueName) ->
     case
         rabbit_misc:execute_mnesia_transaction(
           fun () ->
                   case mnesia:wread({rabbit_queue, QueueName}) of
                       []  -> {error, not_found};
-                      [_] ->
-                          ok = mnesia:delete({rabbit_queue, QueueName}),
-                          ok = mnesia:delete({rabbit_durable_queue, QueueName}),
-                          %% we want to execute some things, as
-                          %% decided by rabbit_exchange, after the
-                          %% transaction.
-                          rabbit_exchange:delete_queue_bindings(QueueName)
+                      [_] -> internal_delete1(QueueName)
                   end
           end) of
         Err = {error, _} -> Err;
@@ -358,6 +454,9 @@ set_ram_duration_target(QPid, Duration) ->
 set_maximum_since_use(QPid, Age) ->
     gen_server2:pcast(QPid, 8, {set_maximum_since_use, Age}).
 
+maybe_expire(QPid) ->
+    gen_server2:pcast(QPid, 8, maybe_expire).
+
 on_node_down(Node) ->
     [Hook() ||
         Hook <- rabbit_misc:execute_mnesia_transaction(
@@ -394,15 +493,13 @@ safe_delegate_call_ok(H, F, Pids) ->
     end.
 
 delegate_call(Pid, Msg, Timeout) ->
-    delegate:invoke(Pid, fun(P) -> gen_server2:call(P, Msg, Timeout) end).
+    delegate:invoke(Pid, fun (P) -> gen_server2:call(P, Msg, Timeout) end).
 
 delegate_pcall(Pid, Pri, Msg, Timeout) ->
-    delegate:invoke(Pid, fun(P) -> gen_server2:pcall(P, Pri, Msg, Timeout) end).
-
-delegate_cast(Pid, Msg) ->
-    delegate:invoke_no_result(Pid, fun(P) -> gen_server2:cast(P, Msg) end).
+    delegate:invoke(Pid,
+                    fun (P) -> gen_server2:pcall(P, Pri, Msg, Timeout) end).
 
 delegate_pcast(Pid, Pri, Msg) ->
     delegate:invoke_no_result(Pid,
-                              fun(P) -> gen_server2:pcast(P, Pri, Msg) end).
+                              fun (P) -> gen_server2:pcast(P, Pri, Msg) end).
 
diff --git a/src/rabbit_amqqueue_process.erl b/src/rabbit_amqqueue_process.erl
index f12e1b70..ac5fb7f9 100644
--- a/src/rabbit_amqqueue_process.erl
+++ b/src/rabbit_amqqueue_process.erl
@@ -35,7 +35,7 @@
 
 -behaviour(gen_server2).
 
--define(UNSENT_MESSAGE_LIMIT,        100).
+-define(UNSENT_MESSAGE_LIMIT,          100).
 -define(SYNC_INTERVAL,                 5). %% milliseconds
 -define(RAM_DURATION_UPDATE_INTERVAL,  5000).
 
@@ -50,15 +50,16 @@
 
 % Queue's state
 -record(q, {q,
-            owner,
             exclusive_consumer,
             has_had_consumers,
             backing_queue,
             backing_queue_state,
             active_consumers,
             blocked_consumers,
+            expires,
             sync_timer_ref,
-            rate_timer_ref
+            rate_timer_ref,
+            expiry_timer_ref
            }).
 
 -record(consumer, {tag, ack_required}).
@@ -103,16 +104,17 @@ init(Q) ->
     process_flag(trap_exit, true),
     {ok, BQ} = application:get_env(backing_queue_module),
 
-    {ok, #q{q = Q#amqqueue{pid = self()},
-            owner = none,
-            exclusive_consumer = none,
-            has_had_consumers = false,
-            backing_queue = BQ,
+    {ok, #q{q                   = Q#amqqueue{pid = self()},
+            exclusive_consumer  = none,
+            has_had_consumers   = false,
+            backing_queue       = BQ,
             backing_queue_state = undefined,
-            active_consumers = queue:new(),
-            blocked_consumers = queue:new(),
-            sync_timer_ref = undefined,
-            rate_timer_ref = undefined}, hibernate,
+            active_consumers    = queue:new(),
+            blocked_consumers   = queue:new(),
+            expires             = undefined,
+            sync_timer_ref      = undefined,
+            rate_timer_ref      = undefined,
+            expiry_timer_ref    = undefined}, hibernate,
      {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
 
 terminate(shutdown,      State = #q{backing_queue = BQ}) ->
@@ -134,6 +136,29 @@ code_change(_OldVsn, State, _Extra) ->
 
 %%----------------------------------------------------------------------------
 
+init_expires(State = #q{q = #amqqueue{arguments = Arguments}}) ->
+    case rabbit_misc:table_lookup(Arguments, <<"x-expires">>) of
+        {long, Expires} -> ensure_expiry_timer(State#q{expires = Expires});
+        undefined       -> State
+    end.
+
+declare(Recover, From,
+        State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable},
+                   backing_queue = BQ, backing_queue_state = undefined}) ->
+    case rabbit_amqqueue:internal_declare(Q, Recover) of
+        not_found -> {stop, normal, not_found, State};
+        Q         -> gen_server2:reply(From, {new, Q}),
+                     ok = file_handle_cache:register_callback(
+                            rabbit_amqqueue, set_maximum_since_use,
+                            [self()]),
+                     ok = rabbit_memory_monitor:register(
+                            self(), {rabbit_amqqueue,
+                                     set_ram_duration_target, [self()]}),
+                     BQS = BQ:init(QName, IsDurable, Recover),
+                     noreply(init_expires(State#q{backing_queue_state = BQS}));
+        Q1        -> {stop, normal, {existing, Q1}, State}
+    end.
+
 terminate_shutdown(Fun, State) ->
     State1 = #q{backing_queue = BQ, backing_queue_state = BQS} =
         stop_sync_timer(stop_rate_timer(State)),
@@ -164,7 +189,7 @@ noreply(NewState) ->
 next_state(State) ->
     State1 = #q{backing_queue = BQ, backing_queue_state = BQS} =
         ensure_rate_timer(State),
-    case BQ:needs_sync(BQS)of
+    case BQ:needs_idle_timeout(BQS)of
         true  -> {ensure_sync_timer(State1), 0};
         false -> {stop_sync_timer(State1), hibernate}
     end.
@@ -173,7 +198,7 @@ ensure_sync_timer(State = #q{sync_timer_ref = undefined, backing_queue = BQ}) ->
     {ok, TRef} = timer:apply_after(
                    ?SYNC_INTERVAL,
                    rabbit_amqqueue, maybe_run_queue_via_backing_queue,
-                   [self(), fun (BQS) -> BQ:sync(BQS) end]),
+                   [self(), fun (BQS) -> BQ:idle_timeout(BQS) end]),
     State#q{sync_timer_ref = TRef};
 ensure_sync_timer(State) ->
     State.
@@ -203,6 +228,27 @@ stop_rate_timer(State = #q{rate_timer_ref = TRef}) ->
     {ok, cancel} = timer:cancel(TRef),
     State#q{rate_timer_ref = undefined}.
 
+stop_expiry_timer(State = #q{expiry_timer_ref = undefined}) ->
+    State;
+stop_expiry_timer(State = #q{expiry_timer_ref = TRef}) ->
+    {ok, cancel} = timer:cancel(TRef),
+    State#q{expiry_timer_ref = undefined}.
+
+%% We only wish to expire when there are no consumers *and*
+%% basic.get hasn't been called for the configured period.
+ensure_expiry_timer(State = #q{expires = undefined}) ->
+    State;
+ensure_expiry_timer(State = #q{expires = Expires}) ->
+    case is_unused(State) of
+        true ->
+            NewState = stop_expiry_timer(State),
+            {ok, TRef} = timer:apply_after(
+                           Expires, rabbit_amqqueue, maybe_expire, [self()]),
+            NewState#q{expiry_timer_ref = TRef};
+        false ->
+            State
+    end.
+
 assert_invariant(#q{active_consumers = AC,
                     backing_queue = BQ, backing_queue_state = BQS}) ->
     true = (queue:is_empty(AC) orelse BQ:is_empty(BQS)).
@@ -424,7 +470,8 @@ handle_ch_down(DownPid, State = #q{exclusive_consumer = Holder}) ->
                                       _    -> rollback_transaction(Txn, ChPid,
                                                                    State1)
                                   end,
-                         {ok, requeue_and_run(sets:to_list(ChAckTags), State2)}
+                         {ok, requeue_and_run(sets:to_list(ChAckTags),
+                                              ensure_expiry_timer(State2))}
             end
     end.
 
@@ -433,10 +480,6 @@ cancel_holder(ChPid, ConsumerTag, {ChPid, ConsumerTag}) ->
 cancel_holder(_ChPid, _ConsumerTag, Holder) ->
     Holder.
 
-check_queue_owner(none,           _)         -> ok;
-check_queue_owner({ReaderPid, _}, ReaderPid) -> ok;
-check_queue_owner({_,         _}, _)         -> mismatch.
-
 check_exclusive_access({_ChPid, _ConsumerTag}, _ExclusiveConsume, _State) ->
     in_use;
 check_exclusive_access(none, false, _State) ->
@@ -488,10 +531,10 @@ i(auto_delete, #q{q = #amqqueue{auto_delete = AutoDelete}}) -> AutoDelete;
 i(arguments,   #q{q = #amqqueue{arguments   = Arguments}})  -> Arguments;
 i(pid, _) ->
     self();
-i(owner_pid, #q{owner = none}) ->
+i(owner_pid, #q{q = #amqqueue{exclusive_owner = none}}) ->
     '';
-i(owner_pid, #q{owner = {ReaderPid, _MonitorRef}}) ->
-    ReaderPid;
+i(owner_pid, #q{q = #amqqueue{exclusive_owner = ExclusiveOwner}}) ->
+    ExclusiveOwner;
 i(exclusive_consumer_pid, #q{exclusive_consumer = none}) ->
     '';
 i(exclusive_consumer_pid, #q{exclusive_consumer = {ChPid, _ConsumerTag}}) ->
@@ -520,25 +563,24 @@ i(Item, _) ->
 %---------------------------------------------------------------------------
 
 handle_call({init, Recover}, From,
-            State = #q{q = Q = #amqqueue{name = QName, durable = IsDurable},
-                       backing_queue = BQ, backing_queue_state = undefined}) ->
-    %% TODO: If we're exclusively owned && our owner isn't alive &&
-    %% Recover then we should BQ:init and then {stop, normal,
-    %% not_found, State}, relying on terminate to delete the queue.
-    case rabbit_amqqueue:internal_declare(Q, Recover) of
-        not_found ->
-            {stop, normal, not_found, State};
-        Q ->
-            gen_server2:reply(From, Q),
-            ok = file_handle_cache:register_callback(
-                   rabbit_amqqueue, set_maximum_since_use, [self()]),
-            ok = rabbit_memory_monitor:register(
-                   self(),
-                   {rabbit_amqqueue, set_ram_duration_target, [self()]}),
-            noreply(State#q{backing_queue_state =
-                                BQ:init(QName, IsDurable, Recover)});
-        Q1 ->
-            {stop, normal, Q1, State}
+            State = #q{q = #amqqueue{exclusive_owner = none}}) ->
+    declare(Recover, From, State);
+
+handle_call({init, Recover}, From,
+            State = #q{q = #amqqueue{exclusive_owner = Owner}}) ->
+    case rpc:call(node(Owner), erlang, is_process_alive, [Owner]) of
+        true -> erlang:monitor(process, Owner),
+                declare(Recover, From, State);
+        _    -> #q{q = #amqqueue{name = QName, durable = IsDurable},
+                   backing_queue = BQ, backing_queue_state = undefined} = State,
+                case Recover of
+                    true -> ok;
+                    _    -> rabbit_log:warning(
+                              "Queue ~p exclusive owner went away~n", [QName])
+                end,
+                BQS = BQ:init(QName, IsDurable, Recover),
+                %% Rely on terminate to delete the queue.
+                {stop, normal, not_found, State#q{backing_queue_state = BQS}}
     end;
 
 handle_call(info, _From, State) ->
@@ -600,8 +642,9 @@ handle_call({basic_get, ChPid, NoAck}, _From,
             State = #q{q = #amqqueue{name = QName},
                        backing_queue_state = BQS, backing_queue = BQ}) ->
     AckRequired = not NoAck,
+    State1 = ensure_expiry_timer(State),
     case BQ:fetch(AckRequired, BQS) of
-        {empty, BQS1} -> reply(empty, State#q{backing_queue_state = BQS1});
+        {empty, BQS1} -> reply(empty, State1#q{backing_queue_state = BQS1});
         {{Message, IsDelivered, AckTag, Remaining}, BQS1} ->
             case AckRequired of
                 true ->  C = #cr{acktags = ChAckTags} = ch_record(ChPid),
@@ -610,54 +653,47 @@ handle_call({basic_get, ChPid, NoAck}, _From,
                 false -> ok
             end,
             Msg = {QName, self(), AckTag, IsDelivered, Message},
-            reply({ok, Remaining, Msg}, State#q{backing_queue_state = BQS1})
+            reply({ok, Remaining, Msg}, State1#q{backing_queue_state = BQS1})
     end;
 
-handle_call({basic_consume, NoAck, ReaderPid, ChPid, LimiterPid,
+handle_call({basic_consume, NoAck, ChPid, LimiterPid,
              ConsumerTag, ExclusiveConsume, OkMsg},
-            _From, State = #q{owner = Owner,
-                              exclusive_consumer = ExistingHolder}) ->
-    case check_queue_owner(Owner, ReaderPid) of
-        mismatch ->
-            reply({error, queue_owned_by_another_connection}, State);
+            _From, State = #q{exclusive_consumer = ExistingHolder}) ->
+    case check_exclusive_access(ExistingHolder, ExclusiveConsume,
+                                State) of
+        in_use ->
+            reply({error, exclusive_consume_unavailable}, State);
         ok ->
-            case check_exclusive_access(ExistingHolder, ExclusiveConsume,
-                                        State) of
-                in_use ->
-                    reply({error, exclusive_consume_unavailable}, State);
-                ok ->
-                    C = #cr{consumer_count = ConsumerCount} = ch_record(ChPid),
-                    Consumer = #consumer{tag = ConsumerTag,
-                                         ack_required = not NoAck},
-                    store_ch_record(C#cr{consumer_count = ConsumerCount +1,
-                                         limiter_pid = LimiterPid}),
-                    case ConsumerCount of
-                        0 -> ok = rabbit_limiter:register(LimiterPid, self());
-                        _ -> ok
-                    end,
-                    ExclusiveConsumer = case ExclusiveConsume of
-                                            true  -> {ChPid, ConsumerTag};
-                                            false -> ExistingHolder
-                                        end,
-                    State1 = State#q{has_had_consumers = true,
-                                     exclusive_consumer = ExclusiveConsumer},
-                    ok = maybe_send_reply(ChPid, OkMsg),
-                    State2 =
-                        case is_ch_blocked(C) of
-                            true  -> State1#q{
-                                       blocked_consumers =
-                                       add_consumer(
-                                         ChPid, Consumer,
-                                         State1#q.blocked_consumers)};
-                            false -> run_message_queue(
-                                       State1#q{
-                                         active_consumers =
-                                         add_consumer(
-                                           ChPid, Consumer,
-                                           State1#q.active_consumers)})
-                        end,
-                    reply(ok, State2)
-            end
+            C = #cr{consumer_count = ConsumerCount} = ch_record(ChPid),
+            Consumer = #consumer{tag = ConsumerTag,
+                                 ack_required = not NoAck},
+            store_ch_record(C#cr{consumer_count = ConsumerCount +1,
+                                 limiter_pid = LimiterPid}),
+            ok = case ConsumerCount of
+                     0 -> rabbit_limiter:register(LimiterPid, self());
+                     _ -> ok
+                 end,
+            ExclusiveConsumer = if ExclusiveConsume -> {ChPid, ConsumerTag};
+                                   true             -> ExistingHolder
+                                end,
+            State1 = State#q{has_had_consumers = true,
+                             exclusive_consumer = ExclusiveConsumer},
+            ok = maybe_send_reply(ChPid, OkMsg),
+            State2 =
+                case is_ch_blocked(C) of
+                    true  -> State1#q{
+                               blocked_consumers =
+                               add_consumer(
+                                 ChPid, Consumer,
+                                 State1#q.blocked_consumers)};
+                    false -> run_message_queue(
+                               State1#q{
+                                 active_consumers =
+                                 add_consumer(
+                                   ChPid, Consumer,
+                                   State1#q.active_consumers)})
+                end,
+            reply(ok, State2)
     end;
 
 handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From,
@@ -684,16 +720,15 @@ handle_call({basic_cancel, ChPid, ConsumerTag, OkMsg}, _From,
                                               ChPid, ConsumerTag,
                                               State#q.blocked_consumers)},
             case should_auto_delete(NewState) of
-                false -> reply(ok, NewState);
+                false -> reply(ok, ensure_expiry_timer(NewState));
                 true  -> {stop, normal, ok, NewState}
             end
     end;
 
-handle_call(stat, _From, State = #q{q = #amqqueue{name = Name},
-                                    backing_queue = BQ,
+handle_call(stat, _From, State = #q{backing_queue = BQ,
                                     backing_queue_state = BQS,
                                     active_consumers = ActiveConsumers}) ->
-    reply({ok, Name, BQ:len(BQS), queue:len(ActiveConsumers)}, State);
+    reply({ok, BQ:len(BQS), queue:len(ActiveConsumers)}, State);
 
 handle_call({delete, IfUnused, IfEmpty}, _From,
             State = #q{backing_queue_state = BQS, backing_queue = BQ}) ->
@@ -713,27 +748,15 @@ handle_call(purge, _From, State = #q{backing_queue = BQ,
     {Count, BQS1} = BQ:purge(BQS),
     reply({ok, Count}, State#q{backing_queue_state = BQS1});
 
-handle_call({claim_queue, ReaderPid}, _From,
-            State = #q{owner = Owner, exclusive_consumer = Holder}) ->
-    case Owner of
-        none ->
-            case check_exclusive_access(Holder, true, State) of
-                in_use ->
-                    %% FIXME: Is this really the right answer? What if
-                    %% an active consumer's reader is actually the
-                    %% claiming pid? Should that be allowed? In order
-                    %% to check, we'd need to hold not just the ch
-                    %% pid for each consumer, but also its reader
-                    %% pid...
-                    reply(locked, State);
-                ok ->
-                    MonitorRef = erlang:monitor(process, ReaderPid),
-                    reply(ok, State#q{owner = {ReaderPid, MonitorRef}})
-            end;
-        {ReaderPid, _MonitorRef} ->
-            reply(ok, State);
-        _ ->
-            reply(locked, State)
+handle_call({requeue, AckTags, ChPid}, From, State) ->
+    gen_server2:reply(From, ok),
+    case lookup_ch(ChPid) of
+        not_found ->
+            noreply(State);
+        C = #cr{acktags = ChAckTags} ->
+            ChAckTags1 = subtract_acks(ChAckTags, AckTags),
+            store_ch_record(C#cr{acktags = ChAckTags1}),
+            noreply(requeue_and_run(AckTags, State))
     end;
 
 handle_call({maybe_run_queue_via_backing_queue, Fun}, _From, State) ->
@@ -757,24 +780,27 @@ handle_cast({ack, Txn, AckTags, ChPid},
                     _    -> {C#cr{txn = Txn}, BQ:tx_ack(Txn, AckTags, BQS)}
                 end,
             store_ch_record(C1),
-            noreply(State #q { backing_queue_state = BQS1 })
+            noreply(State#q{backing_queue_state = BQS1})
     end;
 
-handle_cast({rollback, Txn, ChPid}, State) ->
-    noreply(rollback_transaction(Txn, ChPid, State));
-
-handle_cast({requeue, AckTags, ChPid}, State) ->
+handle_cast({reject, AckTags, Requeue, ChPid},
+            State = #q{backing_queue = BQ, backing_queue_state = BQS}) ->
     case lookup_ch(ChPid) of
         not_found ->
-            rabbit_log:warning("Ignoring requeue from unknown ch: ~p~n",
-                               [ChPid]),
             noreply(State);
         C = #cr{acktags = ChAckTags} ->
             ChAckTags1 = subtract_acks(ChAckTags, AckTags),
             store_ch_record(C#cr{acktags = ChAckTags1}),
-            noreply(requeue_and_run(AckTags, State))
+            noreply(case Requeue of
+                        true  -> requeue_and_run(AckTags, State);
+                        false -> BQS1 = BQ:ack(AckTags, BQS),
+                                 State #q { backing_queue_state = BQS1 }
+                    end)
     end;
 
+handle_cast({rollback, Txn, ChPid}, State) ->
+    noreply(rollback_transaction(Txn, ChPid, State));
+
 handle_cast({unblock, ChPid}, State) ->
     noreply(
       possibly_unblock(State, ChPid,
@@ -823,21 +849,24 @@ handle_cast({set_ram_duration_target, Duration},
 
 handle_cast({set_maximum_since_use, Age}, State) ->
     ok = file_handle_cache:set_maximum_since_use(Age),
-    noreply(State).
-
-handle_info({'DOWN', MonitorRef, process, DownPid, _Reason},
-            State = #q{owner = {DownPid, MonitorRef}}) ->
-    %% We know here that there are no consumers on this queue that are
-    %% owned by other pids than the one that just went down, so since
-    %% exclusive in some sense implies autodelete, we delete the queue
-    %% here. The other way of implementing the "exclusive implies
-    %% autodelete" feature is to actually set autodelete when an
-    %% exclusive declaration is seen, but this has the problem that
-    %% the python tests rely on the queue not going away after a
-    %% basic.cancel when the queue was declared exclusive and
-    %% nonautodelete.
-    NewState = State#q{owner = none},
-    {stop, normal, NewState};
+    noreply(State);
+
+handle_cast(maybe_expire, State) ->
+    case is_unused(State) of
+        true  -> ?LOGDEBUG("Queue lease expired for ~p~n", [State#q.q]),
+                 {stop, normal, State};
+        false -> noreply(ensure_expiry_timer(State))
+    end.
+
+handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason},
+            State = #q{q = #amqqueue{exclusive_owner = DownPid}}) ->
+    %% Exclusively owned queues must disappear with their owner.  In
+    %% the case of clean shutdown we delete the queue synchronously in
+    %% the reader - although not required by the spec this seems to
+    %% match what people expect (see bug 21824). However we need this
+    %% monitor-and-async-delete in case the connection goes away
+    %% unexpectedly.
+    {stop, normal, State};
 handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) ->
     case handle_ch_down(DownPid, State) of
         {ok, NewState}   -> noreply(NewState);
@@ -846,7 +875,7 @@ handle_info({'DOWN', _MonitorRef, process, DownPid, _Reason}, State) ->
 
 handle_info(timeout, State = #q{backing_queue = BQ}) ->
     noreply(maybe_run_queue_via_backing_queue(
-              fun (BQS) -> BQ:sync(BQS) end, State));
+              fun (BQS) -> BQ:idle_timeout(BQS) end, State));
 
 handle_info({'EXIT', _Pid, Reason}, State) ->
     {stop, Reason, State};
diff --git a/src/rabbit_backing_queue.erl b/src/rabbit_backing_queue.erl
index 2dba00ad..2230c507 100644
--- a/src/rabbit_backing_queue.erl
+++ b/src/rabbit_backing_queue.erl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -42,6 +42,11 @@ behaviour_info(callbacks) ->
      %% shared resources.
      {start, 1},
 
+     %% Called to tear down any state/resources. NB: Implementations
+     %% should not depend on this function being called on shutdown
+     %% and instead should hook into the rabbit supervision hierarchy.
+     {stop, 0},
+
      %% Initialise the backing queue and its state.
      {init, 3},
 
@@ -113,14 +118,15 @@ behaviour_info(callbacks) ->
      %% queue.
      {ram_duration, 1},
 
-     %% Should 'sync' be called as soon as the queue process can
-     %% manage (either on an empty mailbox, or when a timer fires)?
-     {needs_sync, 1},
+     %% Should 'idle_timeout' be called as soon as the queue process
+     %% can manage (either on an empty mailbox, or when a timer
+     %% fires)?
+     {needs_idle_timeout, 1},
 
-     %% Called (eventually) after needs_sync returns 'true'. Note this
-     %% may be called more than once for each 'true' returned from
-     %% needs_sync.
-     {sync, 1},
+     %% Called (eventually) after needs_idle_timeout returns
+     %% 'true'. Note this may be called more than once for each 'true'
+     %% returned from needs_idle_timeout.
+     {idle_timeout, 1},
 
      %% Called immediately before the queue hibernates.
      {handle_pre_hibernate, 1},
diff --git a/src/rabbit_basic.erl b/src/rabbit_basic.erl
index 4ab7a2a0..c76c01ac 100644
--- a/src/rabbit_basic.erl
+++ b/src/rabbit_basic.erl
@@ -42,24 +42,41 @@
 
 -ifdef(use_specs).
 
--type(properties_input() :: (amqp_properties() | [{atom(), any()}])).
--type(publish_result() :: ({ok, routing_result(), [pid()]} | not_found())).
-
--spec(publish/1 :: (delivery()) -> publish_result()).
--spec(delivery/4 :: (boolean(), boolean(), maybe(txn()), message()) ->
-             delivery()).
--spec(message/4 :: (exchange_name(), routing_key(), properties_input(),
-                    binary()) -> (message() | {'error', any()})).
--spec(properties/1 :: (properties_input()) -> amqp_properties()).
--spec(publish/4 :: (exchange_name(), routing_key(), properties_input(),
-                    binary()) -> publish_result()).
--spec(publish/7 :: (exchange_name(), routing_key(), boolean(), boolean(),
-                    maybe(txn()), properties_input(), binary()) ->
-             publish_result()).
--spec(build_content/2 :: (amqp_properties(), binary()) -> content()).
--spec(from_content/1 :: (content()) -> {amqp_properties(), binary()}).
+-type(properties_input() ::
+        (rabbit_framing:amqp_property_record() | [{atom(), any()}])).
+-type(publish_result() ::
+        ({ok, rabbit_router:routing_result(), [pid()]}
+         | rabbit_types:error('not_found'))).
+
+-spec(publish/1 :: (rabbit_types:delivery()) -> publish_result()).
+-spec(delivery/4 ::
+        (boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()),
+         rabbit_types:message())
+        -> rabbit_types:delivery()).
+-spec(message/4 ::
+        (rabbit_exchange:name(), rabbit_router:routing_key(),
+         properties_input(), binary())
+        -> (rabbit_types:message() | rabbit_types:error(any()))).
+-spec(properties/1 ::
+        (properties_input()) -> rabbit_framing:amqp_property_record()).
+-spec(publish/4 ::
+        (rabbit_exchange:name(), rabbit_router:routing_key(),
+         properties_input(), binary())
+        -> publish_result()).
+-spec(publish/7 ::
+        (rabbit_exchange:name(), rabbit_router:routing_key(),
+         boolean(), boolean(), rabbit_types:maybe(rabbit_types:txn()),
+         properties_input(), binary())
+        -> publish_result()).
+-spec(build_content/2 ::
+        (rabbit_framing:amqp_property_record(), binary())
+        -> rabbit_types:content()).
+-spec(from_content/1 ::
+        (rabbit_types:content())
+        -> {rabbit_framing:amqp_property_record(), binary()}).
 -spec(is_message_persistent/1 ::
-        (decoded_content()) -> (boolean() | {'invalid', non_neg_integer()})).
+        (rabbit_types:decoded_content())
+        -> (boolean() | {'invalid', non_neg_integer()})).
 
 -endif.
 
@@ -80,18 +97,24 @@ delivery(Mandatory, Immediate, Txn, Message) ->
               sender = self(), message = Message}.
 
 build_content(Properties, BodyBin) ->
-    {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'),
+    %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1
+    {ClassId, _MethodId} =
+        rabbit_framing_amqp_0_9_1:method_id('basic.publish'),
     #content{class_id = ClassId,
              properties = Properties,
              properties_bin = none,
+             protocol = none,
              payload_fragments_rev = [BodyBin]}.
 
 from_content(Content) ->
     #content{class_id = ClassId,
              properties = Props,
              payload_fragments_rev = FragmentsRev} =
-        rabbit_binary_parser:ensure_content_decoded(Content),
-    {ClassId, _MethodId} = rabbit_framing:method_id('basic.publish'),
+        %% basic.publish hasn't changed so we can just hard-code amqp_0_9_1
+        rabbit_binary_parser:ensure_content_decoded(Content,
+                                                    rabbit_framing_amqp_0_9_1),
+    {ClassId, _MethodId} =
+        rabbit_framing_amqp_0_9_1:method_id('basic.publish'),
     {Props, list_to_binary(lists:reverse(FragmentsRev))}.
 
 message(ExchangeName, RoutingKeyBin, RawProperties, BodyBin) ->
diff --git a/src/rabbit_binary_generator.erl b/src/rabbit_binary_generator.erl
index ed843735..f0ec6180 100644
--- a/src/rabbit_binary_generator.erl
+++ b/src/rabbit_binary_generator.erl
@@ -41,12 +41,12 @@
 % See definition of check_empty_content_body_frame_size/0, an assertion called at startup.
 -define(EMPTY_CONTENT_BODY_FRAME_SIZE, 8).
 
--export([build_simple_method_frame/2,
-         build_simple_content_frames/3,
+-export([build_simple_method_frame/3,
+         build_simple_content_frames/4,
          build_heartbeat_frame/0]).
 -export([generate_table/1, encode_properties/2]).
 -export([check_empty_content_body_frame_size/0]).
--export([ensure_content_encoded/1, clear_encoded_content/1]).
+-export([ensure_content_encoded/2, clear_encoded_content/1]).
 
 -import(lists).
 
@@ -56,45 +56,47 @@
 
 -type(frame() :: [binary()]).
 
--spec(build_simple_method_frame/2 ::
-      (channel_number(), amqp_method()) -> frame()).
--spec(build_simple_content_frames/3 ::
-      (channel_number(), content(), non_neg_integer()) -> [frame()]).
+-spec(build_simple_method_frame/3 ::
+        (rabbit_channel:channel_number(), rabbit_framing:amqp_method_record(),
+         rabbit_types:protocol())
+        -> frame()).
+-spec(build_simple_content_frames/4 ::
+        (rabbit_channel:channel_number(), rabbit_types:content(),
+         non_neg_integer(), rabbit_types:protocol())
+        -> [frame()]).
 -spec(build_heartbeat_frame/0 :: () -> frame()).
--spec(generate_table/1 :: (amqp_table()) -> binary()).
--spec(encode_properties/2 :: ([amqp_property_type()], [any()]) -> binary()).
+-spec(generate_table/1 :: (rabbit_framing:amqp_table()) -> binary()).
+-spec(encode_properties/2 ::
+        ([rabbit_framing:amqp_property_type()], [any()]) -> binary()).
 -spec(check_empty_content_body_frame_size/0 :: () -> 'ok').
--spec(ensure_content_encoded/1 :: (content()) -> encoded_content()).
--spec(clear_encoded_content/1 :: (content()) -> unencoded_content()).
+-spec(ensure_content_encoded/2 ::
+        (rabbit_types:content(), rabbit_types:protocol()) ->
+                                       rabbit_types:encoded_content()).
+-spec(clear_encoded_content/1 ::
+        (rabbit_types:content()) -> rabbit_types:unencoded_content()).
 
 -endif.
 
 %%----------------------------------------------------------------------------
 
-build_simple_method_frame(ChannelInt, MethodRecord) ->
-    MethodFields = rabbit_framing:encode_method_fields(MethodRecord),
+build_simple_method_frame(ChannelInt, MethodRecord, Protocol) ->
+    MethodFields = Protocol:encode_method_fields(MethodRecord),
     MethodName = rabbit_misc:method_record_type(MethodRecord),
-    {ClassId, MethodId} = rabbit_framing:method_id(MethodName),
+    {ClassId, MethodId} = Protocol:method_id(MethodName),
     create_frame(1, ChannelInt, [<<ClassId:16, MethodId:16>>, MethodFields]).
 
-build_simple_content_frames(ChannelInt,
-                            #content{class_id = ClassId,
-                                     properties = ContentProperties,
-                                     properties_bin = ContentPropertiesBin,
-                                     payload_fragments_rev = PayloadFragmentsRev},
-                            FrameMax) ->
-    {BodySize, ContentFrames} = build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt),
+build_simple_content_frames(ChannelInt, Content, FrameMax, Protocol) ->
+    #content{class_id = ClassId,
+             properties_bin = ContentPropertiesBin,
+             payload_fragments_rev = PayloadFragmentsRev} =
+        ensure_content_encoded(Content, Protocol),
+    {BodySize, ContentFrames} =
+        build_content_frames(PayloadFragmentsRev, FrameMax, ChannelInt),
     HeaderFrame = create_frame(2, ChannelInt,
                                [<<ClassId:16, 0:16, BodySize:64>>,
-                                maybe_encode_properties(ContentProperties, ContentPropertiesBin)]),
+                                ContentPropertiesBin]),
     [HeaderFrame | ContentFrames].
 
-maybe_encode_properties(_ContentProperties, ContentPropertiesBin)
-  when is_binary(ContentPropertiesBin) ->
-    ContentPropertiesBin;
-maybe_encode_properties(ContentProperties, none) ->
-    rabbit_framing:encode_properties(ContentProperties).
-
 build_content_frames(FragsRev, FrameMax, ChannelInt) ->
     BodyPayloadMax = if FrameMax == 0 ->
                              iolist_size(FragsRev);
@@ -118,10 +120,11 @@ build_content_frames(SizeAcc, FramesAcc, FragSizeRem, FragAcc,
                      [Frag | Frags], BodyPayloadMax, ChannelInt) ->
     Size = size(Frag),
     {NewFragSizeRem, NewFragAcc, NewFrags} =
-        case Size =< FragSizeRem of
-            true  -> {FragSizeRem - Size, [Frag | FragAcc], Frags};
-            false -> <<Head:FragSizeRem/binary, Tail/binary>> = Frag,
-                     {0, [Head | FragAcc], [Tail | Frags]}
+        if Size == 0           -> {FragSizeRem, FragAcc, Frags};
+           Size =< FragSizeRem -> {FragSizeRem - Size, [Frag | FragAcc], Frags};
+           true                -> <<Head:FragSizeRem/binary, Tail/binary>> =
+                                      Frag,
+                                  {0, [Head | FragAcc], [Tail | Frags]}
         end,
     build_content_frames(SizeAcc, FramesAcc, NewFragSizeRem, NewFragAcc,
                          NewFrags, BodyPayloadMax, ChannelInt).
@@ -276,13 +279,16 @@ check_empty_content_body_frame_size() ->
                   ComputedSize, ?EMPTY_CONTENT_BODY_FRAME_SIZE})
     end.
 
-ensure_content_encoded(Content = #content{properties_bin = PropsBin})
+ensure_content_encoded(Content = #content{properties_bin = PropsBin,
+                                          protocol = Protocol}, Protocol)
   when PropsBin =/= 'none' ->
     Content;
-ensure_content_encoded(Content = #content{properties = Props}) ->
-    Content #content{properties_bin = rabbit_framing:encode_properties(Props)}.
+ensure_content_encoded(Content = #content{properties = Props}, Protocol) ->
+    Content#content{properties_bin = Protocol:encode_properties(Props),
+                    protocol = Protocol}.
 
-clear_encoded_content(Content = #content{properties_bin = none}) ->
+clear_encoded_content(Content = #content{properties_bin = none,
+                                         protocol = none}) ->
     Content;
 clear_encoded_content(Content = #content{properties = none}) ->
     %% Only clear when we can rebuild the properties_bin later in
@@ -290,4 +296,4 @@ clear_encoded_content(Content = #content{properties = none}) ->
     %% one of properties and properties_bin can be 'none'
     Content;
 clear_encoded_content(Content = #content{}) ->
-    Content#content{properties_bin = none}.
+    Content#content{properties_bin = none, protocol = none}.
diff --git a/src/rabbit_binary_parser.erl b/src/rabbit_binary_parser.erl
index e022a1fa..1d0a62af 100644
--- a/src/rabbit_binary_parser.erl
+++ b/src/rabbit_binary_parser.erl
@@ -34,7 +34,7 @@
 -include("rabbit.hrl").
 
 -export([parse_table/1, parse_properties/2]).
--export([ensure_content_decoded/1, clear_decoded_content/1]).
+-export([ensure_content_decoded/2, clear_decoded_content/1]).
 
 -import(lists).
 
@@ -42,10 +42,14 @@
 
 -ifdef(use_specs).
 
--spec(parse_table/1 :: (binary()) -> amqp_table()).
--spec(parse_properties/2 :: ([amqp_property_type()], binary()) -> [any()]).
--spec(ensure_content_decoded/1 :: (content()) -> decoded_content()).
--spec(clear_decoded_content/1 :: (content()) -> undecoded_content()).
+-spec(parse_table/1 :: (binary()) -> rabbit_framing:amqp_table()).
+-spec(parse_properties/2 ::
+        ([rabbit_framing:amqp_property_type()], binary()) -> [any()]).
+-spec(ensure_content_decoded/2 ::
+        (rabbit_types:content(), rabbit_types:protocol())
+        -> rabbit_types:decoded_content()).
+-spec(clear_decoded_content/1 ::
+        (rabbit_types:content()) -> rabbit_types:undecoded_content()).
 
 -endif.
 
@@ -159,12 +163,12 @@ parse_property(bit, Rest) ->
 parse_property(table, <<Len:32/unsigned, Table:Len/binary, Rest/binary>>) ->
     {parse_table(Table), Rest}.
 
-ensure_content_decoded(Content = #content{properties = Props})
+ensure_content_decoded(Content = #content{properties = Props}, _Protocol)
   when Props =/= 'none' ->
     Content;
-ensure_content_decoded(Content = #content{properties_bin = PropBin})
+ensure_content_decoded(Content = #content{properties_bin = PropBin}, Protocol)
   when is_binary(PropBin) ->
-    Content#content{properties = rabbit_framing:decode_properties(
+    Content#content{properties = Protocol:decode_properties(
                                    Content#content.class_id, PropBin)}.
 
 clear_decoded_content(Content = #content{properties = none}) ->
diff --git a/src/rabbit_channel.erl b/src/rabbit_channel.erl
index a48db9c8..c4ff361d 100644
--- a/src/rabbit_channel.erl
+++ b/src/rabbit_channel.erl
@@ -35,20 +35,25 @@
 
 -behaviour(gen_server2).
 
--export([start_link/5, do/2, do/3, shutdown/1]).
+-export([start_link/6, do/2, do/3, shutdown/1]).
 -export([send_command/2, deliver/4, conserve_memory/2, flushed/2]).
 -export([list/0, info_keys/0, info/1, info/2, info_all/0, info_all/1]).
 
--export([init/1, terminate/2, code_change/3,
-         handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1]).
+-export([flow_timeout/2]).
+
+-export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2,
+         handle_info/2, handle_pre_hibernate/1]).
 
 -record(ch, {state, channel, reader_pid, writer_pid, limiter_pid,
              transaction_id, tx_participants, next_tag,
              uncommitted_ack_q, unacked_message_q,
              username, virtual_host, most_recently_declared_queue,
-             consumer_mapping, blocking}).
+             consumer_mapping, blocking, queue_collector_pid, flow}).
+
+-record(flow, {server, client, pending}).
 
 -define(MAX_PERMISSION_CACHE_SIZE, 12).
+-define(FLOW_OK_TIMEOUT, 10000). %% 10 seconds
 
 -define(INFO_KEYS,
         [pid,
@@ -66,31 +71,39 @@
 
 -ifdef(use_specs).
 
--spec(start_link/5 ::
-      (channel_number(), pid(), pid(), username(), vhost()) -> pid()).
--spec(do/2 :: (pid(), amqp_method()) -> 'ok').
--spec(do/3 :: (pid(), amqp_method(), maybe(content())) -> 'ok').
+-export_type([channel_number/0]).
+
+-type(ref() :: any()).
+-type(channel_number() :: non_neg_integer()).
+
+-spec(start_link/6 ::
+      (channel_number(), pid(), pid(), rabbit_access_control:username(),
+       rabbit_types:vhost(), pid()) -> rabbit_types:ok(pid())).
+-spec(do/2 :: (pid(), rabbit_framing:amqp_method_record()) -> 'ok').
+-spec(do/3 :: (pid(), rabbit_framing:amqp_method_record(),
+               rabbit_types:maybe(rabbit_types:content())) -> 'ok').
 -spec(shutdown/1 :: (pid()) -> 'ok').
--spec(send_command/2 :: (pid(), amqp_method()) -> 'ok').
--spec(deliver/4 :: (pid(), ctag(), boolean(), qmsg()) -> 'ok').
+-spec(send_command/2 :: (pid(), rabbit_framing:amqp_method()) -> 'ok').
+-spec(deliver/4 ::
+        (pid(), rabbit_types:ctag(), boolean(), rabbit_amqqueue:qmsg())
+        -> 'ok').
 -spec(conserve_memory/2 :: (pid(), boolean()) -> 'ok').
 -spec(flushed/2 :: (pid(), pid()) -> 'ok').
+-spec(flow_timeout/2 :: (pid(), ref()) -> 'ok').
 -spec(list/0 :: () -> [pid()]).
--spec(info_keys/0 :: () -> [info_key()]).
--spec(info/1 :: (pid()) -> [info()]).
--spec(info/2 :: (pid(), [info_key()]) -> [info()]).
--spec(info_all/0 :: () -> [[info()]]).
--spec(info_all/1 :: ([info_key()]) -> [[info()]]).
+-spec(info_keys/0 :: () -> [rabbit_types:info_key()]).
+-spec(info/1 :: (pid()) -> [rabbit_types:info()]).
+-spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]).
+-spec(info_all/0 :: () -> [[rabbit_types:info()]]).
+-spec(info_all/1 :: ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]).
 
 -endif.
 
 %%----------------------------------------------------------------------------
 
-start_link(Channel, ReaderPid, WriterPid, Username, VHost) ->
-    {ok, Pid} = gen_server2:start_link(
-                  ?MODULE, [Channel, ReaderPid, WriterPid,
-                            Username, VHost], []),
-    Pid.
+start_link(Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid) ->
+    gen_server2:start_link(?MODULE, [Channel, ReaderPid, WriterPid,
+                                     Username, VHost, CollectorPid], []).
 
 do(Pid, Method) ->
     do(Pid, Method, none).
@@ -113,6 +126,9 @@ conserve_memory(Pid, Conserve) ->
 flushed(Pid, QPid) ->
     gen_server2:cast(Pid, {flushed, QPid}).
 
+flow_timeout(Pid, Ref) ->
+    gen_server2:pcast(Pid, 7, {flow_timeout, Ref}).
+
 list() ->
     pg_local:get_members(rabbit_channels).
 
@@ -135,7 +151,7 @@ info_all(Items) ->
 
 %%---------------------------------------------------------------------------
 
-init([Channel, ReaderPid, WriterPid, Username, VHost]) ->
+init([Channel, ReaderPid, WriterPid, Username, VHost, CollectorPid]) ->
     process_flag(trap_exit, true),
     link(WriterPid),
     ok = pg_local:join(rabbit_channels, self()),
@@ -153,7 +169,10 @@ init([Channel, ReaderPid, WriterPid, Username, VHost]) ->
              virtual_host            = VHost,
              most_recently_declared_queue = <<>>,
              consumer_mapping        = dict:new(),
-             blocking                = dict:new()},
+             blocking                = dict:new(),
+             queue_collector_pid     = CollectorPid,
+             flow                    = #flow{server = true, client = true,
+                                             pending = none}},
      hibernate,
      {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
 
@@ -180,11 +199,9 @@ handle_cast({method, Method, Content}, State) ->
             {stop, normal, State#ch{state = terminating}}
     catch
         exit:Reason = #amqp_error{} ->
-            ok = rollback_and_notify(State),
             MethodName = rabbit_misc:method_record_type(Method),
-            State#ch.reader_pid ! {channel_exit, State#ch.channel,
-                                   Reason#amqp_error{method = MethodName}},
-            {stop, normal, State#ch{state = terminating}};
+            {stop, normal, terminating(Reason#amqp_error{method = MethodName},
+                                       State)};
         exit:normal ->
             {stop, normal, State};
         _:Reason ->
@@ -208,11 +225,25 @@ handle_cast({deliver, ConsumerTag, AckRequired, Msg},
     ok = internal_deliver(WriterPid, true, ConsumerTag, DeliveryTag, Msg),
     noreply(State1#ch{next_tag = DeliveryTag + 1});
 
-handle_cast({conserve_memory, Conserve}, State) ->
-    ok = clear_permission_cache(),
-    ok = rabbit_writer:send_command(
-           State#ch.writer_pid, #'channel.flow'{active = not(Conserve)}),
-    noreply(State).
+handle_cast({conserve_memory, true}, State = #ch{state = starting}) ->
+    noreply(State);
+handle_cast({conserve_memory, false}, State = #ch{state = starting}) ->
+    ok = rabbit_writer:send_command(State#ch.writer_pid, #'channel.open_ok'{}),
+    noreply(State#ch{state = running});
+handle_cast({conserve_memory, Conserve}, State = #ch{state = running}) ->
+    flow_control(not Conserve, State);
+handle_cast({conserve_memory, _Conserve}, State) ->
+    noreply(State);
+
+handle_cast({flow_timeout, Ref},
+            State = #ch{flow = #flow{client = Flow, pending = {Ref, _TRef}}}) ->
+    {stop, normal, terminating(
+                     rabbit_misc:amqp_error(
+                       precondition_failed,
+                       "timeout waiting for channel.flow_ok{active=~w}",
+                       [not Flow], none), State)};
+handle_cast({flow_timeout, _Ref}, State) ->
+    noreply(State). %% Ref is not the pending one; nothing to do
 
 handle_info({'EXIT', WriterPid, Reason = {writer, send_failed, _Error}},
             State = #ch{writer_pid = WriterPid}) ->
@@ -253,20 +284,20 @@ return_ok(State, false, Msg)  -> {reply, Msg, State}.
 ok_msg(true, _Msg) -> undefined;
 ok_msg(false, Msg) -> Msg.
 
-return_queue_declare_ok(State, NoWait, Q) ->
-    NewState = State#ch{most_recently_declared_queue =
-                        (Q#amqqueue.name)#resource.name},
+terminating(Reason, State = #ch{channel = Channel, reader_pid = Reader}) ->
+    ok = rollback_and_notify(State),
+    Reader ! {channel_exit, Channel, Reason},
+    State#ch{state = terminating}.
+
+return_queue_declare_ok(#resource{name = ActualName},
+                        NoWait, MessageCount, ConsumerCount, State) ->
+    NewState = State#ch{most_recently_declared_queue = ActualName},
     case NoWait of
         true  -> {noreply, NewState};
-        false ->
-            {ok, ActualName, MessageCount, ConsumerCount} =
-                rabbit_misc:with_exit_handler(
-                  fun () -> {ok, Q#amqqueue.name, 0, 0} end,
-                  fun () -> rabbit_amqqueue:stat(Q) end),
-            Reply = #'queue.declare_ok'{queue = ActualName#resource.name,
-                                        message_count = MessageCount,
-                                        consumer_count = ConsumerCount},
-            {reply, Reply, NewState}
+        false -> Reply = #'queue.declare_ok'{queue = ActualName,
+                                             message_count = MessageCount,
+                                             consumer_count = ConsumerCount},
+                 {reply, Reply, NewState}
     end.
 
 check_resource_access(Username, Resource, Perm) ->
@@ -300,7 +331,7 @@ check_read_permitted(Resource, #ch{ username = Username}) ->
 
 expand_queue_name_shortcut(<<>>, #ch{ most_recently_declared_queue = <<>> }) ->
     rabbit_misc:protocol_error(
-      not_allowed, "no previously declared queue", []);
+      not_found, "no previously declared queue", []);
 expand_queue_name_shortcut(<<>>, #ch{ virtual_host = VHostPath,
                                       most_recently_declared_queue = MRDQ }) ->
     rabbit_misc:r(VHostPath, queue, MRDQ);
@@ -310,7 +341,7 @@ expand_queue_name_shortcut(QueueNameBin, #ch{ virtual_host = VHostPath }) ->
 expand_routing_key_shortcut(<<>>, <<>>,
                             #ch{ most_recently_declared_queue = <<>> }) ->
     rabbit_misc:protocol_error(
-      not_allowed, "no previously declared queue", []);
+      not_found, "no previously declared queue", []);
 expand_routing_key_shortcut(<<>>, <<>>,
                             #ch{ most_recently_declared_queue = MRDQ }) ->
     MRDQ;
@@ -352,8 +383,10 @@ queue_blocked(QPid, State = #ch{blocking = Blocking}) ->
     end.
 
 handle_method(#'channel.open'{}, _, State = #ch{state = starting}) ->
-    rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}),
-    {reply, #'channel.open_ok'{}, State#ch{state = running}};
+    case rabbit_alarm:register(self(), {?MODULE, conserve_memory, []}) of
+        true  -> {noreply, State};
+        false -> {reply, #'channel.open_ok'{}, State#ch{state = running}}
+    end;
 
 handle_method(#'channel.open'{}, _, _State) ->
     rabbit_misc:protocol_error(
@@ -370,19 +403,26 @@ handle_method(#'channel.close'{}, _, State = #ch{writer_pid = WriterPid}) ->
 handle_method(#'access.request'{},_, State) ->
     {reply, #'access.request_ok'{ticket = 1}, State};
 
-handle_method(#'basic.publish'{exchange = ExchangeNameBin,
+%% While the channel's #flow record has client = false, any
+%% basic.publish is refused with a command_invalid protocol error.
+handle_method(#'basic.publish'{}, _, #ch{flow = #flow{client = false}}) ->
+    rabbit_misc:protocol_error(
+      command_invalid,
+      "basic.publish received after channel.flow_ok{active=false}", []);
+handle_method(#'basic.publish'{exchange    = ExchangeNameBin,
                                routing_key = RoutingKey,
-                               mandatory = Mandatory,
-                               immediate = Immediate},
-              Content, State = #ch{ virtual_host = VHostPath,
-                                    transaction_id = TxnKey,
-                                    writer_pid = WriterPid}) ->
+                               mandatory   = Mandatory,
+                               immediate   = Immediate},
+              Content, State = #ch{virtual_host   = VHostPath,
+                                   transaction_id = TxnKey,
+                                   writer_pid     = WriterPid}) ->
     ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
     check_write_permitted(ExchangeName, State),
     Exchange = rabbit_exchange:lookup_or_die(ExchangeName),
     %% We decode the content's properties here because we're almost
     %% certain to want to look at delivery-mode and priority.
-    DecodedContent = rabbit_binary_parser:ensure_content_decoded(Content),
+    DecodedContent = rabbit_binary_parser:ensure_content_decoded(
+                       Content, rabbit_framing_amqp_0_9_1),
     IsPersistent = is_message_persistent(DecodedContent),
     Message = #basic_message{exchange_name  = ExchangeName,
                              routing_key    = RoutingKey,
@@ -394,16 +432,9 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin,
           Exchange,
           rabbit_basic:delivery(Mandatory, Immediate, TxnKey, Message)),
     case RoutingRes of
-        routed ->
-            ok;
-        unroutable ->
-            %% FIXME: 312 should be replaced by the ?NO_ROUTE
-            %% definition, when we move to >=0-9
-            ok = basic_return(Message, WriterPid, 312, <<"unroutable">>);
-        not_delivered ->
-            %% FIXME: 313 should be replaced by the ?NO_CONSUMERS
-            %% definition, when we move to >=0-9
-            ok = basic_return(Message, WriterPid, 313, <<"not_delivered">>)
+        routed        -> ok;
+        unroutable    -> ok = basic_return(Message, WriterPid, no_route);
+        not_delivered -> ok = basic_return(Message, WriterPid, no_consumers)
     end,
     {noreply, case TxnKey of
                   none -> State;
@@ -413,13 +444,7 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin,
 handle_method(#'basic.ack'{delivery_tag = DeliveryTag,
                            multiple = Multiple},
               _, State = #ch{transaction_id = TxnKey,
-                             next_tag = NextDeliveryTag,
                              unacked_message_q = UAMQ}) ->
-    if DeliveryTag >= NextDeliveryTag ->
-            rabbit_misc:protocol_error(
-              command_invalid, "unknown delivery tag ~w", [DeliveryTag]);
-       true -> ok
-    end,
     {Acked, Remaining} = collect_acks(UAMQ, DeliveryTag, Multiple),
     Participants = ack(TxnKey, Acked),
     {noreply, case TxnKey of
@@ -436,11 +461,12 @@ handle_method(#'basic.ack'{delivery_tag = DeliveryTag,
 handle_method(#'basic.get'{queue = QueueNameBin,
                            no_ack = NoAck},
               _, State = #ch{ writer_pid = WriterPid,
+                              reader_pid = ReaderPid,
                               next_tag = DeliveryTag }) ->
     QueueName = expand_queue_name_shortcut(QueueNameBin, State),
     check_read_permitted(QueueName, State),
-    case rabbit_amqqueue:with_or_die(
-           QueueName,
+    case rabbit_amqqueue:with_exclusive_access_or_die(
+           QueueName, ReaderPid,
            fun (Q) -> rabbit_amqqueue:basic_get(Q, self(), NoAck) end) of
         {ok, MessageCount,
          Msg = {_QName, _QPid, _MsgId, Redelivered,
@@ -458,7 +484,7 @@ handle_method(#'basic.get'{queue = QueueNameBin,
                    Content),
             {noreply, State1#ch{next_tag = DeliveryTag + 1}};
         empty ->
-            {reply, #'basic.get_empty'{cluster_id = <<>>}, State}
+            {reply, #'basic.get_empty'{}, State}
     end;
 
 handle_method(#'basic.consume'{queue = QueueNameBin,
@@ -480,14 +506,14 @@ handle_method(#'basic.consume'{queue = QueueNameBin,
                     Other -> Other
                 end,
 
-            %% In order to ensure that the consume_ok gets sent before
-            %% any messages are sent to the consumer, we get the queue
-            %% process to send the consume_ok on our behalf.
-            case rabbit_amqqueue:with_or_die(
-                   QueueName,
+            %% We get the queue process to send the consume_ok on our
+            %% behalf. This is for symmetry with basic.cancel - see
+            %% the comment in that method for why.
+            case rabbit_amqqueue:with_exclusive_access_or_die(
+                   QueueName, ReaderPid,
                    fun (Q) ->
                            rabbit_amqqueue:basic_consume(
-                             Q, NoAck, ReaderPid, self(), LimiterPid,
+                             Q, NoAck, self(), LimiterPid,
                              ActualConsumerTag, ExclusiveConsume,
                              ok_msg(NoWait, #'basic.consume_ok'{
                                       consumer_tag = ActualConsumerTag}))
@@ -497,14 +523,6 @@ handle_method(#'basic.consume'{queue = QueueNameBin,
                                        dict:store(ActualConsumerTag,
                                                   QueueName,
                                                   ConsumerMapping)}};
-                {error, queue_owned_by_another_connection} ->
-                    %% The spec is silent on which exception to use
-                    %% here. This seems reasonable?
-                    %% FIXME: check this
-
-                    rabbit_misc:protocol_error(
-                      resource_locked, "~s owned by another connection",
-                      [rabbit_misc:rs(QueueName)]);
                 {error, exclusive_consume_unavailable} ->
                     rabbit_misc:protocol_error(
                       access_refused, "~s in exclusive use",
@@ -571,9 +589,8 @@ handle_method(#'basic.qos'{prefetch_count = PrefetchCount},
                   end,
     {reply, #'basic.qos_ok'{}, State#ch{limiter_pid = LimiterPid2}};
 
-handle_method(#'basic.recover'{requeue = true},
-              _, State = #ch{ transaction_id = none,
-                              unacked_message_q = UAMQ }) ->
+handle_method(#'basic.recover_async'{requeue = true},
+              _, State = #ch{ unacked_message_q = UAMQ }) ->
     ok = fold_per_queue(
            fun (QPid, MsgIds, ok) ->
                    %% The Qpid python test suite incorrectly assumes
@@ -583,12 +600,12 @@ handle_method(#'basic.recover'{requeue = true},
                    rabbit_amqqueue:requeue(
                      QPid, lists:reverse(MsgIds), self())
            end, ok, UAMQ),
-    %% No answer required, apparently!
+    %% No answer required - basic.recover is the newer, synchronous
+    %% variant of this method
     {noreply, State#ch{unacked_message_q = queue:new()}};
 
-handle_method(#'basic.recover'{requeue = false},
-              _, State = #ch{ transaction_id = none,
-                              writer_pid = WriterPid,
+handle_method(#'basic.recover_async'{requeue = false},
+              _, State = #ch{ writer_pid = WriterPid,
                               unacked_message_q = UAMQ }) ->
     ok = rabbit_misc:queue_fold(
            fun ({_DeliveryTag, none, _Msg}, ok) ->
@@ -608,12 +625,36 @@ handle_method(#'basic.recover'{requeue = false},
                       WriterPid, false, ConsumerTag, DeliveryTag,
                       {QName, QPid, MsgId, true, Message})
            end, ok, UAMQ),
-    %% No answer required, apparently!
+    %% No answer required - basic.recover is the newer, synchronous
+    %% variant of this method
     {noreply, State};
 
-handle_method(#'basic.recover'{}, _, _State) ->
-    rabbit_misc:protocol_error(
-      not_allowed, "attempt to recover a transactional channel",[]);
+%% basic.recover is the synchronous form of basic.recover_async: run the
+%% async clause for the same requeue flag, then confirm to the client
+%% with basic.recover_ok via the writer.
+handle_method(#'basic.recover'{requeue = Requeue}, Content, State) ->
+    AsyncMethod = #'basic.recover_async'{requeue = Requeue},
+    {noreply, State1} = handle_method(AsyncMethod, Content, State),
+    ok = rabbit_writer:send_command(State1#ch.writer_pid,
+                                    #'basic.recover_ok'{}),
+    {noreply, State1};
+
+%% basic.reject: take the single delivery identified by DeliveryTag out
+%% of the unacked queue (Multiple = false), pass it to
+%% rabbit_amqqueue:reject/4 per queue pid together with the client's
+%% requeue flag, and notify the limiter about it.
+handle_method(#'basic.reject'{delivery_tag = DeliveryTag,
+                              requeue      = Requeue},
+              _, State = #ch{unacked_message_q = UAMQ,
+                             limiter_pid       = LimiterPid}) ->
+    {Rejected, Remaining} = collect_acks(UAMQ, DeliveryTag, false),
+    RejectPerQueue =
+        fun (QPid, MsgIds, ok) ->
+                rabbit_amqqueue:reject(QPid, MsgIds, Requeue, self())
+        end,
+    ok = fold_per_queue(RejectPerQueue, ok, Rejected),
+    ok = notify_limiter(LimiterPid, Rejected),
+    {noreply, State#ch{unacked_message_q = Remaining}};
 
 handle_method(#'exchange.declare'{exchange = ExchangeNameBin,
                                   type = TypeNameBin,
@@ -644,18 +677,17 @@ handle_method(#'exchange.declare'{exchange = ExchangeNameBin,
                                         AutoDelete,
                                         Args)
         end,
-    ok = rabbit_exchange:assert_type(X, CheckedType),
+    ok = rabbit_exchange:assert_equivalence(X, CheckedType, Durable,
+                                            AutoDelete, Args),
     return_ok(State, NoWait, #'exchange.declare_ok'{});
 
 handle_method(#'exchange.declare'{exchange = ExchangeNameBin,
-                                  type = TypeNameBin,
                                   passive = true,
                                   nowait = NoWait},
               _, State = #ch{ virtual_host = VHostPath }) ->
     ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
     check_configure_permitted(ExchangeName, State),
-    X = rabbit_exchange:lookup_or_die(ExchangeName),
-    ok = rabbit_exchange:assert_type(X, rabbit_exchange:check_type(TypeNameBin)),
+    _ = rabbit_exchange:lookup_or_die(ExchangeName),
     return_ok(State, NoWait, #'exchange.declare_ok'{});
 
 handle_method(#'exchange.delete'{exchange = ExchangeNameBin,
@@ -674,73 +706,78 @@ handle_method(#'exchange.delete'{exchange = ExchangeNameBin,
             return_ok(State, NoWait,  #'exchange.delete_ok'{})
     end;
 
-handle_method(#'queue.declare'{queue = QueueNameBin,
-                               passive = false,
-                               durable = Durable,
-                               exclusive = ExclusiveDeclare,
+handle_method(#'queue.declare'{queue       = QueueNameBin,
+                               passive     = false,
+                               durable     = Durable,
+                               exclusive   = ExclusiveDeclare,
                                auto_delete = AutoDelete,
-                               nowait = NoWait,
-                               arguments = Args},
-              _, State = #ch { virtual_host = VHostPath,
-                               reader_pid = ReaderPid }) ->
-    %% FIXME: atomic create&claim
-    Finish =
-        fun (Q) ->
-                if ExclusiveDeclare ->
-                        case rabbit_amqqueue:claim_queue(Q, ReaderPid) of
-                            locked ->
-                                %% AMQP 0-8 doesn't say which
-                                %% exception to use, so we mimic QPid
-                                %% here.
-                                rabbit_misc:protocol_error(
-                                  resource_locked,
-                                  "cannot obtain exclusive access to locked ~s",
-                                  [rabbit_misc:rs(Q#amqqueue.name)]);
-                            ok -> ok
-                        end;
-                   true ->
-                        ok
-                end,
-                Q
-        end,
-    Q = case rabbit_amqqueue:with(
-               rabbit_misc:r(VHostPath, queue, QueueNameBin),
-               Finish) of
-            {error, not_found} ->
-                ActualNameBin =
-                    case QueueNameBin of
+                               nowait      = NoWait,
+                               arguments   = Args} = Declare,
+              _, State = #ch{virtual_host        = VHostPath,
+                             reader_pid          = ReaderPid,
+                             queue_collector_pid = CollectorPid}) ->
+    Owner = case ExclusiveDeclare of
+                true  -> ReaderPid;
+                false -> none
+            end,
+    ActualNameBin = case QueueNameBin of
                         <<>>  -> rabbit_guid:binstring_guid("amq.gen");
                         Other -> check_name('queue', Other)
                     end,
-                QueueName = rabbit_misc:r(VHostPath, queue, ActualNameBin),
-                check_configure_permitted(QueueName, State),
-                Finish(rabbit_amqqueue:declare(QueueName,
-                                               Durable, AutoDelete, Args));
-            Other = #amqqueue{name = QueueName} ->
-                check_configure_permitted(QueueName, State),
-                Other
-        end,
-    return_queue_declare_ok(State, NoWait, Q);
+    QueueName = rabbit_misc:r(VHostPath, queue, ActualNameBin),
+    check_configure_permitted(QueueName, State),
+    case rabbit_amqqueue:with(
+           QueueName,
+           fun (Q) -> ok = rabbit_amqqueue:assert_equivalence(
+                             Q, Durable, AutoDelete, Args, Owner),
+                      rabbit_amqqueue:stat(Q)
+           end) of
+        {ok, MessageCount, ConsumerCount} ->
+            return_queue_declare_ok(QueueName, NoWait, MessageCount,
+                                    ConsumerCount, State);
+        {error, not_found} ->
+            case rabbit_amqqueue:declare(QueueName, Durable, AutoDelete,
+                                         Args, Owner) of
+                {new, Q = #amqqueue{}} ->
+                    %% We need to notify the reader within the channel
+                    %% process so that we can be sure there are no
+                    %% outstanding exclusive queues being declared as
+                    %% the connection shuts down.
+                    ok = case Owner of
+                             none -> ok;
+                             _    -> rabbit_queue_collector:register(CollectorPid, Q)
+                         end,
+                    return_queue_declare_ok(QueueName, NoWait, 0, 0, State);
+                {existing, _Q} ->
+                    %% must have been created between the stat and the
+                    %% declare. Loop around again.
+                    handle_method(Declare, none, State)
+            end
+    end;
 
-handle_method(#'queue.declare'{queue = QueueNameBin,
+handle_method(#'queue.declare'{queue   = QueueNameBin,
                                passive = true,
-                               nowait = NoWait},
-              _, State = #ch{ virtual_host = VHostPath }) ->
+                               nowait  = NoWait},
+              _, State = #ch{virtual_host = VHostPath,
+                             reader_pid   = ReaderPid}) ->
     QueueName = rabbit_misc:r(VHostPath, queue, QueueNameBin),
     check_configure_permitted(QueueName, State),
-    Q = rabbit_amqqueue:with_or_die(QueueName, fun (Q) -> Q end),
-    return_queue_declare_ok(State, NoWait, Q);
+    {{ok, MessageCount, ConsumerCount}, #amqqueue{} = Q} =
+        rabbit_amqqueue:with_or_die(
+          QueueName, fun (Q) -> {rabbit_amqqueue:stat(Q), Q} end),
+    ok = rabbit_amqqueue:check_exclusive_access(Q, ReaderPid),
+    return_queue_declare_ok(QueueName, NoWait, MessageCount, ConsumerCount,
+                            State);
 
 handle_method(#'queue.delete'{queue = QueueNameBin,
                               if_unused = IfUnused,
                               if_empty = IfEmpty,
-                              nowait = NoWait
-                             },
-              _, State) ->
+                              nowait = NoWait},
+              _, State = #ch{reader_pid = ReaderPid}) ->
     QueueName = expand_queue_name_shortcut(QueueNameBin, State),
     check_configure_permitted(QueueName, State),
-    case rabbit_amqqueue:with_or_die(
-           QueueName,
+    case rabbit_amqqueue:with_exclusive_access_or_die(
+           QueueName, ReaderPid,
            fun (Q) -> rabbit_amqqueue:delete(Q, IfUnused, IfEmpty) end) of
         {error, in_use} ->
             rabbit_misc:protocol_error(
@@ -750,8 +787,7 @@ handle_method(#'queue.delete'{queue = QueueNameBin,
               precondition_failed, "~s not empty", [rabbit_misc:rs(QueueName)]);
         {ok, PurgedMessageCount} ->
             return_ok(State, NoWait,
-                      #'queue.delete_ok'{
-                               message_count = PurgedMessageCount})
+                      #'queue.delete_ok'{message_count = PurgedMessageCount})
     end;
 
 handle_method(#'queue.bind'{queue = QueueNameBin,
@@ -759,7 +795,7 @@ handle_method(#'queue.bind'{queue = QueueNameBin,
                             routing_key = RoutingKey,
                             nowait = NoWait,
                             arguments = Arguments}, _, State) ->
-    binding_action(fun rabbit_exchange:add_binding/4, ExchangeNameBin,
+    binding_action(fun rabbit_exchange:add_binding/5, ExchangeNameBin,
                    QueueNameBin, RoutingKey, Arguments, #'queue.bind_ok'{},
                    NoWait, State);
 
@@ -767,17 +803,17 @@ handle_method(#'queue.unbind'{queue = QueueNameBin,
                               exchange = ExchangeNameBin,
                               routing_key = RoutingKey,
                               arguments = Arguments}, _, State) ->
-    binding_action(fun rabbit_exchange:delete_binding/4, ExchangeNameBin,
+    binding_action(fun rabbit_exchange:delete_binding/5, ExchangeNameBin,
                    QueueNameBin, RoutingKey, Arguments, #'queue.unbind_ok'{},
                    false, State);
 
 handle_method(#'queue.purge'{queue = QueueNameBin,
                              nowait = NoWait},
-              _, State) ->
+              _, State = #ch{reader_pid = ReaderPid}) ->
     QueueName = expand_queue_name_shortcut(QueueNameBin, State),
     check_read_permitted(QueueName, State),
-    {ok, PurgedMessageCount} = rabbit_amqqueue:with_or_die(
-                                 QueueName,
+    {ok, PurgedMessageCount} = rabbit_amqqueue:with_exclusive_access_or_die(
+                                 QueueName, ReaderPid,
                                  fun (Q) -> rabbit_amqqueue:purge(Q) end),
     return_ok(State, NoWait,
               #'queue.purge_ok'{message_count = PurgedMessageCount});
@@ -790,14 +826,14 @@ handle_method(#'tx.select'{}, _, State) ->
 
 handle_method(#'tx.commit'{}, _, #ch{transaction_id = none}) ->
     rabbit_misc:protocol_error(
-      not_allowed, "channel is not transactional", []);
+      precondition_failed, "channel is not transactional", []);
 
 handle_method(#'tx.commit'{}, _, State) ->
     {reply, #'tx.commit_ok'{}, internal_commit(State)};
 
 handle_method(#'tx.rollback'{}, _, #ch{transaction_id = none}) ->
     rabbit_misc:protocol_error(
-      not_allowed, "channel is not transactional", []);
+      precondition_failed, "channel is not transactional", []);
 
 handle_method(#'tx.rollback'{}, _, State) ->
     {reply, #'tx.rollback_ok'{}, internal_rollback(State)};
@@ -810,7 +846,6 @@ handle_method(#'channel.flow'{active = true}, _,
                   end,
     {reply, #'channel.flow_ok'{active = true},
      State#ch{limiter_pid = LimiterPid1}};
-
 handle_method(#'channel.flow'{active = false}, _,
               State = #ch{limiter_pid = LimiterPid,
                           consumer_mapping = Consumers}) ->
@@ -828,11 +863,37 @@ handle_method(#'channel.flow'{active = false}, _,
                                  blocking = dict:from_list(Queues)}}
     end;
 
-handle_method(#'channel.flow_ok'{active = _}, _, State) ->
-    %% TODO: We may want to correlate this to channel.flow messages we
-    %% have sent, and complain if we get an unsolicited
-    %% channel.flow_ok, or the client refuses our flow request.
-    {noreply, State};
+%% channel.flow_ok handling, driven by the #flow record: 'server' is the
+%% state the server wants, 'client' the state the client is recorded as
+%% being in, 'pending' is {Ref, TRef} while a channel.flow awaits its
+%% flow_ok and 'none' otherwise.
+%%
+%% Clause 1: the reply confirms the state the server still wants, so
+%% cancel the timer and record the client's new state.
+handle_method(#'channel.flow_ok'{active = Active}, _,
+              State = #ch{flow = #flow{server = Active, client = Flow,
+                                       pending = {_Ref, TRef}} = F})
+  when Flow =:= not Active ->
+    {ok, cancel} = timer:cancel(TRef),
+    {noreply, State#ch{flow = F#flow{client = Active, pending = none}}};
+%% Clause 2: the reply flips the client to Active, but the server's
+%% wanted state equals the client's previously recorded one; cancel the
+%% timer and issue a fresh channel.flow for the wanted state.
+handle_method(#'channel.flow_ok'{active = Active}, _,
+              State = #ch{flow = #flow{server = Flow, client = Flow,
+                                       pending = {_Ref, TRef}}})
+  when Flow =:= not Active ->
+    {ok, cancel} = timer:cancel(TRef),
+    {noreply, issue_flow(Flow, State)};
+%% Clause 3: nothing is pending, so the flow_ok is unsolicited.
+handle_method(#'channel.flow_ok'{}, _, #ch{flow = #flow{pending = none}}) ->
+    rabbit_misc:protocol_error(
+      command_invalid, "unsolicited channel.flow_ok", []);
+%% Clause 4: any other flow_ok is reported as having the wrong polarity.
+handle_method(#'channel.flow_ok'{active = Active}, _, _State) ->
+    rabbit_misc:protocol_error(
+      command_invalid,
+      "received channel.flow_ok{active=~w} has incorrect polarity", [Active]);
 
 handle_method(_MethodRecord, _Content, _State) ->
     rabbit_misc:protocol_error(
@@ -840,8 +889,37 @@ handle_method(_MethodRecord, _Content, _State) ->
 
 %%----------------------------------------------------------------------------
 
+%% Record the flow state (Active) the server now wants. When that is the
+%% opposite of the current server-side flag and no channel.flow is
+%% awaiting its flow_ok, clear the permission cache and send a
+%% channel.flow straight away; in every other case only the wanted
+%% server flag is updated.
+flow_control(Active,
+             State = #ch{flow = #flow{server = Server, pending = none}})
+  when Server =:= not Active ->
+    ok = clear_permission_cache(),
+    noreply(issue_flow(Active, State));
+flow_control(Active, State = #ch{flow = FlowState}) ->
+    noreply(State#ch{flow = FlowState#flow{server = Active}}).
+
+%% Send channel.flow{active = Active} through the writer and arm a timer
+%% that applies ?MODULE:flow_timeout to this process's pid and a fresh
+%% reference after ?FLOW_OK_TIMEOUT. The returned state marks the request
+%% as pending, with the client recorded in the opposite state.
+issue_flow(Active, State = #ch{writer_pid = WriterPid}) ->
+    FlowCmd = #'channel.flow'{active = Active},
+    ok = rabbit_writer:send_command(WriterPid, FlowCmd),
+    Ref = make_ref(),
+    {ok, TRef} = timer:apply_after(?FLOW_OK_TIMEOUT, ?MODULE, flow_timeout,
+                                   [self(), Ref]),
+    Pending = {Ref, TRef},
+    State#ch{flow = #flow{server = Active, client = not Active,
+                          pending = Pending}}.
+
 binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments,
-               ReturnMethod, NoWait, State = #ch{virtual_host = VHostPath}) ->
+               ReturnMethod, NoWait,
+               State = #ch{virtual_host = VHostPath,
+                           reader_pid   = ReaderPid}) ->
     %% FIXME: connection exception (!) on failure??
     %% (see rule named "failure" in spec-XML)
     %% FIXME: don't allow binding to internal exchanges -
@@ -852,7 +919,12 @@ binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments,
                                                    State),
     ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
     check_read_permitted(ExchangeName, State),
-    case Fun(ExchangeName, QueueName, ActualRoutingKey, Arguments) of
+    case Fun(ExchangeName, QueueName, ActualRoutingKey, Arguments,
+             fun (_X, Q) ->
+                     try rabbit_amqqueue:check_exclusive_access(Q, ReaderPid)
+                     catch exit:Reason -> {error, Reason}
+                     end
+             end) of
         {error, exchange_not_found} ->
             rabbit_misc:not_found(ExchangeName);
         {error, queue_not_found} ->
@@ -866,17 +938,17 @@ binding_action(Fun, ExchangeNameBin, QueueNameBin, RoutingKey, Arguments,
               not_found, "no binding ~s between ~s and ~s",
               [RoutingKey, rabbit_misc:rs(ExchangeName),
                rabbit_misc:rs(QueueName)]);
-        {error, durability_settings_incompatible} ->
-            rabbit_misc:protocol_error(
-              not_allowed, "durability settings of ~s incompatible with ~s",
-              [rabbit_misc:rs(QueueName), rabbit_misc:rs(ExchangeName)]);
+        {error, #amqp_error{} = Error} ->
+            rabbit_misc:protocol_error(Error);
         ok -> return_ok(State, NoWait, ReturnMethod)
     end.
 
 basic_return(#basic_message{exchange_name = ExchangeName,
                             routing_key   = RoutingKey,
                             content       = Content},
-             WriterPid, ReplyCode, ReplyText) ->
+             WriterPid, Reason) ->
+    {_Close, ReplyCode, ReplyText} =
+        rabbit_framing_amqp_0_9_1:lookup_amqp_exception(Reason),
     ok = rabbit_writer:send_command(
            WriterPid,
            #'basic.return'{reply_code  = ReplyCode,
@@ -904,7 +976,8 @@ collect_acks(ToAcc, PrefixAcc, Q, DeliveryTag, Multiple) ->
                                  QTail, DeliveryTag, Multiple)
             end;
         {empty, _} ->
-            {ToAcc, PrefixAcc}
+            rabbit_misc:protocol_error(
+              precondition_failed, "unknown delivery tag ~w", [DeliveryTag])
     end.
 
 add_tx_participants(MoreP, State = #ch{tx_participants = Participants}) ->
@@ -968,7 +1041,7 @@ fold_per_queue(F, Acc0, UAQ) ->
               Acc0, D).
 
 start_limiter(State = #ch{unacked_message_q = UAMQ}) ->
-    LPid = rabbit_limiter:start_link(self(), queue:len(UAMQ)),
+    {ok, LPid} = rabbit_limiter:start_link(self(), queue:len(UAMQ)),
     ok = limit_queues(LPid, State),
     LPid.
 
diff --git a/src/rabbit_control.erl b/src/rabbit_control.erl
index d1834b3b..6e6ad06c 100644
--- a/src/rabbit_control.erl
+++ b/src/rabbit_control.erl
@@ -44,7 +44,7 @@
 
 -spec(start/0 :: () -> no_return()).
 -spec(stop/0 :: () -> 'ok').
--spec(action/4 :: (atom(), erlang_node(), [string()],
+-spec(action/4 :: (atom(), node(), [string()],
                    fun ((string(), [any()]) -> 'ok')) -> 'ok').
 -spec(usage/0 :: () -> no_return()).
 
@@ -59,8 +59,8 @@ start() ->
         parse_args(FullCommand, #params{quiet = false,
                                         node = rabbit_misc:makenode(NodeStr)}),
     Inform = case Quiet of
-                 true  -> fun(_Format, _Args1) -> ok end;
-                 false -> fun(Format, Args1) ->
+                 true  -> fun (_Format, _Args1) -> ok end;
+                 false -> fun (Format, Args1) ->
                                   io:format(Format ++ " ...~n", Args1)
                          end
              end,
@@ -160,6 +160,14 @@ action(cluster, Node, ClusterNodeSs, Inform) ->
               [Node, ClusterNodes]),
     rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]);
 
+%% force_cluster: convert the given node names to atoms and invoke
+%% rabbit_mnesia:force_cluster/1 with them on Node via rpc_call.
+action(force_cluster, Node, ClusterNodeSs, Inform) ->
+    ClusterNodes = [list_to_atom(NodeStr) || NodeStr <- ClusterNodeSs],
+    Inform("Forcefully clustering node ~p with ~p (ignoring offline nodes)",
+           [Node, ClusterNodes]),
+    rpc_call(Node, rabbit_mnesia, force_cluster, [ClusterNodes]);
+
 action(status, Node, [], Inform) ->
     Inform("Status of node ~p", [Node]),
     case call(Node, {rabbit, status, []}) of
diff --git a/src/rabbit_dialyzer.erl b/src/rabbit_dialyzer.erl
index f19e8d02..51bd6b1f 100644
--- a/src/rabbit_dialyzer.erl
+++ b/src/rabbit_dialyzer.erl
@@ -30,17 +30,17 @@
 %%
 
 -module(rabbit_dialyzer).
--include("rabbit.hrl").
 
--export([create_basic_plt/1, add_to_plt/2, dialyze_files/2, halt_with_code/1]).
+-export([create_basic_plt/1, add_to_plt/2, dialyze_files/2,
+         halt_with_code/1]).
 
 %%----------------------------------------------------------------------------
 
 -ifdef(use_specs).
 
--spec(create_basic_plt/1 :: (file_path()) -> 'ok').
--spec(add_to_plt/2 :: (file_path(), string()) -> 'ok').
--spec(dialyze_files/2 :: (file_path(), string()) -> 'ok').
+-spec(create_basic_plt/1 :: (file:filename()) -> 'ok').
+-spec(add_to_plt/2 :: (file:filename(), string()) -> 'ok').
+-spec(dialyze_files/2 :: (file:filename(), string()) -> 'ok').
 -spec(halt_with_code/1 :: (atom()) -> no_return()).
 
 -endif.
@@ -56,7 +56,7 @@ create_basic_plt(BasicPltPath) ->
     ok.
 
 add_to_plt(PltPath, FilesString) ->
-    {ok, Files} = regexp:split(FilesString, " "),
+    Files = string:tokens(FilesString, " "),
     DialyzerWarnings = dialyzer:run([{analysis_type, plt_add},
                                      {init_plt, PltPath},
                                      {output_plt, PltPath},
@@ -65,7 +65,7 @@ add_to_plt(PltPath, FilesString) ->
     ok.
 
 dialyze_files(PltPath, ModifiedFiles) ->
-    {ok, Files} = regexp:split(ModifiedFiles, " "),
+    Files = string:tokens(ModifiedFiles, " "),
     DialyzerWarnings = dialyzer:run([{init_plt, PltPath},
                                      {files, Files}]),
     case DialyzerWarnings of
diff --git a/src/rabbit_error_logger.erl b/src/rabbit_error_logger.erl
index e9baf2c4..42861f86 100644
--- a/src/rabbit_error_logger.erl
+++ b/src/rabbit_error_logger.erl
@@ -39,7 +39,8 @@
 
 -export([boot/0]).
 
--export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2, handle_info/2]).
+-export([init/1, terminate/2, code_change/3, handle_call/2, handle_event/2,
+         handle_info/2]).
 
 boot() ->
     {ok, DefaultVHost} = application:get_env(default_vhost),
diff --git a/src/rabbit_error_logger_file_h.erl b/src/rabbit_error_logger_file_h.erl
index 45b66712..875d680f 100644
--- a/src/rabbit_error_logger_file_h.erl
+++ b/src/rabbit_error_logger_file_h.erl
@@ -33,7 +33,8 @@
 
 -behaviour(gen_event).
 
--export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]).
+-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2,
+         code_change/3]).
 
 %% rabbit_error_logger_file_h is a wrapper around the error_logger_file_h
 %% module because the original's init/1 does not match properly
diff --git a/src/rabbit_exchange.erl b/src/rabbit_exchange.erl
index 8f41392f..49f87a22 100644
--- a/src/rabbit_exchange.erl
+++ b/src/rabbit_exchange.erl
@@ -33,13 +33,14 @@
 -include("rabbit.hrl").
 -include("rabbit_framing.hrl").
 
--export([recover/0, declare/5, lookup/1, lookup_or_die/1,
-         list/1, info_keys/0, info/1, info/2, info_all/1, info_all/2,
-         publish/2]).
--export([add_binding/4, delete_binding/4, list_bindings/1]).
+-export([recover/0, declare/5, lookup/1, lookup_or_die/1, list/1, info_keys/0,
+         info/1, info/2, info_all/1, info_all/2, publish/2]).
+-export([add_binding/5, delete_binding/5, list_bindings/1]).
 -export([delete/2]).
 -export([delete_queue_bindings/1, delete_transient_queue_bindings/1]).
--export([check_type/1, assert_type/2]).
+-export([assert_equivalence/5]).
+-export([assert_args_equivalence/2]).
+-export([check_type/1]).
 
 %% EXTENDED API
 -export([list_exchange_bindings/1]).
@@ -48,61 +49,93 @@
 -import(mnesia).
 -import(sets).
 -import(lists).
--import(regexp).
 
 %%----------------------------------------------------------------------------
 
 -ifdef(use_specs).
 
--type(bind_res() :: 'ok' | {'error',
-                            'queue_not_found' |
-                            'exchange_not_found' |
-                            'exchange_and_queue_not_found'}).
+-export_type([name/0, type/0, binding_key/0]).
+
+-type(name() :: rabbit_types:r('exchange')).
+-type(type() :: atom()).
+-type(binding_key() :: binary()).
+
+-type(bind_res() :: rabbit_types:ok_or_error('queue_not_found' |
+                                             'exchange_not_found' |
+                                             'exchange_and_queue_not_found')).
+-type(inner_fun() :: %% binding check callback, invoked as InnerFun(X, Q)
+        fun((rabbit_types:exchange(), rabbit_types:amqqueue()) ->
+                   rabbit_types:ok_or_error(rabbit_types:amqp_error()))).
+
 -spec(recover/0 :: () -> 'ok').
--spec(declare/5 :: (exchange_name(), exchange_type(), boolean(), boolean(),
-                    amqp_table()) -> exchange()).
+-spec(declare/5 ::
+        (name(), type(), boolean(), boolean(), rabbit_framing:amqp_table())
+        -> rabbit_types:exchange()).
 -spec(check_type/1 :: (binary()) -> atom()).
--spec(assert_type/2 :: (exchange(), atom()) -> 'ok').
--spec(lookup/1 :: (exchange_name()) -> {'ok', exchange()} | not_found()).
--spec(lookup_or_die/1 :: (exchange_name()) -> exchange()).
--spec(list/1 :: (vhost()) -> [exchange()]).
--spec(info_keys/0 :: () -> [info_key()]).
--spec(info/1 :: (exchange()) -> [info()]).
--spec(info/2 :: (exchange(), [info_key()]) -> [info()]).
--spec(info_all/1 :: (vhost()) -> [[info()]]).
--spec(info_all/2 :: (vhost(), [info_key()]) -> [[info()]]).
--spec(publish/2 :: (exchange(), delivery()) -> {routing_result(), [pid()]}).
--spec(add_binding/4 ::
-      (exchange_name(), queue_name(), routing_key(), amqp_table()) ->
-             bind_res() | {'error', 'durability_settings_incompatible'}).
--spec(delete_binding/4 ::
-      (exchange_name(), queue_name(), routing_key(), amqp_table()) ->
-             bind_res() | {'error', 'binding_not_found'}).
--spec(list_bindings/1 :: (vhost()) ->
-             [{exchange_name(), queue_name(), routing_key(), amqp_table()}]).
--spec(delete_queue_bindings/1 :: (queue_name()) -> fun(() -> none())).
--spec(delete_transient_queue_bindings/1 :: (queue_name()) -> fun(() -> none())).
--spec(delete/2 :: (exchange_name(), boolean()) ->
-             'ok' | not_found() | {'error', 'in_use'}).
--spec(list_queue_bindings/1 :: (queue_name()) ->
-              [{exchange_name(), routing_key(), amqp_table()}]).
--spec(list_exchange_bindings/1 :: (exchange_name()) ->
-              [{queue_name(), routing_key(), amqp_table()}]).
+-spec(assert_equivalence/5 ::
+        (rabbit_types:exchange(), atom(), boolean(), boolean(),
+         rabbit_framing:amqp_table())
+        -> 'ok' | no_return()).
+-spec(assert_args_equivalence/2 ::
+        (rabbit_types:exchange(), rabbit_framing:amqp_table()) ->
+                                        'ok' | no_return()).
+-spec(lookup/1 ::
+        (name()) -> rabbit_types:ok(rabbit_types:exchange()) |
+                    rabbit_types:error('not_found')).
+-spec(lookup_or_die/1 :: (name()) -> rabbit_types:exchange()).
+-spec(list/1 :: (rabbit_types:vhost()) -> [rabbit_types:exchange()]).
+-spec(info_keys/0 :: () -> [rabbit_types:info_key()]).
+-spec(info/1 :: (rabbit_types:exchange()) -> [rabbit_types:info()]).
+-spec(info/2 ::
+        (rabbit_types:exchange(), [rabbit_types:info_key()])
+        -> [rabbit_types:info()]).
+-spec(info_all/1 :: (rabbit_types:vhost()) -> [[rabbit_types:info()]]).
+-spec(info_all/2 ::(rabbit_types:vhost(), [rabbit_types:info_key()])
+                    -> [[rabbit_types:info()]]).
+-spec(publish/2 :: (rabbit_types:exchange(), rabbit_types:delivery())
+                   -> {rabbit_router:routing_result(), [pid()]}).
+-spec(add_binding/5 :: %% an error returned by inner_fun() is passed through
+        (name(), rabbit_amqqueue:name(), rabbit_router:routing_key(),
+         rabbit_framing:amqp_table(), inner_fun())
+        -> bind_res() | rabbit_types:error(rabbit_types:amqp_error())).
+-spec(delete_binding/5 :: %% an error returned by inner_fun() is passed through
+        (name(), rabbit_amqqueue:name(), rabbit_router:routing_key(),
+         rabbit_framing:amqp_table(), inner_fun()) -> bind_res() |
+        rabbit_types:error('binding_not_found' | rabbit_types:amqp_error())).
+-spec(list_bindings/1 ::
+        (rabbit_types:vhost())
+        -> [{name(), rabbit_amqqueue:name(), rabbit_router:routing_key(),
+             rabbit_framing:amqp_table()}]).
+-spec(delete_queue_bindings/1 ::
+        (rabbit_amqqueue:name()) -> fun (() -> none())).
+-spec(delete_transient_queue_bindings/1 ::
+        (rabbit_amqqueue:name()) -> fun (() -> none())).
+-spec(delete/2 ::
+        (name(), boolean())-> 'ok' |
+                              rabbit_types:error('not_found') |
+                              rabbit_types:error('in_use')).
+-spec(list_queue_bindings/1 ::
+        (rabbit_amqqueue:name())
+        -> [{name(), rabbit_router:routing_key(),
+             rabbit_framing:amqp_table()}]).
+-spec(list_exchange_bindings/1 ::
+        (name()) -> [{rabbit_amqqueue:name(), rabbit_router:routing_key(),
+                      rabbit_framing:amqp_table()}]).
 
 -endif.
 
 %%----------------------------------------------------------------------------
 
--define(INFO_KEYS, [name, type, durable, auto_delete, arguments].
+-define(INFO_KEYS, [name, type, durable, auto_delete, arguments]).
 
 recover() ->
     Exs = rabbit_misc:table_fold(
-            fun(Exchange, Acc) ->
+            fun (Exchange, Acc) ->
                     ok = mnesia:write(rabbit_exchange, Exchange, write),
                     [Exchange | Acc]
             end, [], rabbit_durable_exchange),
     Bs = rabbit_misc:table_fold(
-           fun(Route = #route{binding = B}, Acc) ->
+           fun (Route = #route{binding = B}, Acc) ->
                    {_, ReverseRoute} = route_with_reverse(Route),
                    ok = mnesia:write(rabbit_route,
                                      Route, write),
@@ -182,13 +215,26 @@ check_type(TypeBin) ->
             T
     end.
 
-assert_type(#exchange{ type = ActualType }, RequiredType)
-  when ActualType == RequiredType ->
-    ok;
-assert_type(#exchange{ name = Name, type = ActualType }, RequiredType) ->
+assert_equivalence(X = #exchange{ durable = Durable,
+                                  auto_delete = AutoDelete,
+                                  type = Type},
+                   Type, Durable, AutoDelete, RequiredArgs) ->
+    (type_to_module(Type)):assert_args_equivalence(X, RequiredArgs);
+assert_equivalence(#exchange{ name = Name }, _Type, _Durable, _AutoDelete,
+                   _Args) ->
     rabbit_misc:protocol_error(
-      not_allowed, "cannot redeclare ~s of type '~s' with type '~s'",
-      [rabbit_misc:rs(Name), ActualType, RequiredType]).
+      not_allowed,
+      "cannot redeclare ~s with different type, durable or autodelete value",
+      [rabbit_misc:rs(Name)]).
+
+%% Compares the arguments of an existing exchange with those of a
+%% redeclaration. The spec says "Arguments are compared for semantic
+%% equivalence"; "alternate-exchange" is the only key passed on here.
+assert_args_equivalence(#exchange{ name = XName, arguments = CurrentArgs },
+                        RequiredArgs) ->
+    ComparedKeys = [<<"alternate-exchange">>],
+    rabbit_misc:assert_args_equivalence(CurrentArgs, RequiredArgs, XName,
+                                        ComparedKeys).
 
 lookup(Name) ->
     rabbit_misc:dirty_read({rabbit_exchange, Name}).
@@ -305,7 +351,7 @@ delete_queue_bindings(QueueName, FwdDeleteFun) ->
                       Module = type_to_module(Type),
                       case IsDeleted of
                           auto_deleted -> Module:delete(X, Bs);
-                          no_delete    -> Module:remove_bindings(X, Bs)
+                          not_deleted  -> Module:remove_bindings(X, Bs)
                       end
               end, Cleanup)
     end.
@@ -332,7 +378,6 @@ cleanup_deleted_queue_bindings1(ExchangeName, Bindings) ->
     [X] = mnesia:read({rabbit_exchange, ExchangeName}),
     {maybe_auto_delete(X), Bindings}.
 
-
 delete_forward_routes(Route) ->
     ok = mnesia:delete_object(rabbit_route, Route, write),
     ok = mnesia:delete_object(rabbit_durable_route, Route, write).
@@ -349,7 +394,7 @@ continue({[], Continuation}) -> continue(mnesia:select(Continuation)).
 
 call_with_exchange(Exchange, Fun) ->
     rabbit_misc:execute_mnesia_transaction(
-      fun() -> case mnesia:read({rabbit_exchange, Exchange}) of
+      fun () -> case mnesia:read({rabbit_exchange, Exchange}) of
                    []  -> {error, not_found};
                    [X] -> Fun(X)
                end
@@ -357,7 +402,7 @@ call_with_exchange(Exchange, Fun) ->
 
 call_with_exchange_and_queue(Exchange, Queue, Fun) ->
     rabbit_misc:execute_mnesia_transaction(
-      fun() -> case {mnesia:read({rabbit_exchange, Exchange}),
+      fun () -> case {mnesia:read({rabbit_exchange, Exchange}),
                      mnesia:read({rabbit_queue, Queue})} of
                    {[X], [Q]} -> Fun(X, Q);
                    {[ ], [_]} -> {error, exchange_not_found};
@@ -366,50 +411,66 @@ call_with_exchange_and_queue(Exchange, Queue, Fun) ->
                end
       end).
 
-add_binding(ExchangeName, QueueName, RoutingKey, Arguments) ->
+add_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) ->
     case binding_action(
            ExchangeName, QueueName, RoutingKey, Arguments,
            fun (X, Q, B) ->
-                   if Q#amqqueue.durable and not(X#exchange.durable) ->
-                           {error, durability_settings_incompatible};
-                      true ->
+                   %% this argument is used to check queue exclusivity;
+                   %% in general, we want to fail on that in preference to
+                   %% anything else
+                   case InnerFun(X, Q) of
+                       ok ->
                            case mnesia:read({rabbit_route, B}) of
                                [] ->
-                                   sync_binding(B, Q#amqqueue.durable,
-                                                fun mnesia:write/3),
+                                   ok = sync_binding(B,
+                                                     X#exchange.durable andalso
+                                                     Q#amqqueue.durable,
+                                                     fun mnesia:write/3),
                                    {new, X, B};
                                [_R] ->
                                    {existing, X, B}
-                           end
+                           end;
+                       {error, _} = E ->
+                           E
                    end
            end) of
         {new, Exchange = #exchange{ type = Type }, Binding} ->
             (type_to_module(Type)):add_binding(Exchange, Binding);
         {existing, _, _} ->
             ok;
-        Err = {error, _}  ->
+        {error, _} = Err ->
             Err
     end.
 
-delete_binding(ExchangeName, QueueName, RoutingKey, Arguments) ->
+delete_binding(ExchangeName, QueueName, RoutingKey, Arguments, InnerFun) ->
     case binding_action(
            ExchangeName, QueueName, RoutingKey, Arguments,
            fun (X, Q, B) ->
                    case mnesia:match_object(rabbit_route, #route{binding = B},
                                             write) of
-                       [] -> {error, binding_not_found};
-                       _  -> ok = sync_binding(B, Q#amqqueue.durable,
-                                               fun mnesia:delete_object/3),
-                             {maybe_auto_delete(X), B}
+                       [] ->
+                           {error, binding_not_found};
+                       _  ->
+                           case InnerFun(X, Q) of
+                               ok ->
+                                   ok =
+                                       sync_binding(B,
+                                                    X#exchange.durable andalso
+                                                    Q#amqqueue.durable,
+                                                    fun mnesia:delete_object/3),
+                                   {maybe_auto_delete(X), B};
+                               {error, _} = E ->
+                                   E
+                           end
                    end
            end) of
-        Err = {error, _}  ->
+        {error, _} = Err ->
             Err;
-        {{Action, X = #exchange{ type = Type }}, B} ->
+        {{IsDeleted, X = #exchange{ type = Type }}, B} ->
             Module = type_to_module(Type),
-            case Action of
-                auto_delete -> Module:delete(X, [B]);
-                no_delete   -> Module:remove_bindings(X, [B])
+            case IsDeleted of
+                auto_deleted -> Module:delete(X, [B]);
+                not_deleted  -> Module:remove_bindings(X, [B])
             end
     end.
 
@@ -493,10 +554,10 @@ delete(ExchangeName, IfUnused) ->
     end.
 
 maybe_auto_delete(Exchange = #exchange{auto_delete = false}) ->
-    {no_delete, Exchange};
+    {not_deleted, Exchange};
 maybe_auto_delete(Exchange = #exchange{auto_delete = true}) ->
     case conditional_delete(Exchange) of
-        {error, in_use}         -> {no_delete, Exchange};
+        {error, in_use}         -> {not_deleted, Exchange};
         {deleted, Exchange, []} -> {auto_deleted, Exchange}
     end.
 
diff --git a/src/rabbit_exchange_type.erl b/src/rabbit_exchange_type.erl
index a8c071e6..85760edc 100644
--- a/src/rabbit_exchange_type.erl
+++ b/src/rabbit_exchange_type.erl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -54,7 +54,11 @@ behaviour_info(callbacks) ->
      {add_binding, 2},
 
      %% called after bindings have been deleted.
-     {remove_bindings, 2}
+     {remove_bindings, 2},
+
+     %% called when comparing exchanges for equivalence - should return ok or
+     %% exit with #amqp_error{}
+     {assert_args_equivalence, 2}
 
     ];
 behaviour_info(_Other) ->
diff --git a/src/rabbit_exchange_type_direct.erl b/src/rabbit_exchange_type_direct.erl
index 9b71e0e1..4f6eb851 100644
--- a/src/rabbit_exchange_type_direct.erl
+++ b/src/rabbit_exchange_type_direct.erl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -36,7 +36,7 @@
 
 -export([description/0, publish/2]).
 -export([validate/1, create/1, recover/2, delete/2,
-         add_binding/2, remove_bindings/2]).
+         add_binding/2, remove_bindings/2, assert_args_equivalence/2]).
 -include("rabbit_exchange_type_spec.hrl").
 
 -rabbit_boot_step({?MODULE,
@@ -61,3 +61,5 @@ recover(_X, _Bs) -> ok.
 delete(_X, _Bs) -> ok.
 add_binding(_X, _B) -> ok.
 remove_bindings(_X, _Bs) -> ok.
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
diff --git a/src/rabbit_exchange_type_fanout.erl b/src/rabbit_exchange_type_fanout.erl
index 311654ab..94798c78 100644
--- a/src/rabbit_exchange_type_fanout.erl
+++ b/src/rabbit_exchange_type_fanout.erl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -35,8 +35,8 @@
 -behaviour(rabbit_exchange_type).
 
 -export([description/0, publish/2]).
--export([validate/1, create/1, recover/2, delete/2,
-         add_binding/2, remove_bindings/2]).
+-export([validate/1, create/1, recover/2, delete/2, add_binding/2,
+         remove_bindings/2, assert_args_equivalence/2]).
 -include("rabbit_exchange_type_spec.hrl").
 
 -rabbit_boot_step({?MODULE,
@@ -59,3 +59,5 @@ recover(_X, _Bs) -> ok.
 delete(_X, _Bs) -> ok.
 add_binding(_X, _B) -> ok.
 remove_bindings(_X, _Bs) -> ok.
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
diff --git a/src/rabbit_exchange_type_headers.erl b/src/rabbit_exchange_type_headers.erl
index 285dab1a..44607398 100644
--- a/src/rabbit_exchange_type_headers.erl
+++ b/src/rabbit_exchange_type_headers.erl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -36,8 +36,8 @@
 -behaviour(rabbit_exchange_type).
 
 -export([description/0, publish/2]).
--export([validate/1, create/1, recover/2, delete/2,
-         add_binding/2, remove_bindings/2]).
+-export([validate/1, create/1, recover/2, delete/2, add_binding/2,
+         remove_bindings/2, assert_args_equivalence/2]).
 -include("rabbit_exchange_type_spec.hrl").
 
 -rabbit_boot_step({?MODULE,
@@ -48,7 +48,8 @@
                     {enables,     kernel_ready}]}).
 
 -ifdef(use_specs).
--spec(headers_match/2 :: (amqp_table(), amqp_table()) -> boolean()).
+-spec(headers_match/2 :: (rabbit_framing:amqp_table(),
+                          rabbit_framing:amqp_table()) -> boolean()).
 -endif.
 
 description() ->
@@ -135,3 +136,5 @@ recover(_X, _Bs) -> ok.
 delete(_X, _Bs) -> ok.
 add_binding(_X, _B) -> ok.
 remove_bindings(_X, _Bs) -> ok.
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
diff --git a/src/rabbit_exchange_type_registry.erl b/src/rabbit_exchange_type_registry.erl
index 175d15ad..7906fbee 100644
--- a/src/rabbit_exchange_type_registry.erl
+++ b/src/rabbit_exchange_type_registry.erl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -35,8 +35,8 @@
 
 -export([start_link/0]).
 
--export([init/1, handle_call/3, handle_cast/2, handle_info/2,
-         terminate/2, code_change/3]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+         code_change/3]).
 
 -export([register/2, binary_to_type/1, lookup_module/1]).
 
@@ -45,10 +45,13 @@
 
 -ifdef(use_specs).
 
--spec(start_link/0 :: () -> 'ignore' | {'error', term()} | {'ok', pid()}).
+-spec(start_link/0 ::
+        () -> 'ignore' | rabbit_types:ok_or_error2(pid(), term())).
 -spec(register/2 :: (binary(), atom()) -> 'ok').
--spec(binary_to_type/1 :: (binary()) -> atom() | {'error', 'not_found'}).
--spec(lookup_module/1 :: (atom()) -> {'ok', atom()} | {'error', 'not_found'}).
+-spec(binary_to_type/1 ::
+        (binary()) -> atom() | rabbit_types:error('not_found')).
+-spec(lookup_module/1 ::
+        (atom()) -> rabbit_types:ok_or_error2(atom(), 'not_found')).
 
 -endif.
 
diff --git a/src/rabbit_exchange_type_topic.erl b/src/rabbit_exchange_type_topic.erl
index 8a3dceea..89b2441e 100644
--- a/src/rabbit_exchange_type_topic.erl
+++ b/src/rabbit_exchange_type_topic.erl
@@ -18,11 +18,11 @@
 %%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
 %%   Technologies LLC, and Rabbit Technologies Ltd.
 %%
-%%   Portions created by LShift Ltd are Copyright (C) 2007-2009 LShift
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
 %%   Ltd. Portions created by Cohesive Financial Technologies LLC are
-%%   Copyright (C) 2007-2009 Cohesive Financial Technologies
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
 %%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
-%%   (C) 2007-2009 Rabbit Technologies Ltd.
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
 %%
 %%   All Rights Reserved.
 %%
@@ -35,8 +35,8 @@
 -behaviour(rabbit_exchange_type).
 
 -export([description/0, publish/2]).
--export([validate/1, create/1, recover/2, delete/2,
-         add_binding/2, remove_bindings/2]).
+-export([validate/1, create/1, recover/2, delete/2, add_binding/2,
+         remove_bindings/2, assert_args_equivalence/2]).
 -include("rabbit_exchange_type_spec.hrl").
 
 -rabbit_boot_step({?MODULE,
@@ -49,7 +49,9 @@
 -export([topic_matches/2]).
 
 -ifdef(use_specs).
+
 -spec(topic_matches/2 :: (binary(), binary()) -> boolean()).
+
 -endif.
 
 description() ->
@@ -65,8 +67,7 @@ publish(#exchange{name = Name}, Delivery =
                           Delivery).
 
 split_topic_key(Key) ->
-    {ok, KeySplit} = regexp:split(binary_to_list(Key), "\\."),
-    KeySplit.
+    re:split(Key, "\\.", [{return, list}]).
 
 topic_matches(PatternKey, RoutingKey) ->
     P = split_topic_key(PatternKey),
@@ -99,3 +100,5 @@ recover(_X, _Bs) -> ok.
 delete(_X, _Bs) -> ok.
 add_binding(_X, _B) -> ok.
 remove_bindings(_X, _Bs) -> ok.
+assert_args_equivalence(X, Args) ->
+    rabbit_exchange:assert_args_equivalence(X, Args).
diff --git a/src/rabbit_framing_channel.erl b/src/rabbit_framing_channel.erl
index b7c6aa96..00b74ad0 100644
--- a/src/rabbit_framing_channel.erl
+++ b/src/rabbit_framing_channel.erl
@@ -32,21 +32,22 @@
 -module(rabbit_framing_channel).
 -include("rabbit.hrl").
 
--export([start_link/2, process/2, shutdown/1]).
+-export([start_link/3, process/2, shutdown/1]).
 
 %% internal
--export([mainloop/1]).
+-export([mainloop/2]).
 
 %%--------------------------------------------------------------------
 
-start_link(StartFun, StartArgs) ->
-    spawn_link(
-      fun () ->
-              %% we trap exits so that a normal termination of the
-              %% channel or reader process terminates us too.
-              process_flag(trap_exit, true),
-              mainloop(apply(StartFun, StartArgs))
-      end).
+start_link(StartFun, StartArgs, Protocol) ->
+    {ok, spawn_link(
+           fun () ->
+                   %% we trap exits so that a normal termination of
+                   %% the channel or reader process terminates us too.
+                   process_flag(trap_exit, true),
+                   {ok, ChannelPid} = apply(StartFun, StartArgs),
+                   mainloop(ChannelPid, Protocol)
+           end)}.
 
 process(Pid, Frame) ->
     Pid ! {frame, Frame},
@@ -72,39 +73,42 @@ read_frame(ChannelPid) ->
         Msg                    -> exit({unexpected_message, Msg})
     end.
 
-mainloop(ChannelPid) ->
-    {method, MethodName, FieldsBin} = read_frame(ChannelPid),
-    Method = rabbit_framing:decode_method_fields(MethodName, FieldsBin),
-    case rabbit_framing:method_has_content(MethodName) of
-        true  -> rabbit_channel:do(ChannelPid, Method,
-                                   collect_content(ChannelPid, MethodName));
-        false -> rabbit_channel:do(ChannelPid, Method)
-    end,
-    ?MODULE:mainloop(ChannelPid).
+mainloop(ChannelPid, Protocol) ->
+    case read_frame(ChannelPid) of
+        {method, MethodName, FieldsBin} ->
+            Method = Protocol:decode_method_fields(MethodName, FieldsBin),
+            case Protocol:method_has_content(MethodName) of
+                true  -> {ClassId, _MethodId} = Protocol:method_id(MethodName),
+                         rabbit_channel:do(ChannelPid, Method,
+                                           collect_content(ChannelPid,
+                                                           ClassId,
+                                                           Protocol));
+                false -> rabbit_channel:do(ChannelPid, Method)
+            end,
+            ?MODULE:mainloop(ChannelPid, Protocol);
+        _ ->
+            unexpected_frame("expected method frame, "
+                             "got non method frame instead",
+                             [])
+    end.
 
-collect_content(ChannelPid, MethodName) ->
-    {ClassId, _MethodId} = rabbit_framing:method_id(MethodName),
+collect_content(ChannelPid, ClassId, Protocol) ->
     case read_frame(ChannelPid) of
-        {content_header, HeaderClassId, 0, BodySize, PropertiesBin} ->
-            if HeaderClassId == ClassId ->
-                    Payload = collect_content_payload(ChannelPid, BodySize, []),
-                    #content{class_id = ClassId,
-                             properties = none,
-                             properties_bin = PropertiesBin,
-                             payload_fragments_rev = Payload};
-               true ->
-                    rabbit_misc:protocol_error(
-                      command_invalid,
-                      "expected content header for class ~w, "
-                      "got one for class ~w instead",
-                      [ClassId, HeaderClassId])
-            end;
+        {content_header, ClassId, 0, BodySize, PropertiesBin} ->
+            Payload = collect_content_payload(ChannelPid, BodySize, []),
+            #content{class_id = ClassId,
+                     properties = none,
+                     properties_bin = PropertiesBin,
+                     protocol = Protocol,
+                     payload_fragments_rev = Payload};
+        {content_header, HeaderClassId, 0, _BodySize, _PropertiesBin} ->
+            unexpected_frame("expected content header for class ~w, "
+                             "got one for class ~w instead",
+                             [ClassId, HeaderClassId]);
         _ ->
-            rabbit_misc:protocol_error(
-              command_invalid,
-              "expected content header for class ~w, "
-              "got non content header frame instead",
-              [ClassId])
+            unexpected_frame("expected content header for class ~w, "
+                             "got non content header frame instead",
+                             [ClassId])
     end.
 
 collect_content_payload(_ChannelPid, 0, Acc) ->
@@ -116,8 +120,10 @@ collect_content_payload(ChannelPid, RemainingByteCount, Acc) ->
                                     RemainingByteCount - size(FragmentBin),
                                     [FragmentBin | Acc]);
         _ ->
-            rabbit_misc:protocol_error(
-              command_invalid,
-              "expected content body, got non content body frame instead",
-              [])
+            unexpected_frame("expected content body, "
+                             "got non content body frame instead",
+                             [])
     end.
+
+unexpected_frame(Format, Args) -> %% reported as the 'unexpected_frame' error
+    rabbit_misc:protocol_error(unexpected_frame, Format, Args).
diff --git a/src/rabbit_guid.erl b/src/rabbit_guid.erl
index 1ae8f7da..af1c629f 100644
--- a/src/rabbit_guid.erl
+++ b/src/rabbit_guid.erl
@@ -31,15 +31,13 @@
 
 -module(rabbit_guid).
 
--include("rabbit.hrl").
-
 -behaviour(gen_server).
 
 -export([start_link/0]).
 -export([guid/0, string_guid/1, binstring_guid/1]).
 
--export([init/1, handle_call/3, handle_cast/2, handle_info/2,
-         terminate/2, code_change/3]).
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+         code_change/3]).
 
 -define(SERVER, ?MODULE).
 -define(SERIAL_FILENAME, "rabbit_serial").
@@ -50,7 +48,11 @@
 
 -ifdef(use_specs).
 
--spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}).
+-export_type([guid/0]).
+
+-type(guid() :: binary()).
+
+-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())).
 -spec(guid/0 :: () -> guid()).
 -spec(string_guid/1 :: (any()) -> string()).
 -spec(binstring_guid/1 :: (any()) -> binary()).
diff --git a/src/rabbit_heartbeat.erl b/src/rabbit_heartbeat.erl
index 45565705..1989fb7b 100644
--- a/src/rabbit_heartbeat.erl
+++ b/src/rabbit_heartbeat.erl
@@ -33,68 +33,72 @@
 
 -export([start_heartbeat/2]).
 
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-spec(start_heartbeat/2 :: (rabbit_net:socket(), non_neg_integer()) ->
+                                rabbit_types:maybe({pid(), pid()})).
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
 start_heartbeat(_Sock, 0) ->
     none;
 start_heartbeat(Sock, TimeoutSec) ->
     Parent = self(),
-    %% we check for incoming data every interval, and time out after
-    %% two checks with no change. As a result we will time out between
-    %% 2 and 3 intervals after the last data has been received.
-    spawn_link(fun () -> heartbeater(Sock, TimeoutSec * 1000,
-                                     recv_oct, 1,
-                                     fun () ->
-                                             Parent ! timeout,
-                                             stop
-                                     end,
-                                     erlang:monitor(process, Parent)) end),
     %% the 'div 2' is there so that we don't end up waiting for nearly
     %% 2 * TimeoutSec before sending a heartbeat in the boundary case
     %% where the last message was sent just after a heartbeat.
-    spawn_link(fun () -> heartbeater(Sock, TimeoutSec * 1000 div 2,
-                                     send_oct, 0,
-                                     fun () ->
-                                             catch rabbit_net:send(Sock, rabbit_binary_generator:build_heartbeat_frame()),
-                                             continue
-                                     end,
-                                     erlang:monitor(process, Parent)) end),
-    ok.
+    Sender =
+        spawn_link(fun () -> heartbeater({Sock, TimeoutSec * 1000 div 2,
+                                          send_oct, 0,
+                                          fun () ->
+                                                  catch rabbit_net:send(Sock, rabbit_binary_generator:build_heartbeat_frame()),
+                                                  continue
+                                          end}, Parent) end),
+    %% we check for incoming data every interval, and time out after
+    %% two checks with no change. As a result we will time out between
+    %% 2 and 3 intervals after the last data has been received.
+    Receiver =
+        spawn_link(fun () -> heartbeater({Sock, TimeoutSec * 1000,
+                                          recv_oct, 1,
+                                          fun () ->
+                                                  Parent ! timeout,
+                                                  stop
+                                          end}, Parent) end),
+    {Sender, Receiver}.
 
-%% Y-combinator, posted by Vladimir Sekissov to the Erlang mailing list
-%% http://www.erlang.org/ml-archive/erlang-questions/200301/msg00053.html
-y(X) ->
-    F = fun (P) -> X(fun (A) -> (P(P))(A) end) end,
-    F(F).
+heartbeater(Params, Parent) ->
+    heartbeater(Params, erlang:monitor(process, Parent), {0, 0}).
 
-heartbeater(Sock, TimeoutMillisec, StatName, Threshold, Handler, MonitorRef) ->
-    Heartbeat =
-        fun (F) ->
-                fun ({StatVal, SameCount}) ->
-                        receive
-                            {'DOWN', MonitorRef, process, _Object, _Info} -> ok;
-                            Other -> exit({unexpected_message, Other})
-                        after TimeoutMillisec ->
-                                case rabbit_net:getstat(Sock, [StatName]) of
-                                    {ok, [{StatName, NewStatVal}]} ->
-                                        if NewStatVal =/= StatVal ->
-                                                F({NewStatVal, 0});
-                                           SameCount < Threshold ->
-                                                F({NewStatVal, SameCount + 1});
-                                           true ->
-                                                case Handler() of
-                                                    stop     -> ok;
-                                                    continue -> F({NewStatVal, 0})
-                                                end
-                                        end;
-                                    {error, einval} ->
-                                        %% the socket is dead, most
-                                        %% likely because the
-                                        %% connection is being shut
-                                        %% down -> terminate
-                                        ok;
-                                    {error, Reason} ->
-                                        exit({cannot_get_socket_stats, Reason})
-                                end
-                        end
-                end
-        end,
-    (y(Heartbeat))({0, 0}).
+%% Polls StatName on Sock every TimeoutMillisec. Once the value has stayed
+%% the same for Threshold + 1 consecutive polls, Handler() is called and its
+%% result (stop | continue) decides whether to finish or start counting anew.
+%% Returns ok when the monitored process dies; any other message is fatal.
+heartbeater({Sock, TimeoutMillisec, StatName, Threshold, Handler} = Params,
+            MonitorRef, {StatVal, SameCount}) ->
+    Loop = fun (Next) -> heartbeater(Params, MonitorRef, Next) end,
+    receive
+        {'DOWN', MonitorRef, process, _Object, _Info} -> ok;
+        Other -> exit({unexpected_message, Other})
+    after TimeoutMillisec ->
+            case rabbit_net:getstat(Sock, [StatName]) of
+                {ok, [{StatName, StatVal}]} when SameCount < Threshold ->
+                    Loop({StatVal, SameCount + 1});
+                {ok, [{StatName, StatVal}]} ->
+                    case Handler() of
+                        stop     -> ok;
+                        continue -> Loop({StatVal, 0})
+                    end;
+                {ok, [{StatName, NewStatVal}]} ->
+                    Loop({NewStatVal, 0});
+                {error, einval} ->
+                    %% the socket is dead, most likely because the
+                    %% connection is being shut down -> terminate
+                    ok;
+                {error, Reason} ->
+                    exit({cannot_get_socket_stats, Reason})
+            end
+    end.
diff --git a/src/rabbit_invariable_queue.erl b/src/rabbit_invariable_queue.erl
index b4fd9156..4e0dad84 100644
--- a/src/rabbit_invariable_queue.erl
+++ b/src/rabbit_invariable_queue.erl
@@ -34,25 +34,25 @@
 -export([init/3, terminate/1, delete_and_terminate/1, purge/1, publish/2,
          publish_delivered/3, fetch/2, ack/2, tx_publish/3, tx_ack/3,
          tx_rollback/2, tx_commit/3, requeue/2, len/1, is_empty/1,
-         set_ram_duration_target/2, ram_duration/1, needs_sync/1, sync/1,
-         handle_pre_hibernate/1, status/1]).
+         set_ram_duration_target/2, ram_duration/1, needs_idle_timeout/1,
+         idle_timeout/1, handle_pre_hibernate/1, status/1]).
 
--export([start/1]).
+-export([start/1, stop/0]).
 
 -behaviour(rabbit_backing_queue).
 
 -include("rabbit.hrl").
 
--record(iv_state, { queue, qname, len, pending_ack }).
+-record(iv_state, { queue, qname, durable, len, pending_ack }).
 -record(tx, { pending_messages, pending_acks, is_persistent }).
 
 -ifdef(use_specs).
 
--type(ack() :: guid() | 'blank_ack').
+-type(ack() :: rabbit_guid:guid() | 'blank_ack').
 -type(state() :: #iv_state { queue       :: queue(),
-                             qname       :: queue_name(),
+                             qname       :: rabbit_amqqueue:name(),
                              len         :: non_neg_integer(),
-                             pending_ack :: dict()
+                             pending_ack :: dict:dictionary()
                            }).
 -include("rabbit_backing_queue_spec.hrl").
 
@@ -61,23 +61,31 @@
 start(DurableQueues) ->
     ok = rabbit_sup:start_child(rabbit_persister, [DurableQueues]).
 
+stop() ->
+    ok = rabbit_sup:stop_child(rabbit_persister).
+
 init(QName, IsDurable, Recover) ->
     Q = queue:from_list(case IsDurable andalso Recover of
                             true  -> rabbit_persister:queue_content(QName);
                             false -> []
                         end),
-    #iv_state { queue = Q, qname = QName, len = queue:len(Q),
+    #iv_state { queue       = Q,
+                qname       = QName,
+                durable     = IsDurable,
+                len         = queue:len(Q),
                 pending_ack = dict:new() }.
 
 terminate(State) ->
     State #iv_state { queue = queue:new(), len = 0, pending_ack = dict:new() }.
 
-delete_and_terminate(State = #iv_state { qname = QName, pending_ack = PA }) ->
-    ok = persist_acks(none, QName, dict:fetch_keys(PA), PA),
+delete_and_terminate(State = #iv_state { qname = QName, durable = IsDurable,
+                                         pending_ack = PA }) ->
+    ok = persist_acks(QName, IsDurable, none, dict:fetch_keys(PA), PA),
     {_PLen, State1} = purge(State),
     terminate(State1).
 
-purge(State = #iv_state { len = Len, queue = Q, qname = QName }) ->
+purge(State = #iv_state { queue = Q, qname = QName, durable = IsDurable,
+                          len = Len }) ->
     %% We do not purge messages pending acks.
     {AckTags, PA} =
         rabbit_misc:queue_fold(
@@ -85,57 +93,63 @@ purge(State = #iv_state { len = Len, queue = Q, qname = QName }) ->
                   Acc;
               ({Msg = #basic_message { guid = Guid }, IsDelivered},
                {AckTagsN, PAN}) ->
-                  ok = persist_delivery(QName, Msg, IsDelivered),
+                  ok = persist_delivery(QName, IsDurable, IsDelivered, Msg),
                   {[Guid | AckTagsN], dict:store(Guid, Msg, PAN)}
           end, {[], dict:new()}, Q),
-    ok = persist_acks(none, QName, AckTags, PA),
+    ok = persist_acks(QName, IsDurable, none, AckTags, PA),
     {Len, State #iv_state { len = 0, queue = queue:new() }}.
 
-publish(Msg, State = #iv_state { queue = Q, qname = QName, len = Len }) ->
-    ok = persist_message(none, QName, Msg),
+publish(Msg, State = #iv_state { queue = Q, qname = QName, durable = IsDurable,
+                                 len = Len }) ->
+    ok = persist_message(QName, IsDurable, none, Msg),
     State #iv_state { queue = queue:in({Msg, false}, Q), len = Len + 1 }.
 
 publish_delivered(false, _Msg, State) ->
     {blank_ack, State};
 publish_delivered(true, Msg = #basic_message { guid = Guid },
-                  State = #iv_state { qname = QName, len = 0,
-                                      pending_ack = PA }) ->
-    ok = persist_message(none, QName, Msg),
-    ok = persist_delivery(QName, Msg, false),
+                  State = #iv_state { qname = QName, durable = IsDurable,
+                                      len = 0, pending_ack = PA }) ->
+    ok = persist_message(QName, IsDurable, none, Msg),
+    ok = persist_delivery(QName, IsDurable, false, Msg),
     {Guid, State #iv_state { pending_ack = dict:store(Guid, Msg, PA) }}.
 
 fetch(_AckRequired, State = #iv_state { len = 0 }) ->
     {empty, State};
-fetch(AckRequired, State = #iv_state { queue = Q, qname = QName, len = Len,
+fetch(AckRequired, State = #iv_state { len = Len, queue = Q, qname = QName,
+                                       durable = IsDurable,
                                        pending_ack = PA }) ->
     {{value, {Msg = #basic_message { guid = Guid }, IsDelivered}}, Q1} =
         queue:out(Q),
     Len1 = Len - 1,
-    ok = persist_delivery(QName, Msg, IsDelivered),
+    ok = persist_delivery(QName, IsDurable, IsDelivered, Msg),
     PA1 = dict:store(Guid, Msg, PA),
     {AckTag, PA2} = case AckRequired of
                         true  -> {Guid, PA1};
-                        false -> ok = persist_acks(none, QName, [Guid], PA1),
+                        false -> ok = persist_acks(QName, IsDurable, none,
+                                                   [Guid], PA1),
                                  {blank_ack, PA}
                     end,
     {{Msg, IsDelivered, AckTag, Len1},
      State #iv_state { queue = Q1, len = Len1, pending_ack = PA2 }}.
 
-ack(AckTags, State = #iv_state { qname = QName, pending_ack = PA }) ->
-    ok = persist_acks(none, QName, AckTags, PA),
+ack(AckTags, State = #iv_state { qname = QName, durable = IsDurable,
+                                 pending_ack = PA }) ->
+    ok = persist_acks(QName, IsDurable, none, AckTags, PA),
     PA1 = remove_acks(AckTags, PA),
     State #iv_state { pending_ack = PA1 }.
 
-tx_publish(Txn, Msg, State = #iv_state { qname = QName }) ->
+tx_publish(Txn, Msg, State = #iv_state { qname = QName,
+                                         durable = IsDurable }) ->
     Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn),
     store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }),
-    ok = persist_message(Txn, QName, Msg),
+    ok = persist_message(QName, IsDurable, Txn, Msg),
     State.
 
-tx_ack(Txn, AckTags, State = #iv_state { qname = QName, pending_ack = PA }) ->
+tx_ack(Txn, AckTags, State = #iv_state { qname = QName, durable = IsDurable,
+                                         pending_ack = PA }) ->
     Tx = #tx { pending_acks = Acks } = lookup_tx(Txn),
     store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }),
-    ok = persist_acks(Txn, QName, AckTags, PA),
+    ok = persist_acks(QName, IsDurable, Txn, AckTags, PA),
     State.
 
 tx_rollback(Txn, State = #iv_state { qname = QName }) ->
@@ -186,9 +200,9 @@ set_ram_duration_target(_DurationTarget, State) -> State.
 
 ram_duration(State) -> {0, State}.
 
-needs_sync(_State) -> false.
+needs_idle_timeout(_State) -> false.
 
-sync(State) -> State.
+idle_timeout(State) -> State.
 
 handle_pre_hibernate(State) -> State.
 
@@ -228,32 +242,32 @@ do_if_persistent(F, Txn, QName) ->
 
 %%----------------------------------------------------------------------------
 
-persist_message(_Txn, _QName, #basic_message { is_persistent = false }) ->
-    ok;
-persist_message(Txn, QName, Msg) ->
+persist_message(QName, true, Txn, Msg = #basic_message {
+                                    is_persistent = true }) ->
     Msg1 = Msg #basic_message {
-             %% don't persist any recoverable decoded properties,
-             %% rebuild from properties_bin on restore
+             %% don't persist any recoverable decoded properties
              content = rabbit_binary_parser:clear_decoded_content(
                          Msg #basic_message.content)},
     persist_work(Txn, QName,
-                 [{publish, Msg1, {QName, Msg1 #basic_message.guid}}]).
+                 [{publish, Msg1, {QName, Msg1 #basic_message.guid}}]);
+persist_message(_QName, _IsDurable, _Txn, _Msg) ->
+    ok.
 
-persist_delivery(_QName, #basic_message { is_persistent = false },
-                 _IsDelivered) ->
-    ok;
-persist_delivery(_QName, _Message, true) ->
-    ok;
-persist_delivery(QName, #basic_message { guid = Guid }, _IsDelivered) ->
-    persist_work(none, QName, [{deliver, {QName, Guid}}]).
+persist_delivery(QName, true, false, #basic_message { is_persistent = true,
+                                                      guid = Guid }) ->
+    persist_work(none, QName, [{deliver, {QName, Guid}}]);
+persist_delivery(_QName, _IsDurable, _IsDelivered, _Msg) ->
+    ok.
 
-persist_acks(Txn, QName, AckTags, PA) ->
+persist_acks(QName, true, Txn, AckTags, PA) ->
     persist_work(Txn, QName,
                  [{ack, {QName, Guid}} || Guid <- AckTags,
                                           begin
                                               {ok, Msg} = dict:find(Guid, PA),
                                               Msg #basic_message.is_persistent
-                                          end]).
+                                          end]);
+persist_acks(_QName, _IsDurable, _Txn, _AckTags, _PA) ->
+    ok.
 
 persist_work(_Txn,_QName, []) ->
     ok;
diff --git a/src/rabbit_limiter.erl b/src/rabbit_limiter.erl
index 878af029..813ccc75 100644
--- a/src/rabbit_limiter.erl
+++ b/src/rabbit_limiter.erl
@@ -45,7 +45,7 @@
 
 -type(maybe_pid() :: pid() | 'undefined').
 
--spec(start_link/2 :: (pid(), non_neg_integer()) -> pid()).
+-spec(start_link/2 :: (pid(), non_neg_integer()) -> rabbit_types:ok(pid())).
 -spec(shutdown/1 :: (maybe_pid()) -> 'ok').
 -spec(limit/2 :: (maybe_pid(), non_neg_integer()) -> 'ok' | 'stopped').
 -spec(can_send/3 :: (maybe_pid(), pid(), boolean()) -> boolean()).
@@ -74,8 +74,7 @@
 %%----------------------------------------------------------------------------
 
 start_link(ChPid, UnackedMsgCount) ->
-    {ok, Pid} = gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []),
-    Pid.
+    gen_server2:start_link(?MODULE, [ChPid, UnackedMsgCount], []).
 
 shutdown(undefined) ->
     ok;
diff --git a/src/rabbit_load.erl b/src/rabbit_load.erl
index 4f467162..e0457b1e 100644
--- a/src/rabbit_load.erl
+++ b/src/rabbit_load.erl
@@ -40,11 +40,10 @@
 
 -ifdef(use_specs).
 
--type(erlang_node() :: atom()).
--type(load() :: {{non_neg_integer(), integer() | 'unknown'}, erlang_node()}).
+-type(load() :: {{non_neg_integer(), integer() | 'unknown'}, node()}).
 -spec(local_load/0 :: () -> load()).
 -spec(remote_loads/0 :: () -> [load()]).
--spec(pick/0 :: () -> erlang_node()).
+-spec(pick/0 :: () -> node()).
 
 -endif.
 
diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl
index cc80e360..85bcbca0 100644
--- a/src/rabbit_log.erl
+++ b/src/rabbit_log.erl
@@ -50,7 +50,7 @@
 
 -ifdef(use_specs).
 
--spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}).
+-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())).
 -spec(debug/1 :: (string()) -> 'ok').
 -spec(debug/2 :: (string(), [any()]) -> 'ok').
 -spec(info/1 :: (string()) -> 'ok').
diff --git a/src/rabbit_memory_monitor.erl b/src/rabbit_memory_monitor.erl
index 91e97ffe..bdf38075 100644
--- a/src/rabbit_memory_monitor.erl
+++ b/src/rabbit_memory_monitor.erl
@@ -86,11 +86,12 @@
 
 -ifdef(use_specs).
 
--spec(start_link/0 :: () -> 'ignore' | {'error', _} | {'ok', pid()}).
+-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())).
 -spec(update/0 :: () -> 'ok').
 -spec(register/2 :: (pid(), {atom(),atom(),[any()]}) -> 'ok').
 -spec(deregister/1 :: (pid()) -> 'ok').
--spec(report_ram_duration/2 :: (pid(), float() | 'infinity') -> number()).
+-spec(report_ram_duration/2 ::
+        (pid(), float() | 'infinity') -> number() | 'infinity').
 -spec(stop/0 :: () -> 'ok').
 
 -endif.
diff --git a/src/rabbit_misc.erl b/src/rabbit_misc.erl
index 723b818b..050b499f 100644
--- a/src/rabbit_misc.erl
+++ b/src/rabbit_misc.erl
@@ -32,14 +32,16 @@
 -module(rabbit_misc).
 -include("rabbit.hrl").
 -include("rabbit_framing.hrl").
+
 -include_lib("kernel/include/file.hrl").
 
 -export([method_record_type/1, polite_pause/0, polite_pause/1]).
 -export([die/1, frame_error/2, amqp_error/4,
-         protocol_error/3, protocol_error/4]).
--export([not_found/1]).
+         protocol_error/3, protocol_error/4, protocol_error/1]).
+-export([not_found/1, assert_args_equivalence/4]).
 -export([get_config/1, get_config/2, set_config/2]).
 -export([dirty_read/1]).
+-export([table_lookup/2]).
 -export([r/3, r/2, r_arg/4, rs/1]).
 -export([enable_cover/0, report_cover/0]).
 -export([enable_cover/1, report_cover/1]).
@@ -60,7 +62,8 @@
 -export([sort_field_table/1]).
 -export([pid_to_string/1, string_to_pid/1]).
 -export([version_compare/2, version_compare/3]).
--export([recursive_delete/1, dict_cons/3, unlink_and_capture_exit/1]).
+-export([recursive_delete/1, dict_cons/3, orddict_cons/3,
+         unlink_and_capture_exit/1]).
 
 -import(mnesia).
 -import(lists).
@@ -71,61 +74,91 @@
 
 -ifdef(use_specs).
 
--include_lib("kernel/include/inet.hrl").
+-export_type([resource_name/0]).
 
--type(ok_or_error() :: 'ok' | {'error', any()}).
+-type(ok_or_error() :: rabbit_types:ok_or_error(any())).
+-type(thunk(T) :: fun(() -> T)).
+-type(resource_name() :: binary()).
 
--spec(method_record_type/1 :: (tuple()) -> atom()).
+-spec(method_record_type/1 :: (rabbit_framing:amqp_method_record())
+                              -> rabbit_framing:amqp_method_name()).
 -spec(polite_pause/0 :: () -> 'done').
 -spec(polite_pause/1 :: (non_neg_integer()) -> 'done').
--spec(die/1 :: (atom()) -> no_return()).
--spec(frame_error/2 :: (atom(), binary()) -> no_return()).
--spec(amqp_error/4 :: (atom(), string(), [any()], atom()) -> amqp_error()).
--spec(protocol_error/3 :: (atom(), string(), [any()]) -> no_return()).
--spec(protocol_error/4 :: (atom(), string(), [any()], atom()) -> no_return()).
--spec(not_found/1 :: (r(atom())) -> no_return()).
--spec(get_config/1 :: (atom()) -> {'ok', any()} | not_found()).
+-spec(die/1 :: (rabbit_framing:amqp_exception()) -> no_return()).
+-spec(frame_error/2 :: (rabbit_framing:amqp_method_name(), binary())
+                       -> no_return()).
+-spec(amqp_error/4 ::
+        (rabbit_framing:amqp_exception(), string(), [any()],
+         rabbit_framing:amqp_method_name())
+        -> rabbit_types:amqp_error()).
+-spec(protocol_error/3 :: (rabbit_framing:amqp_exception(), string(), [any()])
+                          -> no_return()).
+-spec(protocol_error/4 ::
+        (rabbit_framing:amqp_exception(), string(), [any()],
+         rabbit_framing:amqp_method_name())
+        -> no_return()).
+-spec(protocol_error/1 :: (rabbit_types:amqp_error()) -> no_return()).
+-spec(not_found/1 :: (rabbit_types:r(atom())) -> no_return()).
+-spec(assert_args_equivalence/4 :: (rabbit_framing:amqp_table(),
+                                    rabbit_framing:amqp_table(),
+                                    rabbit_types:r(any()), [binary()]) ->
+                                        'ok' | no_return()).
+-spec(get_config/1 ::
+        (atom()) -> rabbit_types:ok_or_error2(any(), 'not_found')).
 -spec(get_config/2 :: (atom(), A) -> A).
 -spec(set_config/2 :: (atom(), any()) -> 'ok').
--spec(dirty_read/1 :: ({atom(), any()}) -> {'ok', any()} | not_found()).
--spec(r/3 :: (vhost() | r(atom()), K, resource_name()) ->
-             r(K) when is_subtype(K, atom())).
--spec(r/2 :: (vhost(), K) -> #resource{virtual_host :: vhost(),
-                                       kind         :: K,
-                                       name         :: '_'}
-                                 when is_subtype(K, atom())).
--spec(r_arg/4 :: (vhost() | r(atom()), K, amqp_table(), binary()) ->
-             undefined | r(K)  when is_subtype(K, atom())).
--spec(rs/1 :: (r(atom())) -> string()).
+-spec(dirty_read/1 ::
+        ({atom(), any()}) -> rabbit_types:ok_or_error2(any(), 'not_found')).
+-spec(table_lookup/2 ::
+        (rabbit_framing:amqp_table(), binary())
+         -> 'undefined' | {rabbit_framing:amqp_field_type(), any()}).
+-spec(r/2 :: (rabbit_types:vhost(), K)
+             -> rabbit_types:r3(rabbit_types:vhost(), K, '_')
+                    when is_subtype(K, atom())).
+-spec(r/3 ::
+        (rabbit_types:vhost() | rabbit_types:r(atom()), K, resource_name())
+        -> rabbit_types:r3(rabbit_types:vhost(), K, resource_name())
+               when is_subtype(K, atom())).
+-spec(r_arg/4 ::
+        (rabbit_types:vhost() | rabbit_types:r(atom()), K,
+         rabbit_framing:amqp_table(), binary())
+        -> undefined | rabbit_types:r(K)
+               when is_subtype(K, atom())).
+-spec(rs/1 :: (rabbit_types:r(atom())) -> string()).
 -spec(enable_cover/0 :: () -> ok_or_error()).
 -spec(start_cover/1 :: ([{string(), string()} | string()]) -> 'ok').
 -spec(report_cover/0 :: () -> 'ok').
--spec(enable_cover/1 :: (file_path()) -> ok_or_error()).
--spec(report_cover/1 :: (file_path()) -> 'ok').
+-spec(enable_cover/1 :: (file:filename()) -> ok_or_error()).
+-spec(report_cover/1 :: (file:filename()) -> 'ok').
 -spec(throw_on_error/2 ::
-      (atom(), thunk({error, any()} | {ok, A} | A)) -> A).
+        (atom(), thunk(rabbit_types:error(any()) | {ok, A} | A)) -> A).
 -spec(with_exit_handler/2 :: (thunk(A), thunk(A)) -> A).
 -spec(filter_exit_map/2 :: (fun ((A) -> B), [A]) -> [B]).
--spec(with_user/2 :: (username(), thunk(A)) -> A).
--spec(with_vhost/2 :: (vhost(), thunk(A)) -> A).
--spec(with_user_and_vhost/3 :: (username(), vhost(), thunk(A)) -> A).
+-spec(with_user/2 :: (rabbit_access_control:username(), thunk(A)) -> A).
+-spec(with_vhost/2 :: (rabbit_types:vhost(), thunk(A)) -> A).
+-spec(with_user_and_vhost/3 ::
+        (rabbit_access_control:username(), rabbit_types:vhost(), thunk(A))
+        -> A).
 -spec(execute_mnesia_transaction/1 :: (thunk(A)) -> A).
 -spec(ensure_ok/2 :: (ok_or_error(), atom()) -> 'ok').
--spec(makenode/1 :: ({string(), string()} | string()) -> erlang_node()).
--spec(nodeparts/1 :: (erlang_node() | string()) -> {string(), string()}).
+-spec(makenode/1 :: ({string(), string()} | string()) -> node()).
+-spec(nodeparts/1 :: (node() | string()) -> {string(), string()}).
 -spec(cookie_hash/0 :: () -> string()).
--spec(tcp_name/3 :: (atom(), ip_address(), ip_port()) -> atom()).
+-spec(tcp_name/3 ::
+        (atom(), inet:ip_address(), rabbit_networking:ip_port())
+        -> atom()).
 -spec(intersperse/2 :: (A, [A]) -> [A]).
 -spec(upmap/2 :: (fun ((A) -> B), [A]) -> [B]).
 -spec(map_in_order/2 :: (fun ((A) -> B), [A]) -> [B]).
 -spec(table_fold/3 :: (fun ((any(), A) -> A), A, atom()) -> A).
 -spec(dirty_read_all/1 :: (atom()) -> [any()]).
--spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom()) ->
-             'ok' | 'aborted').
--spec(dirty_dump_log/1 :: (file_path()) -> ok_or_error()).
--spec(read_term_file/1 :: (file_path()) -> {'ok', [any()]} | {'error', any()}).
--spec(write_term_file/2 :: (file_path(), [any()]) -> ok_or_error()).
--spec(append_file/2 :: (file_path(), string()) -> ok_or_error()).
+-spec(dirty_foreach_key/2 :: (fun ((any()) -> any()), atom())
+                             -> 'ok' | 'aborted').
+-spec(dirty_dump_log/1 :: (file:filename()) -> ok_or_error()).
+-spec(read_term_file/1 ::
+        (file:filename()) -> {'ok', [any()]} | rabbit_types:error(any())).
+-spec(write_term_file/2 :: (file:filename(), [any()]) -> ok_or_error()).
+-spec(append_file/2 :: (file:filename(), string()) -> ok_or_error()).
 -spec(ensure_parent_dirs_exist/1 :: (string()) -> 'ok').
 -spec(format_stderr/2 :: (string(), [any()]) -> 'ok').
 -spec(start_applications/1 :: ([atom()]) -> 'ok').
@@ -133,15 +166,21 @@
 -spec(unfold/2  :: (fun ((A) -> ({'true', B, A} | 'false')), A) -> {[B], A}).
 -spec(ceil/1 :: (number()) -> integer()).
 -spec(queue_fold/3 :: (fun ((any(), B) -> B), B, queue()) -> B).
--spec(sort_field_table/1 :: (amqp_table()) -> amqp_table()).
+-spec(sort_field_table/1 ::
+        (rabbit_framing:amqp_table()) -> rabbit_framing:amqp_table()).
 -spec(pid_to_string/1 :: (pid()) -> string()).
 -spec(string_to_pid/1 :: (string()) -> pid()).
 -spec(version_compare/2 :: (string(), string()) -> 'lt' | 'eq' | 'gt').
--spec(version_compare/3 :: (string(), string(),
-                            ('lt' | 'lte' | 'eq' | 'gte' | 'gt')) -> boolean()).
--spec(recursive_delete/1 :: ([file_path()]) ->
-             'ok' | {'error', {file_path(), any()}}).
--spec(dict_cons/3 :: (any(), any(), dict()) -> dict()).
+-spec(version_compare/3 ::
+        (string(), string(), ('lt' | 'lte' | 'eq' | 'gte' | 'gt'))
+        -> boolean()).
+-spec(recursive_delete/1 ::
+        ([file:filename()])
+        -> rabbit_types:ok_or_error({file:filename(), any()})).
+-spec(dict_cons/3 :: (any(), any(), dict:dictionary()) ->
+                          dict:dictionary()).
+-spec(orddict_cons/3 :: (any(), any(), orddict:dictionary()) ->
+                             orddict:dictionary()).
 -spec(unlink_and_capture_exit/1 :: (pid()) -> 'ok').
 
 -endif.
@@ -173,10 +212,27 @@ protocol_error(Name, ExplanationFormat, Params) ->
     protocol_error(Name, ExplanationFormat, Params, none).
 
 protocol_error(Name, ExplanationFormat, Params, Method) ->
-    exit(amqp_error(Name, ExplanationFormat, Params, Method)).
+    protocol_error(amqp_error(Name, ExplanationFormat, Params, Method)).
+
+protocol_error(#amqp_error{} = Error) ->
+    exit(Error).
 
 not_found(R) -> protocol_error(not_found, "no ~s", [rs(R)]).
 
+%% Exits with not_allowed if Orig (required) and New (received) differ on Keys.
+assert_args_equivalence(Orig, New, Name, Keys) ->
+    [assert_args_equivalence1(Orig, New, Name, Key) || Key <- Keys],
+    ok.
+
+assert_args_equivalence1(Orig, New, Name, Key) ->
+    case {table_lookup(Orig, Key), table_lookup(New, Key)} of
+        {Same, Same}  -> ok;
+        {Orig1, New1} ->
+            Fmt = "cannot redeclare ~s with inequivalent args for ~s: "
+                  "required ~w, received ~w",
+            protocol_error(not_allowed, Fmt, [rs(Name), Key, Orig1, New1])
+    end.
+
 get_config(Key) ->
     case dirty_read({rabbit_config, Key}) of
         {ok, {rabbit_config, Key, V}} -> {ok, V};
@@ -198,6 +254,12 @@ dirty_read(ReadSpec) ->
         []       -> {error, not_found}
     end.
 
+table_lookup(Table, Key) ->
+    case lists:keysearch(Key, 1, Table) of
+        {value, {_, TypeBin, ValueBin}} -> {TypeBin, ValueBin};
+        false                           -> undefined
+    end.
+
 r(#resource{virtual_host = VHostPath}, Kind, Name)
   when is_binary(Name) ->
     #resource{virtual_host = VHostPath, kind = Kind, name = Name};
@@ -210,9 +272,9 @@ r(VHostPath, Kind) when is_binary(VHostPath) ->
 r_arg(#resource{virtual_host = VHostPath}, Kind, Table, Key) ->
     r_arg(VHostPath, Kind, Table, Key);
 r_arg(VHostPath, Kind, Table, Key) ->
-    case lists:keysearch(Key, 1, Table) of
-        {value, {_, longstr, NameBin}} -> r(VHostPath, Kind, NameBin);
-        false                          -> undefined
+    case table_lookup(Table, Key) of
+        {longstr, NameBin} -> r(VHostPath, Kind, NameBin);
+        undefined          -> undefined
     end.
 
 rs(#resource{virtual_host = VHostPath, kind = Kind, name = Name}) ->
@@ -242,12 +304,12 @@ report_cover([Root]) when is_atom(Root) ->
 report_cover(Root) ->
     Dir = filename:join(Root, "cover"),
     ok = filelib:ensure_dir(filename:join(Dir,"junk")),
-    lists:foreach(fun(F) -> file:delete(F) end,
+    lists:foreach(fun (F) -> file:delete(F) end,
                   filelib:wildcard(filename:join(Dir, "*.html"))),
     {ok, SummaryFile} = file:open(filename:join(Dir, "summary.txt"), [write]),
     {CT, NCT} =
         lists:foldl(
-          fun(M,{CovTot, NotCovTot}) ->
+          fun (M,{CovTot, NotCovTot}) ->
                   {ok, {M, {Cov, NotCov}}} = cover:analyze(M, module),
                   ok = report_coverage_percentage(SummaryFile,
                                                   Cov, NotCov, M),
@@ -367,7 +429,7 @@ upmap(F, L) ->
     Parent = self(),
     Ref = make_ref(),
     [receive {Ref, Result} -> Result end
-     || _ <- [spawn(fun() -> Parent ! {Ref, F(X)} end) || X <- L]].
+     || _ <- [spawn(fun () -> Parent ! {Ref, F(X)} end) || X <- L]].
 
 map_in_order(F, L) ->
     lists:reverse(
@@ -537,19 +599,25 @@ pid_to_string(Pid) when is_pid(Pid) ->
 
 %% inverse of above
 string_to_pid(Str) ->
+    Err = {error, {invalid_pid_syntax, Str}},
     %% The \ before the trailing $ is only there to keep emacs
     %% font-lock from getting confused.
     case re:run(Str, "^<(.*)\\.([0-9]+)\\.([0-9]+)>\$",
                 [{capture,all_but_first,list}]) of
         {match, [NodeStr, IdStr, SerStr]} ->
-            %% turn the triple into a pid - see pid_to_string
-            <<131,NodeEnc/binary>> = term_to_binary(list_to_atom(NodeStr)),
+            %% the NodeStr atom might be quoted, so parse it rather than
+            %% list_to_atom it; anything but a single atom is rejected
+            NodeAtom = case erl_scan:string(NodeStr) of
+                           {ok, [{atom, _, X}], _} -> X;
+                           _                       -> throw(Err)
+                       end,
+            <<131,NodeEnc/binary>> = term_to_binary(NodeAtom),
             Id = list_to_integer(IdStr),
             Ser = list_to_integer(SerStr),
             binary_to_term(<<131,103,NodeEnc/binary,Id:32,Ser:32,0:8>>);
         nomatch ->
-            throw({error, {invalid_pid_syntax, Str}})
-    end. 
+            throw(Err)
+    end.
 
 version_compare(A, B, lte) ->
     case version_compare(A, B) of
@@ -625,6 +693,9 @@ recursive_delete1(Path) ->
 dict_cons(Key, Value, Dict) ->
     dict:update(Key, fun (List) -> [Value | List] end, [Value], Dict).
 
+orddict_cons(Key, Value, Dict) ->
+    orddict:update(Key, fun (List) -> [Value | List] end, [Value], Dict).
+
 unlink_and_capture_exit(Pid) ->
     unlink(Pid),
     receive {'EXIT', Pid, _} -> ok
diff --git a/src/rabbit_mnesia.erl b/src/rabbit_mnesia.erl
index 55a6761d..c808499b 100644
--- a/src/rabbit_mnesia.erl
+++ b/src/rabbit_mnesia.erl
@@ -29,11 +29,12 @@
 %%   Contributor(s): ______________________________________.
 %%
 
+
 -module(rabbit_mnesia).
 
 -export([ensure_mnesia_dir/0, dir/0, status/0, init/0, is_db_empty/0,
-         cluster/1, reset/0, force_reset/0, is_clustered/0,
-         empty_ram_only_tables/0]).
+         cluster/1, force_cluster/1, reset/0, force_reset/0,
+         is_clustered/0, empty_ram_only_tables/0]).
 
 -export([table_names/0]).
 
@@ -47,12 +48,18 @@
 
 -ifdef(use_specs).
 
--spec(status/0 :: () -> [{'nodes' | 'running_nodes', [erlang_node()]}]).
--spec(dir/0 :: () -> file_path()).
+-export_type([node_type/0]).
+
+-type(node_type() :: disc_only | disc | ram | unknown).
+-spec(status/0 :: () -> [{'nodes', [{node_type(), [node()]}]} |
+                         {'running_nodes', [node()]}]).
+-spec(dir/0 :: () -> file:filename()).
 -spec(ensure_mnesia_dir/0 :: () -> 'ok').
 -spec(init/0 :: () -> 'ok').
 -spec(is_db_empty/0 :: () -> boolean()).
--spec(cluster/1 :: ([erlang_node()]) -> 'ok').
+-spec(cluster/1 :: ([node()]) -> 'ok').
+-spec(force_cluster/1 :: ([node()]) -> 'ok').
+-spec(cluster/2 :: ([node()], boolean()) -> 'ok').
 -spec(reset/0 :: () -> 'ok').
 -spec(force_reset/0 :: () -> 'ok').
 -spec(is_clustered/0 :: () -> boolean()).
@@ -64,13 +71,26 @@
 %%----------------------------------------------------------------------------
 
 status() ->
-    [{nodes, mnesia:system_info(db_nodes)},
+    [{nodes, case mnesia:system_info(is_running) of
+                 yes -> [{Key, Nodes} ||
+                            {Key, CopyType} <- [{disc_only, disc_only_copies},
+                                                {disc,      disc_copies},
+                                                {ram,       ram_copies}],
+                            begin
+                                Nodes = nodes_of_type(CopyType),
+                                Nodes =/= []
+                            end];
+                 _   -> case mnesia:system_info(db_nodes) of
+                            [] -> [];
+                            Nodes -> [{unknown, Nodes}]
+                        end %% covers no | starting | stopping
+             end},
      {running_nodes, mnesia:system_info(running_db_nodes)}].
 
 init() ->
     ok = ensure_mnesia_running(),
     ok = ensure_mnesia_dir(),
-    ok = init_db(read_cluster_nodes_config()),
+    ok = init_db(read_cluster_nodes_config(), true),
     ok = wait_for_tables(),
     ok.
 
@@ -78,16 +98,22 @@ is_db_empty() ->
     lists:all(fun (Tab) -> mnesia:dirty_first(Tab) == '$end_of_table' end,
               table_names()).
 
+cluster(ClusterNodes) ->
+    cluster(ClusterNodes, false).
+force_cluster(ClusterNodes) ->
+    cluster(ClusterNodes, true).
+
 %% Alter which disk nodes this node is clustered with. This can be a
 %% subset of all the disk nodes in the cluster but can (and should)
 %% include the node itself if it is to be a disk rather than a ram
-%% node.
-cluster(ClusterNodes) ->
+%% node.  If Force is false, only connections to online nodes are
+%% allowed.
+cluster(ClusterNodes, Force) ->
     ok = ensure_mnesia_not_running(),
     ok = ensure_mnesia_dir(),
     rabbit_misc:ensure_ok(mnesia:start(), cannot_start_mnesia),
     try
-        ok = init_db(ClusterNodes),
+        ok = init_db(ClusterNodes, Force),
         ok = wait_for_tables(),
         ok = create_cluster_nodes_config(ClusterNodes)
     after
@@ -118,6 +144,15 @@ empty_ram_only_tables() ->
 
 %%--------------------------------------------------------------------
 
+nodes_of_type(Type) ->
+    %% This function should return the nodes of a certain type (ram,
+    %% disc or disc_only) in the current cluster.  The type of nodes
+    %% is determined when the cluster is initially configured.
+    %% Specifically, we check whether a certain table, which we know
+    %% will be written to disk on a disc node, is stored on disk or in
+    %% RAM.
+    mnesia:table_info(rabbit_durable_exchange, Type).
+
 table_definitions() ->
     [{rabbit_user,
       [{record_name, user},
@@ -149,6 +184,8 @@ table_definitions() ->
       [{record_name, reverse_route},
        {attributes, record_info(fields, reverse_route)},
        {type, ordered_set}]},
+     %% Consider the implications to nodes_of_type/1 before altering
+     %% the next entry.
      {rabbit_durable_exchange,
       [{record_name, exchange},
        {attributes, record_info(fields, exchange)},
@@ -227,20 +264,9 @@ read_cluster_nodes_config() ->
     case rabbit_misc:read_term_file(FileName) of
         {ok, [ClusterNodes]} -> ClusterNodes;
         {error, enoent} ->
-            case application:get_env(cluster_config) of
+            case application:get_env(cluster_nodes) of
                 undefined -> [];
-                {ok, DefaultFileName} ->
-                    case file:consult(DefaultFileName) of
-                        {ok, [ClusterNodes]} -> ClusterNodes;
-                        {error, enoent} ->
-                            error_logger:warning_msg(
-                              "default cluster config file ~p does not exist~n",
-                              [DefaultFileName]),
-                            [];
-                        {error, Reason} ->
-                            throw({error, {cannot_read_cluster_nodes_config,
-                                           DefaultFileName, Reason}})
-                    end
+                {ok, ClusterNodes} -> ClusterNodes
             end;
         {error, Reason} ->
             throw({error, {cannot_read_cluster_nodes_config,
@@ -259,38 +285,56 @@ delete_cluster_nodes_config() ->
 
 %% Take a cluster node config and create the right kind of node - a
 %% standalone disk node, or disk or ram node connected to the
-%% specified cluster nodes.
-init_db(ClusterNodes) ->
-    case mnesia:change_config(extra_db_nodes, ClusterNodes -- [node()]) of
-        {ok, []} ->
-            case mnesia:system_info(use_dir) of
-                true ->
-                    case check_schema_integrity() of
-                        ok ->
-                            ok;
-                        {error, Reason} ->
-                            %% NB: we cannot use rabbit_log here since
-                            %% it may not have been started yet
-                            error_logger:warning_msg(
-                              "schema integrity check failed: ~p~n"
-                              "moving database to backup location "
-                              "and recreating schema from scratch~n",
-                              [Reason]),
-                            ok = move_db(),
+%% specified cluster nodes.  If Force is false, don't allow
+%% connections to offline nodes.
+init_db(ClusterNodes, Force) ->
+    UClusterNodes = lists:usort(ClusterNodes),
+    ProperClusterNodes = UClusterNodes -- [node()],
+    case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of
+        {ok, Nodes} ->
+            case Force of
+                false ->
+                    FailedClusterNodes = ProperClusterNodes -- Nodes,
+                    case FailedClusterNodes of
+                        [] -> ok;
+                        _ ->
+                            throw({error, {failed_to_cluster_with,
+                                           FailedClusterNodes,
+                                           "Mnesia could not connect to some nodes."}})
+                    end;
+                _ -> ok
+            end,
+            case Nodes of
+                [] ->
+                    case mnesia:system_info(use_dir) of
+                        true ->
+                            case check_schema_integrity() of
+                                ok ->
+                                    ok;
+                                {error, Reason} ->
+                                    %% NB: we cannot use rabbit_log here since
+                                    %% it may not have been started yet
+                                    error_logger:warning_msg(
+                                      "schema integrity check failed: ~p~n"
+                                      "moving database to backup location "
+                                      "and recreating schema from scratch~n",
+                                      [Reason]),
+                                    ok = move_db(),
+                                    ok = create_schema()
+                            end;
+                        false ->
                             ok = create_schema()
                     end;
-                false ->
-                    ok = create_schema()
-            end;
-        {ok, [_|_]} ->
-            IsDiskNode = ClusterNodes == [] orelse
-                lists:member(node(), ClusterNodes),
-            ok = wait_for_replicated_tables(),
-            ok = create_local_table_copy(schema, disc_copies),
-            ok = create_local_table_copies(case IsDiskNode of
-                                               true  -> disc;
-                                               false -> ram
-                                           end);
+                [_|_] ->
+                    IsDiskNode = ClusterNodes == [] orelse
+                        lists:member(node(), ClusterNodes),
+                    ok = wait_for_replicated_tables(),
+                    ok = create_local_table_copy(schema, disc_copies),
+                    ok = create_local_table_copies(case IsDiskNode of
+                                                       true  -> disc;
+                                                       false -> ram
+                                                   end)
+                end;
         {error, Reason} ->
             %% one reason we may end up here is if we try to join
             %% nodes together that are currently running standalone or
@@ -346,7 +390,7 @@ table_has_copy_type(TabDef, DiscType) ->
 
 create_local_table_copies(Type) ->
     lists:foreach(
-      fun({Tab, TabDef}) ->
+      fun ({Tab, TabDef}) ->
               HasDiscCopies     = table_has_copy_type(TabDef, disc_copies),
               HasDiscOnlyCopies = table_has_copy_type(TabDef, disc_only_copies),
               LocalTab          = proplists:get_bool(local_content, TabDef),
diff --git a/src/rabbit_msg_file.erl b/src/rabbit_msg_file.erl
new file mode 100644
index 00000000..4f178439
--- /dev/null
+++ b/src/rabbit_msg_file.erl
@@ -0,0 +1,136 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_msg_file).
+
+-export([append/3, read/2, scan/2]).
+
+%%----------------------------------------------------------------------------
+
+-include("rabbit_msg_store.hrl").
+
+-define(INTEGER_SIZE_BYTES,      8).
+-define(INTEGER_SIZE_BITS,       (8 * ?INTEGER_SIZE_BYTES)).
+-define(WRITE_OK_SIZE_BITS,      8).
+-define(WRITE_OK_MARKER,         255).
+-define(FILE_PACKING_ADJUSTMENT, (1 + ?INTEGER_SIZE_BYTES)).
+-define(GUID_SIZE_BYTES,         16).
+-define(GUID_SIZE_BITS,          (8 * ?GUID_SIZE_BYTES)).
+-define(SCAN_BLOCK_SIZE,         4194304). %% 4MB
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-type(io_device() :: any()).
+-type(position() :: non_neg_integer()).
+-type(msg_size() :: non_neg_integer()).
+-type(file_size() :: non_neg_integer()).
+
+-spec(append/3 :: (io_device(), rabbit_guid:guid(), msg()) ->
+                       rabbit_types:ok_or_error2(msg_size(), any())).
+-spec(read/2 :: (io_device(), msg_size()) ->
+                     rabbit_types:ok_or_error2({rabbit_guid:guid(), msg()},
+                                               any())).
+-spec(scan/2 :: (io_device(), file_size()) ->
+                     {'ok', [{rabbit_guid:guid(), msg_size(), position()}],
+                      position()}).
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+append(FileHdl, Guid, MsgBody)
+  when is_binary(Guid) andalso size(Guid) =:= ?GUID_SIZE_BYTES ->
+    MsgBodyBin  = term_to_binary(MsgBody),
+    MsgBodyBinSize = size(MsgBodyBin),
+    Size = MsgBodyBinSize + ?GUID_SIZE_BYTES,
+    case file_handle_cache:append(FileHdl,
+                                  <<Size:?INTEGER_SIZE_BITS,
+                                   Guid:?GUID_SIZE_BYTES/binary,
+                                   MsgBodyBin:MsgBodyBinSize/binary,
+                                   ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>) of
+        ok -> {ok, Size + ?FILE_PACKING_ADJUSTMENT};
+        KO -> KO
+    end.
+
+read(FileHdl, TotalSize) ->
+    Size = TotalSize - ?FILE_PACKING_ADJUSTMENT,
+    BodyBinSize = Size - ?GUID_SIZE_BYTES,
+    case file_handle_cache:read(FileHdl, TotalSize) of
+        {ok, <<Size:?INTEGER_SIZE_BITS,
+              Guid:?GUID_SIZE_BYTES/binary,
+              MsgBodyBin:BodyBinSize/binary,
+              ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} ->
+            {ok, {Guid, binary_to_term(MsgBodyBin)}};
+        KO -> KO
+    end.
+
+scan(FileHdl, FileSize) when FileSize >= 0 ->
+    scan(FileHdl, FileSize, <<>>, 0, [], 0).
+
+scan(_FileHdl, FileSize, _Data, FileSize, Acc, ScanOffset) ->
+    {ok, Acc, ScanOffset};
+scan(FileHdl, FileSize, Data, ReadOffset, Acc, ScanOffset) ->
+    Read = lists:min([?SCAN_BLOCK_SIZE, (FileSize - ReadOffset)]),
+    case file_handle_cache:read(FileHdl, Read) of
+        {ok, Data1} ->
+            {Data2, Acc1, ScanOffset1} =
+                scan(<<Data/binary, Data1/binary>>, Acc, ScanOffset),
+            ReadOffset1 = ReadOffset + size(Data1),
+            scan(FileHdl, FileSize, Data2, ReadOffset1, Acc1, ScanOffset1);
+        _KO ->
+            {ok, Acc, ScanOffset}
+    end.
+
+scan(<<>>, Acc, Offset) ->
+    {<<>>, Acc, Offset};
+scan(<<0:?INTEGER_SIZE_BITS, _Rest/binary>>, Acc, Offset) ->
+    {<<>>, Acc, Offset}; %% Nothing to do other than stop.
+scan(<<Size:?INTEGER_SIZE_BITS, GuidAndMsg:Size/binary,
+       WriteMarker:?WRITE_OK_SIZE_BITS, Rest/binary>>, Acc, Offset) ->
+    TotalSize = Size + ?FILE_PACKING_ADJUSTMENT,
+    case WriteMarker of
+        ?WRITE_OK_MARKER ->
+            %% Here we take option 5 from
+            %% http://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 in
+            %% which we read the Guid as a number, and then convert it
+            %% back to a binary in order to work around bugs in
+            %% Erlang's GC.
+            <<GuidNum:?GUID_SIZE_BITS, _Msg/binary>> =
+                <<GuidAndMsg:Size/binary>>,
+            <<Guid:?GUID_SIZE_BYTES/binary>> = <<GuidNum:?GUID_SIZE_BITS>>,
+            scan(Rest, [{Guid, TotalSize, Offset} | Acc], Offset + TotalSize);
+        _ ->
+            scan(Rest, Acc, Offset + TotalSize)
+    end;
+scan(Data, Acc, Offset) ->
+    {Data, Acc, Offset}.
diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl
new file mode 100644
index 00000000..63100571
--- /dev/null
+++ b/src/rabbit_msg_store.erl
@@ -0,0 +1,1731 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_msg_store).
+
+-behaviour(gen_server2).
+
+-export([start_link/4, write/4, read/3, contains/2, remove/2, release/2,
+         sync/3, client_init/2, client_terminate/1,
+         client_delete_and_terminate/3, successfully_recovered_state/1]).
+
+-export([sync/1, gc_done/4, set_maximum_since_use/2, gc/3]). %% internal
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+%%----------------------------------------------------------------------------
+
+-include("rabbit_msg_store.hrl").
+
+-define(SYNC_INTERVAL,  5).   %% milliseconds
+-define(CLEAN_FILENAME, "clean.dot").
+-define(FILE_SUMMARY_FILENAME, "file_summary.ets").
+
+-define(BINARY_MODE,     [raw, binary]).
+-define(READ_MODE,       [read]).
+-define(READ_AHEAD_MODE, [read_ahead | ?READ_MODE]).
+-define(WRITE_MODE,      [write]).
+
+-define(FILE_EXTENSION,        ".rdq").
+-define(FILE_EXTENSION_TMP,    ".rdt").
+
+-define(HANDLE_CACHE_BUFFER_SIZE, 1048576). %% 1MB
+
+%%----------------------------------------------------------------------------
+
+-record(msstate,
+        { dir,                    %% store directory
+          index_module,           %% the module for index ops
+          index_state,            %% where are messages?
+          current_file,           %% current file name as number
+          current_file_handle,    %% current file handle since the last fsync?
+          file_handle_cache,      %% file handle cache
+          on_sync,                %% pending sync requests
+          sync_timer_ref,         %% TRef for our interval timer
+          sum_valid_data,         %% sum of valid data in all files
+          sum_file_size,          %% sum of file sizes
+          pending_gc_completion,  %% things to do once GC completes
+          gc_active,              %% is the GC currently working?
+          gc_pid,                 %% pid of our GC
+          file_handles_ets,       %% tid of the shared file handles table
+          file_summary_ets,       %% tid of the file summary table
+          dedup_cache_ets,        %% tid of dedup cache table
+          cur_file_cache_ets,     %% tid of current file cache table
+          client_refs,            %% set of references of all registered clients
+          successfully_recovered, %% boolean: did we recover state?
+          file_size_limit         %% how big are our files allowed to get?
+         }).
+
+-record(client_msstate,
+        { file_handle_cache,
+          index_state,
+          index_module,
+          dir,
+          gc_pid,
+          file_handles_ets,
+          file_summary_ets,
+          dedup_cache_ets,
+          cur_file_cache_ets
+         }).
+
+-record(file_summary,
+        {file, valid_total_size, contiguous_top, left, right, file_size,
+         locked, readers}).
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-type(server() :: pid() | atom()).
+-type(file_num() :: non_neg_integer()).
+-type(client_msstate() :: #client_msstate {
+                      file_handle_cache  :: dict:dictionary(),
+                      index_state        :: any(),
+                      index_module       :: atom(),
+                      dir                :: file:filename(),
+                      gc_pid             :: pid(),
+                      file_handles_ets   :: ets:tid(),
+                      file_summary_ets   :: ets:tid(),
+                      dedup_cache_ets    :: ets:tid(),
+                      cur_file_cache_ets :: ets:tid() }).
+-type(startup_fun_state() ::
+        {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})),
+         A}).
+
+-spec(start_link/4 ::
+        (atom(), file:filename(), [binary()] | 'undefined',
+         startup_fun_state()) ->
+                           'ignore' | rabbit_types:ok_or_error2(pid(), any())).
+-spec(write/4 :: (server(), rabbit_guid:guid(), msg(), client_msstate()) ->
+                      rabbit_types:ok(client_msstate())).
+-spec(read/3 :: (server(), rabbit_guid:guid(), client_msstate()) ->
+                     {rabbit_types:ok(msg()) | 'not_found', client_msstate()}).
+-spec(contains/2 :: (server(), rabbit_guid:guid()) -> boolean()).
+-spec(remove/2 :: (server(), [rabbit_guid:guid()]) -> 'ok').
+-spec(release/2 :: (server(), [rabbit_guid:guid()]) -> 'ok').
+-spec(sync/3 :: (server(), [rabbit_guid:guid()], fun (() -> any())) -> 'ok').
+-spec(gc_done/4 :: (server(), non_neg_integer(), file_num(), file_num()) ->
+                        'ok').
+-spec(set_maximum_since_use/2 :: (server(), non_neg_integer()) -> 'ok').
+-spec(client_init/2 :: (server(), binary()) -> client_msstate()).
+-spec(client_terminate/1 :: (client_msstate()) -> 'ok').
+-spec(client_delete_and_terminate/3 ::
+        (client_msstate(), server(), binary()) -> 'ok').
+-spec(successfully_recovered_state/1 :: (server()) -> boolean()).
+
+-spec(gc/3 :: (non_neg_integer(), non_neg_integer(),
+               {ets:tid(), file:filename(), atom(), any()}) ->
+                   'concurrent_readers' | non_neg_integer()).
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+%% We run GC whenever (garbage / sum_file_size) > ?GARBAGE_FRACTION
+%% It is not recommended to set this to < 0.5
+-define(GARBAGE_FRACTION,      0.5).
+
+%% The components:
+%%
+%% Index: this is a mapping from Guid to #msg_location{}:
+%%        {Guid, RefCount, File, Offset, TotalSize}
+%%        By default, it's in ets, but it's also pluggable.
+%% FileSummary: this is an ets table which maps File to #file_summary{}:
+%%        {File, ValidTotalSize, ContiguousTop, Left, Right,
+%%         FileSize, Locked, Readers}
+%%
+%% The basic idea is that messages are appended to the current file up
+%% until that file becomes too big (> file_size_limit). At that point,
+%% the file is closed and a new file is created on the _right_ of the
+%% old file which is used for new messages. Files are named
+%% numerically ascending, thus the file with the lowest name is the
+%% eldest file.
+%%
+%% We need to keep track of which messages are in which files (this is
+%% the Index); how much useful data is in each file and which files
+%% are on the left and right of each other. This is the purpose of the
+%% FileSummary ets table.
+%%
+%% As messages are removed from files, holes appear in these
+%% files. The field ValidTotalSize contains the total amount of useful
+%% data left in the file, whilst ContiguousTop contains the amount of
+%% valid data right at the start of each file. These are needed for
+%% garbage collection.
+%%
+%% When we discover that a file is now empty, we delete it. When we
+%% discover that it can be combined with the useful data in either its
+%% left or right neighbour, and overall, across all the files, we have
+%% ((the amount of garbage) / (the sum of all file sizes)) >
+%% ?GARBAGE_FRACTION, we start a garbage collection run concurrently,
+%% which will compact the two files together. This keeps disk
+%% utilisation high and aids performance. We deliberately do this
+%% lazily in order to prevent doing GC on files which are soon to be
+%% emptied (and hence deleted).
+%%
+%% Given the compaction between two files, the left file (i.e. elder
+%% file) is considered the ultimate destination for the good data in
+%% the right file. If necessary, the good data in the left file which
+%% is fragmented throughout the file is written out to a temporary
+%% file, then read back in to form a contiguous chunk of good data at
+%% the start of the left file. Thus the left file is garbage collected
+%% and compacted. Then the good data from the right file is copied
+%% onto the end of the left file. Index and FileSummary tables are
+%% updated.
+%%
+%% On non-clean startup, we scan the files we discover, dealing with
+%% the possibilities of a crash having occurred during a compaction
+%% (this consists of tidyup - the compaction is deliberately designed
+%% such that data is duplicated on disk rather than risking it being
+%% lost), and rebuild the FileSummary ets table and Index.
+%%
+%% So, with this design, messages move to the left. Eventually, they
+%% should end up in a contiguous block on the left and are then never
+%% rewritten. But this isn't quite the case. If in a file there is one
+%% message that is being ignored, for some reason, and messages in the
+%% file to the right and in the current block are being read all the
+%% time then it will repeatedly be the case that the good data from
+%% both files can be combined and will be written out to a new
+%% file. Whenever this happens, our shunned message will be rewritten.
+%%
+%% So, provided that we combine messages in the right order,
+%% (i.e. left file, bottom to top, right file, bottom to top),
+%% eventually our shunned message will end up at the bottom of the
+%% left file. The compaction/combining algorithm is smart enough to
+%% read in good data from the left file that is scattered throughout
+%% (i.e. C and D in the below diagram), then truncate the file to just
+%% above B (i.e. truncate to the limit of the good contiguous region
+%% at the start of the file), then write C and D on top and then write
+%% E, F and G from the right file on top. Thus contiguous blocks of
+%% good data at the bottom of files are not rewritten (yes, this is
+%% the data the size of which is tracked by the ContiguousTop
+%% variable. Judicious use of a mirror is required).
+%%
+%% +-------+    +-------+         +-------+
+%% |   X   |    |   G   |         |   G   |
+%% +-------+    +-------+         +-------+
+%% |   D   |    |   X   |         |   F   |
+%% +-------+    +-------+         +-------+
+%% |   X   |    |   X   |         |   E   |
+%% +-------+    +-------+         +-------+
+%% |   C   |    |   F   |   ===>  |   D   |
+%% +-------+    +-------+         +-------+
+%% |   X   |    |   X   |         |   C   |
+%% +-------+    +-------+         +-------+
+%% |   B   |    |   X   |         |   B   |
+%% +-------+    +-------+         +-------+
+%% |   A   |    |   E   |         |   A   |
+%% +-------+    +-------+         +-------+
+%%   left         right             left
+%%
+%% From this reasoning, we do have a bound on the number of times the
+%% message is rewritten. From when it is inserted, there can be no
+%% files inserted between it and the head of the queue, and the worst
+%% case is that every time it is rewritten, it moves one position lower
+%% in the file (for it to stay at the same position requires that
+%% there are no holes beneath it, which means truncate would be used
+%% and so it would not be rewritten at all). Thus this seems to
+%% suggest the limit is the number of messages ahead of it in the
+%% queue, though it's likely that that's pessimistic, given the
+%% requirements for compaction/combination of files.
+%%
+%% The other property that we have is the bound on the lowest
+%% utilisation, which should be 50% - worst case is that all files are
+%% fractionally over half full and can't be combined (equivalent is
+%% alternating full files and files with only one tiny message in
+%% them).
+%%
+%% Messages are reference-counted. When a message with the same guid
+%% is written several times we only store it once, and only remove it
+%% from the store when it has been removed the same number of times.
+%%
+%% The reference counts do not persist. Therefore the initialisation
+%% function must be provided with a generator that produces ref count
+%% deltas for all recovered messages. This is only used on startup
+%% when the shutdown was non-clean.
+%%
+%% Read messages with a reference count greater than one are entered
+%% into a message cache. The purpose of the cache is not especially
+%% performance, though it can help there too, but prevention of memory
+%% explosion. It ensures that as messages with a high reference count
+%% are read from several processes they are read back as the same
+%% binary object rather than multiples of identical binary
+%% objects.
+%%
+%% Reads can be performed directly by clients without calling to the
+%% server. This is safe because multiple file handles can be used to
+%% read files. However, locking is used by the concurrent GC to make
+%% sure that reads are not attempted from files which are in the
+%% process of being garbage collected.
+%%
+%% The server automatically defers reads, removes and contains calls
+%% that occur which refer to files which are currently being
+%% GC'd. Contains calls are only deferred in order to ensure they do
+%% not overtake removes.
+%%
+%% The current file to which messages are being written has a
+%% write-back cache. This is written to immediately by clients and can
+%% be read from by clients too. This means that there are only ever
+%% writes made to the current file, thus eliminating delays due to
+%% flushing write buffers in order to be able to safely read from the
+%% current file. The one exception to this is that on start up, the
+%% cache is not populated with msgs found in the current file, and
+%% thus in this case only, reads may have to come from the file
+%% itself. The effect of this is that even if the msg_store process is
+%% heavily overloaded, clients can still write and read messages with
+%% very low latency and not block at all.
+%%
+%% For notes on Clean Shutdown and startup, see documentation in
+%% variable_queue.
+
+%%----------------------------------------------------------------------------
+%% public API
+%%----------------------------------------------------------------------------
+
+start_link(Server, Dir, ClientRefs, StartupFunState) ->
+    gen_server2:start_link({local, Server}, ?MODULE,
+                           [Server, Dir, ClientRefs, StartupFunState],
+                           [{timeout, infinity}]).
+
+write(Server, Guid, Msg,
+      CState = #client_msstate { cur_file_cache_ets = CurFileCacheEts }) ->
+    ok = update_msg_cache(CurFileCacheEts, Guid, Msg),
+    {gen_server2:cast(Server, {write, Guid, Msg}), CState}.
+
+read(Server, Guid,
+     CState = #client_msstate { dedup_cache_ets    = DedupCacheEts,
+                                cur_file_cache_ets = CurFileCacheEts }) ->
+    %% 1. Check the dedup cache
+    case fetch_and_increment_cache(DedupCacheEts, Guid) of
+        not_found ->
+            %% 2. Check the cur file cache
+            case ets:lookup(CurFileCacheEts, Guid) of
+                [] -> %% 3. In neither cache: consult the index
+                    Defer = fun() -> {gen_server2:pcall(
+                                        Server, 2, {read, Guid}, infinity),
+                                      CState} end,
+                    case index_lookup(Guid, CState) of
+                        not_found   -> Defer(); %% hand the read to the server
+                        MsgLocation -> client_read1(Server, MsgLocation, Defer,
+                                                    CState)
+                    end;
+                [{Guid, Msg, _CacheRefCount}] ->
+                    %% Although we've found it, we don't know the
+                    %% refcount, so can't insert into dedup cache
+                    {{ok, Msg}, CState}
+            end;
+        Msg ->
+            {{ok, Msg}, CState}
+    end.
+
+contains(Server, Guid) -> gen_server2:call(Server, {contains, Guid}, infinity).
+remove(_Server, [])    -> ok;
+remove(Server, Guids)  -> gen_server2:cast(Server, {remove, Guids}).
+release(_Server, [])   -> ok;
+release(Server, Guids) -> gen_server2:cast(Server, {release, Guids}).
+sync(Server, Guids, K) -> gen_server2:cast(Server, {sync, Guids, K}).
+sync(Server)           -> gen_server2:pcast(Server, 8, sync). %% internal
+
+gc_done(Server, Reclaimed, Source, Destination) ->
+    gen_server2:pcast(Server, 8, {gc_done, Reclaimed, Source, Destination}).
+
+set_maximum_since_use(Server, Age) ->
+    gen_server2:pcast(Server, 8, {set_maximum_since_use, Age}).
+
+client_init(Server, Ref) ->
+    {IState, IModule, Dir, GCPid,
+     FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts} =
+        gen_server2:call(Server, {new_client_state, Ref}, infinity),
+    #client_msstate { file_handle_cache  = dict:new(),
+                      index_state        = IState,
+                      index_module       = IModule,
+                      dir                = Dir,
+                      gc_pid             = GCPid,
+                      file_handles_ets   = FileHandlesEts,
+                      file_summary_ets   = FileSummaryEts,
+                      dedup_cache_ets    = DedupCacheEts,
+                      cur_file_cache_ets = CurFileCacheEts }.
+
+client_terminate(CState) ->
+    close_all_handles(CState),
+    ok.
+
+client_delete_and_terminate(CState, Server, Ref) ->
+    ok = client_terminate(CState),
+    ok = gen_server2:call(Server, {delete_client, Ref}, infinity).
+
+successfully_recovered_state(Server) ->
+    gen_server2:call(Server, successfully_recovered_state, infinity).
+
+%%----------------------------------------------------------------------------
+%% Client-side-only helpers
+%%----------------------------------------------------------------------------
+
+client_read1(Server,
+             #msg_location { guid = Guid, file = File } = MsgLocation,
+             Defer,
+             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
+    case ets:lookup(FileSummaryEts, File) of
+        [] -> %% File has been GC'd and no longer exists. Go around again.
+            read(Server, Guid, CState);
+        [#file_summary { locked = Locked, right = Right }] ->
+            client_read2(Server, Locked, Right, MsgLocation, Defer, CState)
+    end.
+
+client_read2(_Server, false, undefined, _MsgLocation, Defer, _CState) ->
+    %% The file is unlocked and has no right-hand neighbour, so the
+    %% message is apparently in the current_file, even though we've
+    %% already checked both caches and not found it there. We can
+    %% only arrive here if we are trying to read a message which we
+    %% have not written, which is very odd, so just defer.
+    %%
+    %% OR, on startup, the cur_file_cache is not populated with the
+    %% contents of the current file, thus reads from the current file
+    %% will end up here and will need to be deferred.
+    Defer();
+client_read2(_Server, true, _Right, _MsgLocation, Defer, _CState) ->
+    %% Of course, in the meantime, the GC could have run and our msg
+    %% is actually in a different file, unlocked. However, deferring
+    %% is the safest and simplest thing to do.
+    Defer();
+client_read2(Server, false, _Right,
+             MsgLocation = #msg_location { guid = Guid, file = File },
+             Defer,
+             CState = #client_msstate { file_summary_ets = FileSummaryEts }) ->
+    %% Register as a reader of File. A GC may have finished already, so
+    %% all of this may be for the wrong file, or a non-existent one (in
+    %% which case the update_counter fails and we go around again).
+    safe_ets_update_counter(
+      FileSummaryEts, File, {#file_summary.readers, +1},
+      fun (_) -> client_read3(Server, MsgLocation, Defer, CState) end,
+      fun () -> read(Server, Guid, CState) end).
+
+client_read3(Server, #msg_location { guid = Guid, file = File }, Defer,
+             CState = #client_msstate { file_handles_ets = FileHandlesEts,
+                                        file_summary_ets = FileSummaryEts,
+                                        dedup_cache_ets  = DedupCacheEts,
+                                        gc_pid           = GCPid }) ->
+    Release =
+        fun() -> ok = case ets:update_counter(FileSummaryEts, File,
+                                              {#file_summary.readers, -1}) of
+                          0 -> case ets:lookup(FileSummaryEts, File) of
+                                   [#file_summary { locked = true }] ->
+                                       rabbit_msg_store_gc:no_readers(
+                                         GCPid, File);
+                                   _ -> ok
+                               end;
+                          _ -> ok
+                      end
+        end,
+    %% If a GC involving the file hasn't already started, it won't
+    %% start now. Need to check again to see if we've been locked in
+    %% the meantime, between lookup and update_counter (thus GC
+    %% started before our +1. In fact, it could have finished by now
+    %% too).
+    case ets:lookup(FileSummaryEts, File) of
+        [] -> %% GC has deleted our file, just go round again.
+            read(Server, Guid, CState);
+        [#file_summary { locked = true }] ->
+            %% If we get a badarg here, then the GC has finished and
+            %% deleted our file. Try going around again. Otherwise,
+            %% just defer.
+            %%
+            %% badarg scenario: we lookup, msg_store locks, GC starts,
+            %% GC ends, we +1 readers, msg_store ets:deletes (and
+            %% unlocks the dest)
+            try Release(),
+                Defer()
+            catch error:badarg -> read(Server, Guid, CState)
+            end;
+        [#file_summary { locked = false }] ->
+            %% Ok, we're definitely safe to continue - a GC involving
+            %% the file cannot start up now, and isn't running, so
+            %% nothing will tell us from now on to close the handle if
+            %% it's already open.
+            %%
+            %% Finally, we need to recheck that the msg is still at
+            %% the same place - it's possible an entire GC ran between
+            %% us doing the lookup and the +1 on the readers. (Same as
+            %% badarg scenario above, but we don't have a missing file
+            %% - we just have the /wrong/ file).
+            case index_lookup(Guid, CState) of
+                #msg_location { file = File } = MsgLocation ->
+                    %% Still the same file.
+                    mark_handle_open(FileHandlesEts, File),
+
+                    CState1 = close_all_indicated(CState),
+                    {Msg, CState2} = %% This will never be the current file
+                        read_from_disk(MsgLocation, CState1, DedupCacheEts),
+                    Release(), %% this MUST NOT fail with badarg
+                    {{ok, Msg}, CState2};
+                MsgLocation -> %% different file!
+                    Release(), %% this MUST NOT fail with badarg
+                    client_read1(Server, MsgLocation, Defer, CState)
+            end
+    end.
+
+%%----------------------------------------------------------------------------
+%% gen_server callbacks
+%%----------------------------------------------------------------------------
+
+init([Server, BaseDir, ClientRefs, {MsgRefDeltaGen, MsgRefDeltaGenInit}]) ->
+    process_flag(trap_exit, true),
+
+    ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use,
+                                             [self()]),
+
+    Dir = filename:join(BaseDir, atom_to_list(Server)),
+
+    {ok, IndexModule} = application:get_env(msg_store_index_module),
+    rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]),
+
+    {AllCleanShutdown, IndexState, ClientRefs1} =
+        recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server),
+
+    {FileSummaryRecovered, FileSummaryEts} =
+        recover_file_summary(AllCleanShutdown, Dir, Server),
+
+    DedupCacheEts   = ets:new(rabbit_msg_store_dedup_cache, [set, public]),
+    FileHandlesEts  = ets:new(rabbit_msg_store_shared_file_handles,
+                              [ordered_set, public]),
+    CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]),
+
+    {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit),
+
+    State = #msstate { dir                    = Dir,
+                       index_module           = IndexModule,
+                       index_state            = IndexState,
+                       current_file           = 0,
+                       current_file_handle    = undefined,
+                       file_handle_cache      = dict:new(),
+                       on_sync                = [],
+                       sync_timer_ref         = undefined,
+                       sum_valid_data         = 0,
+                       sum_file_size          = 0,
+                       pending_gc_completion  = [],
+                       gc_active              = false,
+                       gc_pid                 = undefined,
+                       file_handles_ets       = FileHandlesEts,
+                       file_summary_ets       = FileSummaryEts,
+                       dedup_cache_ets        = DedupCacheEts,
+                       cur_file_cache_ets     = CurFileCacheEts,
+                       client_refs            = ClientRefs1,
+                       successfully_recovered = AllCleanShutdown,
+                       file_size_limit        = FileSizeLimit
+                      },
+
+    ok = case AllCleanShutdown of
+             true  -> ok;
+             false -> count_msg_refs(MsgRefDeltaGen, MsgRefDeltaGenInit, State)
+         end,
+
+    FileNames =
+        sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION, Dir)),
+    TmpFileNames =
+        sort_file_names(filelib:wildcard("*" ++ ?FILE_EXTENSION_TMP, Dir)),
+    ok = recover_crashed_compactions(Dir, FileNames, TmpFileNames),
+
+    %% There should be no more tmp files now, so go ahead and load the
+    %% whole lot
+    Files = [filename_to_num(FileName) || FileName <- FileNames],
+    {Offset, State1 = #msstate { current_file = CurFile }} =
+        build_index(FileSummaryRecovered, Files, State),
+
+    %% read is only needed so that we can seek
+    {ok, CurHdl} = open_file(Dir, filenum_to_name(CurFile),
+                             [read | ?WRITE_MODE]),
+    {ok, Offset} = file_handle_cache:position(CurHdl, Offset),
+    ok = file_handle_cache:truncate(CurHdl),
+
+    {ok, GCPid} = rabbit_msg_store_gc:start_link(Dir, IndexState, IndexModule,
+                                                 FileSummaryEts),
+
+    {ok, maybe_compact(
+           State1 #msstate { current_file_handle = CurHdl, gc_pid = GCPid }),
+     hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+handle_call({read, Guid}, From, State) ->
+    State1 = read_message(Guid, From, State),
+    noreply(State1);
+
+handle_call({contains, Guid}, From, State) ->
+    State1 = contains_message(Guid, From, State),
+    noreply(State1);
+
+handle_call({new_client_state, CRef}, _From,
+            State = #msstate { dir                = Dir,
+                               index_state        = IndexState,
+                               index_module       = IndexModule,
+                               file_handles_ets   = FileHandlesEts,
+                               file_summary_ets   = FileSummaryEts,
+                               dedup_cache_ets    = DedupCacheEts,
+                               cur_file_cache_ets = CurFileCacheEts,
+                               client_refs        = ClientRefs,
+                               gc_pid             = GCPid }) ->
+    reply({IndexState, IndexModule, Dir, GCPid,
+           FileHandlesEts, FileSummaryEts, DedupCacheEts, CurFileCacheEts},
+          State #msstate { client_refs = sets:add_element(CRef, ClientRefs) });
+
+handle_call(successfully_recovered_state, _From, State) ->
+    reply(State #msstate.successfully_recovered, State);
+
+handle_call({delete_client, CRef}, _From,
+            State = #msstate { client_refs = ClientRefs }) ->
+    reply(ok,
+          State #msstate { client_refs = sets:del_element(CRef, ClientRefs) }).
+
+handle_cast({write, Guid, Msg},
+            State = #msstate { current_file_handle = CurHdl,
+                               current_file        = CurFile,
+                               sum_valid_data      = SumValid,
+                               sum_file_size       = SumFileSize,
+                               file_summary_ets    = FileSummaryEts,
+                               cur_file_cache_ets  = CurFileCacheEts }) ->
+    true = 0 =< ets:update_counter(CurFileCacheEts, Guid, {3, -1}),
+    case index_lookup(Guid, State) of
+        not_found ->
+            %% New message, lots to do
+            {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl),
+            {ok, TotalSize} = rabbit_msg_file:append(CurHdl, Guid, Msg),
+            ok = index_insert(#msg_location {
+                                guid = Guid, ref_count = 1, file = CurFile,
+                                offset = CurOffset, total_size = TotalSize },
+                              State),
+            [#file_summary { valid_total_size = ValidTotalSize,
+                             contiguous_top   = ContiguousTop,
+                             right            = undefined,
+                             locked           = false,
+                             file_size        = FileSize }] =
+                ets:lookup(FileSummaryEts, CurFile),
+            ValidTotalSize1 = ValidTotalSize + TotalSize,
+            ContiguousTop1 = case CurOffset =:= ContiguousTop of
+                                 true  -> ValidTotalSize1;
+                                 false -> ContiguousTop
+                             end,
+            true = ets:update_element(
+                     FileSummaryEts, CurFile,
+                     [{#file_summary.valid_total_size, ValidTotalSize1},
+                      {#file_summary.contiguous_top,   ContiguousTop1},
+                      {#file_summary.file_size,        FileSize + TotalSize}]),
+            NextOffset = CurOffset + TotalSize,
+            noreply(
+              maybe_roll_to_new_file(
+                NextOffset, State #msstate {
+                              sum_valid_data = SumValid + TotalSize,
+                              sum_file_size  = SumFileSize + TotalSize }));
+        #msg_location { ref_count = RefCount } ->
+            %% We already know about it, just update counter. Only
+            %% update field otherwise bad interaction with concurrent GC
+            ok = index_update_fields(Guid,
+                                     {#msg_location.ref_count, RefCount + 1},
+                                     State),
+            noreply(State)
+    end;
+
+handle_cast({remove, Guids}, State) ->
+    State1 = lists:foldl(
+               fun (Guid, State2) -> remove_message(Guid, State2) end,
+               State, Guids),
+    noreply(maybe_compact(State1));
+
+handle_cast({release, Guids}, State =
+                #msstate { dedup_cache_ets = DedupCacheEts }) ->
+    lists:foreach(
+      fun (Guid) -> decrement_cache(DedupCacheEts, Guid) end, Guids),
+    noreply(State);
+
+handle_cast({sync, Guids, K},
+            State = #msstate { current_file        = CurFile,
+                               current_file_handle = CurHdl,
+                               on_sync             = Syncs }) ->
+    {ok, SyncOffset} = file_handle_cache:last_sync_offset(CurHdl),
+    case lists:any(fun (Guid) ->
+                           #msg_location { file = File, offset = Offset } =
+                               index_lookup(Guid, State),
+                           File =:= CurFile andalso Offset >= SyncOffset
+                   end, Guids) of
+        false -> K(),
+                 noreply(State);
+        true  -> noreply(State #msstate { on_sync = [K | Syncs] })
+    end;
+
+handle_cast(sync, State) ->
+    noreply(internal_sync(State));
+
+handle_cast({gc_done, Reclaimed, Src, Dst},
+            State = #msstate { sum_file_size    = SumFileSize,
+                               gc_active        = {Src, Dst},
+                               file_handles_ets = FileHandlesEts,
+                               file_summary_ets = FileSummaryEts }) ->
+    %% GC done, so now ensure that any clients that have open fhs to
+    %% those files close them before using them again. This has to be
+    %% done here (given it's done in the msg_store, and not the gc),
+    %% and not when starting up the GC, because if done when starting
+    %% up the GC, the client could find the close, and close and
+    %% reopen the fh, whilst the GC is waiting for readers to
+    %% disappear, before it's actually done the GC.
+    true = mark_handle_to_close(FileHandlesEts, Src),
+    true = mark_handle_to_close(FileHandlesEts, Dst),
+    %% we always move data left, so Src has gone and was on the
+    %% right, so need to make dest = source.right.left, and also
+    %% dest.right = source.right
+    [#file_summary { left    = Dst,
+                     right   = SrcRight,
+                     locked  = true,
+                     readers = 0 }] = ets:lookup(FileSummaryEts, Src),
+    %% result not matched: this could fail if SrcRight =:= undefined
+    ets:update_element(FileSummaryEts, SrcRight, {#file_summary.left, Dst}),
+    true = ets:update_element(FileSummaryEts, Dst,
+                              [{#file_summary.locked, false},
+                               {#file_summary.right,  SrcRight}]),
+    true = ets:delete(FileSummaryEts, Src),
+    noreply(
+      maybe_compact(run_pending(
+                      State #msstate { sum_file_size = SumFileSize - Reclaimed,
+                                       gc_active     = false })));
+
+handle_cast({set_maximum_since_use, Age}, State) ->
+    ok = file_handle_cache:set_maximum_since_use(Age),
+    noreply(State).
+
+handle_info(timeout, State) ->
+    noreply(internal_sync(State));
+
+handle_info({'EXIT', _Pid, Reason}, State) ->
+    {stop, Reason, State}.
+
+terminate(_Reason, State = #msstate { index_state         = IndexState,
+                                      index_module        = IndexModule,
+                                      current_file_handle = CurHdl,
+                                      gc_pid              = GCPid,
+                                      file_handles_ets    = FileHandlesEts,
+                                      file_summary_ets    = FileSummaryEts,
+                                      dedup_cache_ets     = DedupCacheEts,
+                                      cur_file_cache_ets  = CurFileCacheEts,
+                                      client_refs         = ClientRefs,
+                                      dir                 = Dir }) ->
+    %% The GC must be stopped before anything else: it may be mid-run
+    %% and we are about to delete the ets tables it works on.
+    ok = rabbit_msg_store_gc:stop(GCPid),
+    Synced = case CurHdl =:= undefined of
+                 true  -> State;
+                 false -> Flushed = internal_sync(State),
+                          file_handle_cache:close(CurHdl),
+                          Flushed
+             end,
+    Closed = close_all_handles(Synced),
+    store_file_summary(FileSummaryEts, Dir),
+    lists:foreach(fun ets:delete/1, [FileSummaryEts, DedupCacheEts,
+                                     FileHandlesEts, CurFileCacheEts]),
+    IndexModule:terminate(IndexState),
+    store_recovery_terms([{client_refs, sets:to_list(ClientRefs)},
+                          {index_module, IndexModule}], Dir),
+    Closed #msstate { index_state         = undefined,
+                      current_file_handle = undefined }.
+
+code_change(_OldVsn, State, _Extra) ->
+    {ok, State}.
+
+%%----------------------------------------------------------------------------
+%% general helper functions
+%%----------------------------------------------------------------------------
+
+noreply(State) ->
+    {State1, Timeout} = next_state(State),
+    {noreply, State1, Timeout}.
+
+reply(Reply, State) ->
+    {State1, Timeout} = next_state(State),
+    {reply, Reply, State1, Timeout}.
+
+next_state(State = #msstate { on_sync = [], sync_timer_ref = undefined }) ->
+    {State, hibernate};
+next_state(State = #msstate { sync_timer_ref = undefined }) ->
+    {start_sync_timer(State), 0};
+next_state(State = #msstate { on_sync = [] }) ->
+    {stop_sync_timer(State), hibernate};
+next_state(State) ->
+    {State, 0}.
+
+start_sync_timer(State = #msstate { sync_timer_ref = undefined }) ->
+    {ok, TRef} = timer:apply_after(?SYNC_INTERVAL, ?MODULE, sync, [self()]),
+    State #msstate { sync_timer_ref = TRef }.
+
+stop_sync_timer(State = #msstate { sync_timer_ref = undefined }) ->
+    State;
+stop_sync_timer(State = #msstate { sync_timer_ref = TRef }) ->
+    {ok, cancel} = timer:cancel(TRef),
+    State #msstate { sync_timer_ref = undefined }.
+
+%% Stop the sync timer; if callbacks are waiting, sync and run them in order.
+internal_sync(State = #msstate { on_sync = [] }) ->
+    stop_sync_timer(State);
+internal_sync(State = #msstate { current_file_handle = CurHdl,
+                                 on_sync = Syncs }) ->
+    State1 = stop_sync_timer(State),
+    ok = file_handle_cache:sync(CurHdl),
+    lists:foreach(fun (K) -> K() end, lists:reverse(Syncs)),
+    State1 #msstate { on_sync = [] }.
+
+read_message(Guid, From,
+             State = #msstate { dedup_cache_ets = DedupCacheEts }) ->
+    case index_lookup(Guid, State) of
+        not_found ->
+            gen_server2:reply(From, not_found),
+            State;
+        MsgLocation ->
+            case fetch_and_increment_cache(DedupCacheEts, Guid) of
+                not_found -> read_message1(From, MsgLocation, State);
+                Msg       -> gen_server2:reply(From, {ok, Msg}),
+                             State
+            end
+    end.
+
+read_message1(From, #msg_location { guid = Guid, ref_count = RefCount,
+                                    file = File, offset = Offset } = MsgLoc,
+              State = #msstate { current_file        = CurFile,
+                                 current_file_handle = CurHdl,
+                                 file_summary_ets    = FileSummaryEts,
+                                 dedup_cache_ets     = DedupCacheEts,
+                                 cur_file_cache_ets  = CurFileCacheEts }) ->
+    case File =:= CurFile of
+        true  -> {Msg, State1} =
+                     %% can return [] if msg in file existed on startup
+                     case ets:lookup(CurFileCacheEts, Guid) of
+                         [] ->
+                             {ok, RawOffSet} =
+                                 file_handle_cache:current_raw_offset(CurHdl),
+                             ok = case Offset >= RawOffSet of
+                                      true  -> file_handle_cache:flush(CurHdl);
+                                      false -> ok
+                                  end,
+                             read_from_disk(MsgLoc, State, DedupCacheEts);
+                         [{Guid, Msg1, _CacheRefCount}] ->
+                             ok = maybe_insert_into_cache(
+                                    DedupCacheEts, RefCount, Guid, Msg1),
+                             {Msg1, State}
+                     end,
+                 gen_server2:reply(From, {ok, Msg}),
+                 State1;
+        false -> [#file_summary { locked = Locked }] =
+                     ets:lookup(FileSummaryEts, File),
+                 case Locked of
+                     true  -> add_to_pending_gc_completion({read, Guid, From},
+                                                           State);
+                     false -> {Msg, State1} =
+                                  read_from_disk(MsgLoc, State, DedupCacheEts),
+                              gen_server2:reply(From, {ok, Msg}),
+                              State1
+                 end
+    end.
+
+read_from_disk(#msg_location { guid = Guid, ref_count = RefCount,
+                               file = File, offset = Offset,
+                               total_size = TotalSize },
+               State, DedupCacheEts) ->
+    {Hdl, State1} = get_read_handle(File, State),
+    {ok, Offset} = file_handle_cache:position(Hdl, Offset),
+    {ok, {Guid, Msg}} =
+        case rabbit_msg_file:read(Hdl, TotalSize) of
+            {ok, {Guid, _}} = Obj ->
+                Obj;
+            Rest ->
+                {error, {misread, [{old_state, State},
+                                   {file_num,  File},
+                                   {offset,    Offset},
+                                   {guid,      Guid},
+                                   {read,      Rest},
+                                   {proc_dict, get()}
+                                  ]}}
+        end,
+    ok = maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg),
+    {Msg, State1}.
+
+contains_message(Guid, From, State = #msstate { gc_active = GCActive }) ->
+    case index_lookup(Guid, State) of
+        not_found ->
+            gen_server2:reply(From, false),
+            State;
+        #msg_location { file = File } ->
+            case GCActive of
+                {A, B} when File =:= A orelse File =:= B ->
+                    add_to_pending_gc_completion(
+                      {contains, Guid, From}, State);
+                _ ->
+                    gen_server2:reply(From, true),
+                    State
+            end
+    end.
+
+remove_message(Guid, State = #msstate { sum_valid_data   = SumValid,
+                                        file_summary_ets = FileSummaryEts,
+                                        dedup_cache_ets  = DedupCacheEts }) ->
+    %% Release one reference to Guid; the last one removes it from the index.
+    #msg_location { ref_count = Refs, file = File, offset = Offset,
+                    total_size = Size } = index_lookup(Guid, State),
+    case Refs of
+        1 ->
+            %% don't remove from CUR_FILE_CACHE_ETS_NAME here because
+            %% there may be further writes in the mailbox for the same
+            %% msg.
+            ok = remove_cache_entry(DedupCacheEts, Guid),
+            [#file_summary { valid_total_size = ValidSize,
+                             contiguous_top   = Top,
+                             locked           = Locked }] =
+                ets:lookup(FileSummaryEts, File),
+            case Locked of
+                true ->
+                    %% GC owns the file: retry once it has finished
+                    add_to_pending_gc_completion({remove, Guid}, State);
+                false ->
+                    ok = index_delete(Guid, State),
+                    Updates = [{#file_summary.valid_total_size,
+                                ValidSize - Size},
+                               {#file_summary.contiguous_top,
+                                erlang:min(Top, Offset)}],
+                    true = ets:update_element(FileSummaryEts, File, Updates),
+                    State1 = delete_file_if_empty(File, State),
+                    State1 #msstate { sum_valid_data = SumValid - Size }
+            end;
+        _ when 1 < Refs ->
+            ok = decrement_cache(DedupCacheEts, Guid),
+            %% only update field, otherwise bad interaction with concurrent GC
+            ok = index_update_fields(Guid,
+                                     {#msg_location.ref_count, Refs - 1},
+                                     State),
+            State
+    end.
+
+add_to_pending_gc_completion(
+  Op, State = #msstate { pending_gc_completion = Pending }) ->
+    State #msstate { pending_gc_completion = [Op | Pending] }.
+
+run_pending(State = #msstate { pending_gc_completion = [] }) ->
+    State;
+run_pending(State = #msstate { pending_gc_completion = Pending }) ->
+    State1 = State #msstate { pending_gc_completion = [] },
+    lists:foldl(fun run_pending/2, State1, lists:reverse(Pending)).
+
+run_pending({read, Guid, From}, State) ->
+    read_message(Guid, From, State);
+run_pending({contains, Guid, From}, State) ->
+    contains_message(Guid, From, State);
+run_pending({remove, Guid}, State) ->
+    remove_message(Guid, State).
+
+safe_ets_update_counter(Tab, Key, UpdateOp, SuccessFun, FailThunk) ->
+    try
+        SuccessFun(ets:update_counter(Tab, Key, UpdateOp))
+    catch error:badarg -> FailThunk() %% also traps badarg from SuccessFun
+    end.
+
+safe_ets_update_counter_ok(Tab, Key, UpdateOp, FailThunk) ->
+    safe_ets_update_counter(Tab, Key, UpdateOp, fun (_) -> ok end, FailThunk).
+
+%%----------------------------------------------------------------------------
+%% file helper functions
+%%----------------------------------------------------------------------------
+
+open_file(Dir, FileName, Mode) ->
+    file_handle_cache:open(form_filename(Dir, FileName), ?BINARY_MODE ++ Mode,
+                           [{write_buffer, ?HANDLE_CACHE_BUFFER_SIZE}]).
+
+close_handle(Key, CState = #client_msstate { file_handle_cache = FHC }) ->
+    CState #client_msstate { file_handle_cache = close_handle(Key, FHC) };
+
+close_handle(Key, State = #msstate { file_handle_cache = FHC }) ->
+    State #msstate { file_handle_cache = close_handle(Key, FHC) };
+
+close_handle(Key, FHC) ->
+    case dict:find(Key, FHC) of
+        {ok, Hdl} -> ok = file_handle_cache:close(Hdl),
+                     dict:erase(Key, FHC);
+        error     -> FHC
+    end.
+
+mark_handle_open(FileHandlesEts, File) ->
+    %% This is fine to fail (already exists)
+    ets:insert_new(FileHandlesEts, {{self(), File}, open}),
+    true.
+
+mark_handle_to_close(FileHandlesEts, File) ->
+    [ ets:update_element(FileHandlesEts, Key, {2, close})
+      || {Key, open} <- ets:match_object(FileHandlesEts, {{'_', File}, open}) ],
+    true.
+
+%% Close (and forget) every handle of ours that has been marked 'close'.
+close_all_indicated(CState = #client_msstate { file_handles_ets = Tab }) ->
+    Close = fun ({Key = {_Self, File}, close}, CStateN) ->
+                    true = ets:delete(Tab, Key),
+                    close_handle(File, CStateN)
+            end,
+    lists:foldl(Close, CState, ets:match_object(Tab, {{self(), '_'}, close})).
+
+%% Closes every handle held in the state's file_handle_cache and
+%% returns the state with an empty cache. For a #client_msstate{}
+%% the {self(), File} entry of each handle is also removed from the
+%% file handles table.
+close_all_handles(#client_msstate { file_handles_ets  = Ets,
+                                    file_handle_cache = Cache } = CState) ->
+    Owner    = self(),
+    CloseOne = fun (File, Handle, ok) ->
+                       true = ets:delete(Ets, {Owner, File}),
+                       file_handle_cache:close(Handle)
+               end,
+    ok = dict:fold(CloseOne, ok, Cache),
+    CState #client_msstate { file_handle_cache = dict:new() };
+
+close_all_handles(#msstate { file_handle_cache = Cache } = State) ->
+    CloseOne = fun (_Key, Handle, ok) -> file_handle_cache:close(Handle) end,
+    ok = dict:fold(CloseOne, ok, Cache),
+    State #msstate { file_handle_cache = dict:new() }.
+
+%% Returns {Hdl, NewState}, where Hdl is a read handle for file
+%% FileNum. The handle is taken from, or added to, the
+%% file_handle_cache of the given #client_msstate{} or #msstate{}.
+get_read_handle(FileNum, #client_msstate { file_handle_cache = Cache,
+                                           dir = Dir } = CState) ->
+    {Hdl, Cache1} = get_read_handle(FileNum, Cache, Dir),
+    {Hdl, CState #client_msstate { file_handle_cache = Cache1 }};
+
+get_read_handle(FileNum, #msstate { file_handle_cache = Cache,
+                                    dir = Dir } = State) ->
+    {Hdl, Cache1} = get_read_handle(FileNum, Cache, Dir),
+    {Hdl, State #msstate { file_handle_cache = Cache1 }}.
+
+%% Looks FileNum up in the handle dict FHC. On a hit the cached handle
+%% is returned with FHC unchanged; on a miss the file is opened with
+%% ?READ_MODE and the new handle is stored in the returned dict.
+get_read_handle(FileNum, FHC, Dir) ->
+    case dict:find(FileNum, FHC) of
+        error ->
+            FileName = filenum_to_name(FileNum),
+            {ok, NewHdl} = open_file(Dir, FileName, ?READ_MODE),
+            {NewHdl, dict:store(FileNum, NewHdl, FHC)};
+        {ok, CachedHdl} ->
+            {CachedHdl, FHC}
+    end.
+
+%% Seeks Hdl to FileSizeLimit, truncates there, then seeks to
+%% FinalPos. Presumably the truncate at FileSizeLimit is what sets the
+%% file's size to that value - confirm against file_handle_cache.
+%% Returns ok; any unexpected result from the cache is a badmatch.
+preallocate(Hdl, FileSizeLimit, FinalPos) ->
+    SeekTo = fun (Pos) ->
+                     {ok, Pos} = file_handle_cache:position(Hdl, Pos),
+                     ok
+             end,
+    ok = SeekTo(FileSizeLimit),
+    ok = file_handle_cache:truncate(Hdl),
+    SeekTo(FinalPos).
+
+%% Seeks Hdl to Lowpoint and truncates there, then calls
+%% preallocate/3 with Highpoint as the size limit and Lowpoint as the
+%% final position, so the handle ends up positioned at Lowpoint.
+%% Returns ok; every step is asserted, so a failure is a badmatch.
+truncate_and_extend_file(Hdl, Lowpoint, Highpoint) ->
+    {ok, Lowpoint} = file_handle_cache:position(Hdl, Lowpoint),
+    ok = file_handle_cache:truncate(Hdl),
+    ok = preallocate(Hdl, Highpoint, Lowpoint).
+
+%% Joins a directory and a file name into a single path.
+form_filename(Dir, Name) ->
+    filename:join(Dir, Name).
+
+%% Builds the file name for file number File: its decimal
+%% representation followed by ?FILE_EXTENSION.
+filenum_to_name(File) ->
+    Digits = integer_to_list(File),
+    Digits ++ ?FILE_EXTENSION.
+
+%% Recovers the file number from a file name by dropping the
+%% extension and parsing what is left as an integer.
+filename_to_num(FileName) ->
+    Root = filename:rootname(FileName),
+    list_to_integer(Root).
+
+%% Sorts file names into ascending order of the file number encoded
+%% in each name (see filename_to_num/1).
+%%
+%% Each name is parsed exactly once and the numbers are then sorted
+%% with lists:keysort/2, which is stable: names mapping to the same
+%% number keep their relative input order. This avoids handing
+%% lists:sort/2 a strict '<' comparator (its ordering fun is specified
+%% as "less than or equal") and re-parsing names on every comparison.
+sort_file_names(FileNames) ->
+    Keyed = [{filename_to_num(FileName), FileName} || FileName <- FileNames],
+    [FileName || {_Num, FileName} <- lists:keysort(1, Keyed)].
+
+%%----------------------------------------------------------------------------
+%% message cache helper functions
+%%----------------------------------------------------------------------------
+
+%% Adds Msg to the dedup cache only when its RefCount is greater than
+%% 1; otherwise does nothing and returns ok.
+maybe_insert_into_cache(DedupCacheEts, RefCount, Guid, Msg) ->
+    case RefCount > 1 of
+        true  -> update_msg_cache(DedupCacheEts, Guid, Msg);
+        false -> ok
+    end.
+
+%% Ensures Guid is in CacheEts: inserts {Guid, Msg, 1} if the key is
+%% absent, otherwise bumps the counter in position 3. The whole
+%% operation is handed to safe_ets_update_counter_ok/4 as its failure
+%% thunk (presumably run when the entry vanished in between).
+update_msg_cache(CacheEts, Guid, Msg) ->
+    Retry = fun () -> update_msg_cache(CacheEts, Guid, Msg) end,
+    case ets:insert_new(CacheEts, {Guid, Msg, 1}) of
+        false -> safe_ets_update_counter_ok(CacheEts, Guid, {3, +1}, Retry);
+        true  -> ok
+    end.
+
+%% Deletes the entry for Guid from the dedup cache table. Returns ok.
+remove_cache_entry(DedupCacheEts, Guid) ->
+    Deleted = ets:delete(DedupCacheEts, Guid),
+    true = Deleted,
+    ok.
+
+%% Looks Guid up in the dedup cache. Returns not_found when there is
+%% no entry; otherwise increments the entry's counter (position 3)
+%% and returns the cached message.
+fetch_and_increment_cache(DedupCacheEts, Guid) ->
+    case ets:lookup(DedupCacheEts, Guid) of
+        [{_Guid, Msg, _RefCount}] ->
+            %% someone has deleted us in the meantime, insert us
+            Reinsert = fun () ->
+                               ok = update_msg_cache(DedupCacheEts, Guid, Msg)
+                       end,
+            safe_ets_update_counter_ok(DedupCacheEts, Guid, {3, +1},
+                                       Reinsert),
+            Msg;
+        [] ->
+            not_found
+    end.
+
+%% Decrements the counter (position 3) of Guid's dedup cache entry
+%% and deletes the entry once the counter is at or below zero.
+%% Returns ok.
+decrement_cache(DedupCacheEts, Guid) ->
+    OnSuccess = fun (Count) when Count =< 0 ->
+                        true = ets:delete(DedupCacheEts, Guid);
+                    (_Count) ->
+                        true
+                end,
+    %% Guid is not in there because although it's been delivered,
+    %% it's never actually been read (think: persistent message held
+    %% in RAM)
+    OnMissing = fun () -> true end,
+    true = safe_ets_update_counter(DedupCacheEts, Guid, {3, -1},
+                                   OnSuccess, OnMissing),
+    ok.
+
+%%----------------------------------------------------------------------------
+%% index
+%%----------------------------------------------------------------------------
+
+%% Delegates a lookup of Key to the index module carried in the given
+%% #client_msstate{} or #msstate{}, passing that record's index state.
+index_lookup(Key, #client_msstate { index_module = Mod,
+                                    index_state  = IdxState }) ->
+    Mod:lookup(Key, IdxState);
+
+index_lookup(Key, #msstate { index_module = Mod, index_state = IdxState }) ->
+    Mod:lookup(Key, IdxState).
+
+%% Delegates insertion of Obj to the state's index module.
+index_insert(Obj, #msstate { index_module = Mod, index_state = IdxState }) ->
+    Mod:insert(Obj, IdxState).
+
+%% Delegates an update of Obj to the state's index module.
+index_update(Obj, #msstate { index_module = Mod, index_state = IdxState }) ->
+    Mod:update(Obj, IdxState).
+
+%% Delegates a field update (Updates) on the entry for Key to the
+%% state's index module.
+index_update_fields(Key, Updates, #msstate { index_module = Mod,
+                                             index_state  = IdxState }) ->
+    Mod:update_fields(Key, Updates, IdxState).
+
+%% Delegates deletion of the entry for Key to the state's index module.
+index_delete(Key, #msstate { index_module = Mod, index_state = IdxState }) ->
+    Mod:delete(Key, IdxState).
+
+%% Delegates a delete_by_file call for File to the state's index
+%% module.
+index_delete_by_file(File, #msstate { index_module = Mod,
+                                      index_state  = IdxState }) ->
+    Mod:delete_by_file(File, IdxState).
+
+%%----------------------------------------------------------------------------
+%% shutdown and recovery
+%%----------------------------------------------------------------------------
+
+%% Returns {Recovered, IndexState, ClientRefsSet}.
+%%
+%% With ClientRefs =:= undefined the store directory is wiped and
+%% recreated, and a brand new index is returned with an empty ref set.
+%% Otherwise the recovery terms are read from Dir; the index is only
+%% recovered when they can be read, when the stored client refs equal
+%% ClientRefs (ignoring order), when the stored index module equals
+%% IndexModule, and when IndexModule:recover/1 succeeds. In every
+%% other case a warning is logged and a new index is built.
+recover_index_and_client_refs(IndexModule, undefined, Dir, _Server) ->
+    ok = rabbit_misc:recursive_delete([Dir]),
+    ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
+    {false, IndexModule:new(Dir), sets:new()};
+recover_index_and_client_refs(IndexModule, ClientRefs, Dir, Server) ->
+    ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
+    StartAfresh =
+        fun (ErrorMsg, ErrorArgs) ->
+                rabbit_log:warning("~w: " ++ ErrorMsg ++
+                                   "~nrebuilding indices from scratch~n",
+                                   [Server | ErrorArgs]),
+                {false, IndexModule:new(Dir), sets:new()}
+        end,
+    case read_recovery_terms(Dir) of
+        {true, Terms} ->
+            SavedRefs   = proplists:get_value(client_refs, Terms, []),
+            SavedModule = proplists:get_value(index_module, Terms),
+            SameRefs    = lists:sort(ClientRefs) =:= lists:sort(SavedRefs),
+            case SameRefs andalso IndexModule =:= SavedModule of
+                false ->
+                    StartAfresh("recovery terms differ from present", []);
+                true ->
+                    case IndexModule:recover(Dir) of
+                        {error, Reason} ->
+                            StartAfresh("failed to recover index: ~p",
+                                        [Reason]);
+                        {ok, IndexState} ->
+                            {true, IndexState, sets:from_list(ClientRefs)}
+                    end
+            end;
+        {false, Reason} ->
+            StartAfresh("failed to read recovery terms: ~p", [Reason])
+    end.
+
+%% Writes Terms to the ?CLEAN_FILENAME file inside Dir using
+%% rabbit_misc:write_term_file/2 and returns its result.
+store_recovery_terms(Terms, Dir) ->
+    Path = filename:join(Dir, ?CLEAN_FILENAME),
+    rabbit_misc:write_term_file(Path, Terms).
+
+%% Reads the ?CLEAN_FILENAME term file from Dir and then deletes it.
+%% Returns {true, Terms} only when both the read and the delete
+%% succeed; otherwise {false, Error} with the error of the step that
+%% failed.
+read_recovery_terms(Dir) ->
+    Path = filename:join(Dir, ?CLEAN_FILENAME),
+    case rabbit_misc:read_term_file(Path) of
+        {error, ReadError} ->
+            {false, ReadError};
+        {ok, Terms} ->
+            case file:delete(Path) of
+                {error, DeleteError} -> {false, DeleteError};
+                ok                   -> {true, Terms}
+            end
+    end.
+
+%% Dumps the ETS table Tid to the ?FILE_SUMMARY_FILENAME file in Dir,
+%% including the object count as extended info. Anything other than
+%% ok from ets:tab2file/3 is a badmatch.
+store_file_summary(Tid, Dir) ->
+    Path    = filename:join(Dir, ?FILE_SUMMARY_FILENAME),
+    Options = [{extended_info, [object_count]}],
+    ok = ets:tab2file(Tid, Path, Options).
+
+%% Produces the file summary ETS table and returns {Recovered, Tid}.
+%%
+%% When the first argument is false a new, empty table is created and
+%% Recovered is false. When it is true the table is loaded from the
+%% ?FILE_SUMMARY_FILENAME file in Dir; if loading fails a warning is
+%% logged and a new table is created instead.
+%%
+%% After a successful load the on-disk copy is deleted. A failure to
+%% delete it is logged rather than silently dropped; the loaded table
+%% is still returned, since its contents are unaffected.
+recover_file_summary(false, _Dir, _Server) ->
+    %% TODO: the only reason for this to be an *ordered*_set is so
+    %% that a) maybe_compact can start a traversal from the eldest
+    %% file, and b) build_index in fast recovery mode can easily
+    %% identify the current file. It's awkward to have both that
+    %% ordering and the left/right pointers in the entries - replacing
+    %% the former with some additional bit of state would be easy, but
+    %% ditching the latter would be neater.
+    {false, ets:new(rabbit_msg_store_file_summary,
+                    [ordered_set, public, {keypos, #file_summary.file}])};
+recover_file_summary(true, Dir, Server) ->
+    Path = filename:join(Dir, ?FILE_SUMMARY_FILENAME),
+    case ets:file2tab(Path) of
+        {ok, Tid} ->
+            case file:delete(Path) of
+                ok ->
+                    ok;
+                {error, DeleteError} ->
+                    rabbit_log:warning(
+                      "~w: failed to delete file summary ~p: ~p~n",
+                      [Server, Path, DeleteError])
+            end,
+            {true, Tid};
+        {error, Error} ->
+            rabbit_log:warning(
+              "~w: failed to recover file summary: ~p~n"
+              "rebuilding~n", [Server, Error]),
+            recover_file_summary(false, Dir, Server)
+    end.
+
+%% Drives the generator Gen from Seed until it yields 'finished',
+%% applying each {Guid, Delta, Next} it produces to the index:
+%%  - a Delta of 0 is skipped;
+%%  - an unknown Guid is inserted with ref_count = Delta;
+%%  - a known Guid has Delta added to its ref_count and is deleted
+%%    from the index when the sum is 0.
+%% Returns ok.
+count_msg_refs(Gen, Seed, State) ->
+    case Gen(Seed) of
+        finished ->
+            ok;
+        {_Guid, 0, Next} ->
+            count_msg_refs(Gen, Next, State);
+        {Guid, Delta, Next} ->
+            Result =
+                case index_lookup(Guid, State) of
+                    #msg_location { ref_count = Old } = Entry ->
+                        case Old + Delta of
+                            0 ->
+                                index_delete(Guid, State);
+                            New ->
+                                index_update(
+                                  Entry #msg_location { ref_count = New },
+                                  State)
+                        end;
+                    not_found ->
+                        index_insert(#msg_location { guid      = Guid,
+                                                     ref_count = Delta },
+                                     State)
+                end,
+            ok = Result,
+            count_msg_refs(Gen, Next, State)
+    end.
+
+%% Runs recover_crashed_compaction/3 for every tmp file. The main
+%% file belonging to a tmp file has the same root name with
+%% ?FILE_EXTENSION, and must be present in FileNames. Returns ok.
+recover_crashed_compactions(Dir, FileNames, TmpFileNames) ->
+    RecoverOne =
+        fun (TmpFileName) ->
+                MainFileName =
+                    filename:rootname(TmpFileName) ++ ?FILE_EXTENSION,
+                true = lists:member(MainFileName, FileNames),
+                ok = recover_crashed_compaction(Dir, TmpFileName,
+                                                MainFileName)
+        end,
+    lists:foreach(RecoverOne, TmpFileNames),
+    ok.
+
+%% Repairs the aftermath of a compaction that was interrupted while a
+%% tmp file existed for NonTmpRelatedFileName. Both files are scanned
+%% for valid messages; depending on how their guids relate (cases 1-4
+%% below) the tmp file is either simply deleted, or its contents are
+%% copied back onto the end of the main file and then it is deleted.
+%% Returns ok; every step is asserted, so any surprise is a crash.
+recover_crashed_compaction(Dir, TmpFileName, NonTmpRelatedFileName) ->
+    {ok, UncorruptedMessagesTmp, GuidsTmp} =
+        scan_file_for_valid_messages_and_guids(Dir, TmpFileName),
+    {ok, UncorruptedMessages, Guids} =
+        scan_file_for_valid_messages_and_guids(Dir, NonTmpRelatedFileName),
+    %% 1) It's possible that everything in the tmp file is also in the
+    %%    main file such that the main file is (prefix ++
+    %%    tmpfile). This means that compaction failed immediately
+    %%    prior to the final step of deleting the tmp file. Plan: just
+    %%    delete the tmp file
+    %% 2) It's possible that everything in the tmp file is also in the
+    %%    main file but with holes throughout (or just something like
+    %%    main = (prefix ++ hole ++ tmpfile)). This means that
+    %%    compaction wrote out the tmp file successfully and then
+    %%    failed. Plan: just delete the tmp file and allow the
+    %%    compaction to eventually be triggered later
+    %% 3) It's possible that everything in the tmp file is also in the
+    %%    main file but such that the main file does not end with tmp
+    %%    file (and there are valid messages in the suffix; main =
+    %%    (prefix ++ tmpfile[with extra holes?] ++ suffix)). This
+    %%    means that compaction failed as we were writing out the tmp
+    %%    file. Plan: just delete the tmp file and allow the
+    %%    compaction to eventually be triggered later
+    %% 4) It's possible that there are messages in the tmp file which
+    %%    are not in the main file. This means that writing out the
+    %%    tmp file succeeded, but then we failed as we were copying
+    %%    them back over to the main file, after truncating the main
+    %%    file. As the main file has already been truncated, it should
+    %%    consist only of valid messages. Plan: Truncate the main file
+    %%    back to before any of the messages in the tmp file and copy
+    %%    them over again
+    TmpPath = form_filename(Dir, TmpFileName),
+    case is_sublist(GuidsTmp, Guids) of
+        true -> %% we're in case 1, 2 or 3 above. Just delete the tmp file
+                %% note this also catches the case when the tmp file
+                %% is empty
+            ok = file:delete(TmpPath);
+        false ->
+            %% We're in case 4 above. We only care about the initial
+            %% msgs in main file that are not in the tmp file. If
+            %% there are no msgs in the tmp file then we would be in
+            %% the 'true' branch of this case, so we know the
+            %% lists:last call is safe.
+            EldestTmpGuid = lists:last(GuidsTmp),
+            {Guids1, UncorruptedMessages1}
+                = case lists:splitwith(
+                         fun (Guid) -> Guid =/= EldestTmpGuid end, Guids) of
+                      {_Guids, []} -> %% no msgs from tmp in main
+                          {Guids, UncorruptedMessages};
+                      {Dropped, [EldestTmpGuid | Rest]} ->
+                          %% Msgs in Dropped are in tmp, so forget them.
+                          %% *cry*. Lists indexed from 1.
+                          {Rest, lists:sublist(UncorruptedMessages,
+                                               2 + length(Dropped),
+                                               length(Rest))}
+                  end,
+            %% The main file prefix should be contiguous. Guids1 is
+            %% already bound, so this match also asserts that the
+            %% contiguous block covers every remaining message.
+            {Top, Guids1} = find_contiguous_block_prefix(
+                               lists:reverse(UncorruptedMessages1)),
+            %% we should have that none of the messages in the prefix
+            %% are in the tmp file
+            true = is_disjoint(Guids1, GuidsTmp),
+            %% must open with read flag, otherwise will stomp over contents
+            {ok, MainHdl} = open_file(Dir, NonTmpRelatedFileName,
+                                      [read | ?WRITE_MODE]),
+            %% Wipe out any rubbish at the end of the file. Remember
+            %% the head of the list will be the highest entry in the
+            %% file.
+            [{_, TmpTopTotalSize, TmpTopOffset}|_] = UncorruptedMessagesTmp,
+            TmpSize = TmpTopOffset + TmpTopTotalSize,
+            %% Extend the main file as big as necessary in a single
+            %% move. If we run out of disk space, this truncate could
+            %% fail, but we still aren't risking losing data
+            ok = truncate_and_extend_file(MainHdl, Top, Top + TmpSize),
+            {ok, TmpHdl} = open_file(Dir, TmpFileName, ?READ_AHEAD_MODE),
+            {ok, TmpSize} = file_handle_cache:copy(TmpHdl, MainHdl, TmpSize),
+            ok = file_handle_cache:close(MainHdl),
+            ok = file_handle_cache:delete(TmpHdl),
+
+            %% Rescan the repaired main file as a sanity check.
+            {ok, _MainMessages, GuidsMain} =
+                scan_file_for_valid_messages_and_guids(
+                  Dir, NonTmpRelatedFileName),
+            %% check that everything in Guids1 is in GuidsMain
+            true = is_sublist(Guids1, GuidsMain),
+            %% check that everything in GuidsTmp is in GuidsMain
+            true = is_sublist(GuidsTmp, GuidsMain)
+    end,
+    ok.
+
+%% True when every element of SmallerL also occurs in BiggerL. Order
+%% and multiplicity are ignored; an empty SmallerL yields true.
+is_sublist(SmallerL, BiggerL) ->
+    IsMissing = fun (Item) -> not lists:member(Item, BiggerL) end,
+    not lists:any(IsMissing, SmallerL).
+
+%% True when no element of SmallerL occurs in BiggerL. An empty
+%% SmallerL yields true.
+is_disjoint(SmallerL, BiggerL) ->
+    IsShared = fun (Item) -> lists:member(Item, BiggerL) end,
+    not lists:any(IsShared, SmallerL).
+
+%% Opens FileName in Dir for reading and hands it, together with its
+%% size on disk, to rabbit_msg_file:scan/2, whose result is returned.
+%% A missing file is reported as {ok, [], 0}; any other open failure
+%% as {error, {unable_to_scan_file, FileName, Reason}}.
+scan_file_for_valid_messages(Dir, FileName) ->
+    case open_file(Dir, FileName, ?READ_MODE) of
+        {error, enoent} ->
+            {ok, [], 0};
+        {error, Reason} ->
+            {error, {unable_to_scan_file, FileName, Reason}};
+        {ok, Hdl} ->
+            Size   = filelib:file_size(form_filename(Dir, FileName)),
+            Result = rabbit_msg_file:scan(Hdl, Size),
+            %% if something really bad has happened, the close could
+            %% fail, but ignore
+            file_handle_cache:close(Hdl),
+            Result
+    end.
+
+%% As scan_file_for_valid_messages/2, but returns
+%% {ok, Messages, Guids}, where Guids are the guids of Messages in
+%% the same order. A failed scan is a badmatch.
+scan_file_for_valid_messages_and_guids(Dir, FileName) ->
+    {ok, Messages, _FileSize} = scan_file_for_valid_messages(Dir, FileName),
+    Guids = [Guid || {Guid, _TotalSize, _FileOffset} <- Messages],
+    {ok, Messages, Guids}.
+
+%% Walks a list of {Guid, TotalSize, Offset} triples for as long as
+%% each Offset is exactly where the previous message ended (starting
+%% from offset 0). Returns {Top, Guids}: Top is the offset just past
+%% the last contiguous message and Guids are the guids walked.
+%%
+%% Takes the list in *ascending* order (i.e. eldest message
+%% first). This is the opposite of what scan_file_for_valid_messages
+%% produces. The list of msgs that is produced is youngest first.
+find_contiguous_block_prefix(Msgs) ->
+    find_contiguous_block_prefix(Msgs, 0, []).
+
+find_contiguous_block_prefix([], Expected, Acc) ->
+    {Expected, Acc};
+find_contiguous_block_prefix([{Guid, TotalSize, Offset} | Rest],
+                             Expected, Acc)
+  when Offset =:= Expected ->
+    find_contiguous_block_prefix(Rest, Expected + TotalSize, [Guid | Acc]);
+find_contiguous_block_prefix([_MsgAfterGap | _Rest], Expected, Acc) ->
+    {Expected, Acc}.
+
+%% Returns {Offset, State} once the store's totals are established.
+%%
+%% With a recovered file summary (first argument true) the summary
+%% table is folded over: the valid and total sizes are summed into
+%% the state, and current_file and Offset are taken from whichever
+%% entry the fold visits last.
+%%
+%% Otherwise a gatherer is started and build_index/4 scans the files.
+%% With no files at all only the state's current file is scanned;
+%% with files, any that turn out to be empty are deleted afterwards.
+build_index(true, _Files,
+            State = #msstate { file_summary_ets = FileSummaryEts }) ->
+    Accumulate =
+        fun (#file_summary { file             = File,
+                             file_size        = FileSize,
+                             valid_total_size = ValidTotalSize },
+             {_PrevSize, Acc = #msstate { sum_valid_data = SumValid,
+                                          sum_file_size  = SumFileSize }}) ->
+                Acc1 = Acc #msstate {
+                         sum_valid_data = SumValid + ValidTotalSize,
+                         sum_file_size  = SumFileSize + FileSize,
+                         current_file   = File },
+                {FileSize, Acc1}
+        end,
+    ets:foldl(Accumulate, {0, State}, FileSummaryEts);
+build_index(false, Files, State) ->
+    {ok, Gatherer} = gatherer:start_link(),
+    case Files of
+        [] ->
+            CurFile = State #msstate.current_file,
+            build_index(Gatherer, undefined, [CurFile], State);
+        _ ->
+            {Offset, State1} = build_index(Gatherer, undefined, Files, State),
+            State2 = lists:foldl(fun delete_file_if_empty/2, State1, Files),
+            {Offset, State2}
+    end.
+
+%% Scans the given files in parallel and collects the results.
+%%
+%% While files remain (last clause), a gatherer fork is registered
+%% and a build_index_worker/5 job is submitted to the worker pool for
+%% the head file; Left is the file handled just before it.
+%%
+%% Once the list is exhausted (first clause), results are drained from
+%% the gatherer: each #file_summary{} is inserted into the summary
+%% table and its sizes are added to the running totals. When the
+%% gatherer reports 'empty' it is stopped, index entries whose file is
+%% 'undefined' are deleted, and {Offset, State} is returned with
+%% current_file set to Left (the last file in the list) and Offset set
+%% to that file's size, or 0 if it has no summary entry.
+build_index(Gatherer, Left, [],
+            State = #msstate { file_summary_ets = FileSummaryEts,
+                               sum_valid_data   = SumValid,
+                               sum_file_size    = SumFileSize }) ->
+    case gatherer:out(Gatherer) of
+        empty ->
+            ok = gatherer:stop(Gatherer),
+            ok = rabbit_misc:unlink_and_capture_exit(Gatherer),
+            ok = index_delete_by_file(undefined, State),
+            Offset = case ets:lookup(FileSummaryEts, Left) of
+                         []                                       -> 0;
+                         [#file_summary { file_size = FileSize }] -> FileSize
+                     end,
+            {Offset, State #msstate { current_file = Left }};
+        {value, #file_summary { valid_total_size = ValidTotalSize,
+                                file_size = FileSize } = FileSummary} ->
+            true = ets:insert_new(FileSummaryEts, FileSummary),
+            build_index(Gatherer, Left, [],
+                        State #msstate {
+                          sum_valid_data = SumValid + ValidTotalSize,
+                          sum_file_size  = SumFileSize + FileSize })
+    end;
+build_index(Gatherer, Left, [File|Files], State) ->
+    %% fork before submitting, so the worker's gatherer:finish/1 always
+    %% has a matching fork
+    ok = gatherer:fork(Gatherer),
+    ok = worker_pool:submit_async(
+           fun () -> build_index_worker(Gatherer, State,
+                                        Left, File, Files)
+           end),
+    build_index(Gatherer, File, Files, State).
+
+%% Worker body for build_index/4: scans File, and for every scanned
+%% message that is present in the index updates its index entry with
+%% the file, offset and total size found on disk. Messages not in the
+%% index are ignored. The resulting #file_summary{} for File (with
+%% Left as its left neighbour and the head of Files, if any, as its
+%% right neighbour) is pushed into the gatherer, after which the
+%% worker signals that it has finished.
+build_index_worker(Gatherer, State = #msstate { dir = Dir },
+                   Left, File, Files) ->
+    {ok, Messages, FileSize} =
+        scan_file_for_valid_messages(Dir, filenum_to_name(File)),
+    {ValidMessages, ValidTotalSize} =
+        lists:foldl(
+          fun (Obj = {Guid, TotalSize, Offset}, {VMAcc, VTSAcc}) ->
+                  case index_lookup(Guid, State) of
+                      not_found ->
+                          {VMAcc, VTSAcc};
+                      StoreEntry ->
+                          ok = index_update(StoreEntry #msg_location {
+                                              file = File, offset = Offset,
+                                              total_size = TotalSize },
+                                            State),
+                          {[Obj | VMAcc], VTSAcc + TotalSize}
+                  end
+          end, {[], 0}, Messages),
+    %% foldl reverses lists, find_contiguous_block_prefix needs
+    %% msgs eldest first, so, ValidMessages is the right way round
+    {ContiguousTop, _} = find_contiguous_block_prefix(ValidMessages),
+    {Right, FileSize1} =
+        case Files of
+            %% if it's the last file, we'll truncate to remove any
+            %% rubbish above the last valid message. This affects the
+            %% file size.
+            []    -> {undefined, case ValidMessages of
+                                     [] -> 0;
+                                     %% last element is the youngest
+                                     %% valid message, so this is the
+                                     %% end of the valid data
+                                     _  -> {_Guid, TotalSize, Offset} =
+                                               lists:last(ValidMessages),
+                                           Offset + TotalSize
+                                 end};
+            [F|_] -> {F, FileSize}
+        end,
+    ok = gatherer:in(Gatherer, #file_summary {
+                       file             = File,
+                       valid_total_size = ValidTotalSize,
+                       contiguous_top   = ContiguousTop,
+                       left             = Left,
+                       right            = Right,
+                       file_size        = FileSize1,
+                       locked           = false,
+                       readers          = 0 }),
+    ok = gatherer:finish(Gatherer).
+
+%%----------------------------------------------------------------------------
+%% garbage collection / compaction / aggregation -- internal
+%%----------------------------------------------------------------------------
+
+%% Starts a new current file once Offset has reached the store's
+%% file_size_limit; below the limit the state is returned untouched.
+%%
+%% Rolling over syncs and closes the current file, opens file
+%% CurFile + 1 for writing, registers an empty #file_summary{} for it
+%% linked to the old current file, purges entries matching
+%% {'_', '_', 0} from the current-file cache table, and finally gives
+%% maybe_compact/1 a chance to start a compaction.
+maybe_roll_to_new_file(
+  Offset,
+  State = #msstate { dir                 = Dir,
+                     current_file_handle = CurHdl,
+                     current_file        = CurFile,
+                     file_summary_ets    = FileSummaryEts,
+                     cur_file_cache_ets  = CurFileCacheEts,
+                     file_size_limit     = FileSizeLimit })
+  when Offset >= FileSizeLimit ->
+    State1 = internal_sync(State),
+    ok = file_handle_cache:close(CurHdl),
+    NextFile = CurFile + 1,
+    {ok, NextHdl} = open_file(Dir, filenum_to_name(NextFile), ?WRITE_MODE),
+    true = ets:insert_new(FileSummaryEts, #file_summary {
+                            file             = NextFile,
+                            valid_total_size = 0,
+                            contiguous_top   = 0,
+                            left             = CurFile,
+                            right            = undefined,
+                            file_size        = 0,
+                            locked           = false,
+                            readers          = 0 }),
+    %% the old current file now has a right-hand neighbour
+    true = ets:update_element(FileSummaryEts, CurFile,
+                              {#file_summary.right, NextFile}),
+    true = ets:match_delete(CurFileCacheEts, {'_', '_', 0}),
+    maybe_compact(State1 #msstate { current_file_handle = NextHdl,
+                                    current_file        = NextFile });
+maybe_roll_to_new_file(_, State) ->
+    State.
+
+%% Kicks off a compaction when all of the following hold: no gc is
+%% currently active, the files total more than twice the file size
+%% limit, and the garbage fraction (SumFileSize - SumValid) /
+%% SumFileSize exceeds ?GARBAGE_FRACTION. In any other situation, or
+%% when no suitable pair of files is found, the state is returned
+%% unchanged.
+%%
+%% When a {Src, Dst} pair is found, this process's handles on both
+%% files are closed, both summaries are marked locked, the gc process
+%% is asked to combine them, and gc_active is set to {Src, Dst}.
+maybe_compact(State = #msstate { sum_valid_data   = SumValid,
+                                 sum_file_size    = SumFileSize,
+                                 gc_active        = false,
+                                 gc_pid           = GCPid,
+                                 file_summary_ets = FileSummaryEts,
+                                 file_size_limit  = FileSizeLimit })
+  when (SumFileSize > 2 * FileSizeLimit andalso
+        (SumFileSize - SumValid) / SumFileSize > ?GARBAGE_FRACTION) ->
+    %% TODO: the algorithm here is sub-optimal - it may result in a
+    %% complete traversal of FileSummaryEts.
+    case ets:first(FileSummaryEts) of
+        '$end_of_table' ->
+            State;
+        First ->
+            case find_files_to_gc(FileSummaryEts, FileSizeLimit,
+                                  ets:lookup(FileSummaryEts, First)) of
+                not_found ->
+                    State;
+                {Src, Dst} ->
+                    State1 = close_handle(Src, close_handle(Dst, State)),
+                    true = ets:update_element(FileSummaryEts, Src,
+                                              {#file_summary.locked, true}),
+                    true = ets:update_element(FileSummaryEts, Dst,
+                                              {#file_summary.locked, true}),
+                    ok = rabbit_msg_store_gc:gc(GCPid, Src, Dst),
+                    State1 #msstate { gc_active = {Src, Dst} }
+            end
+    end;
+maybe_compact(State) ->
+    State.
+
+%% Walks the file summaries from the given entry along their 'right'
+%% pointers, looking for the first adjacent pair whose combined valid
+%% data fits within FileSizeLimit. Returns {Src, Dst}, with Dst the
+%% left file and Src its right neighbour, or not_found. A candidate
+%% Src that has no right neighbour of its own is never selected, and
+%% ends the search.
+find_files_to_gc(_FileSummaryEts, _FileSizeLimit,
+                 [#file_summary { right = undefined }]) ->
+    not_found;
+find_files_to_gc(FileSummaryEts, FileSizeLimit,
+                 [#file_summary { file             = Dst,
+                                  valid_total_size = DstValid,
+                                  right            = Src }]) ->
+    Next = ets:lookup(FileSummaryEts, Src),
+    %% also asserts that Src's left pointer refers back to Dst
+    [#file_summary { file             = Src,
+                     valid_total_size = SrcValid,
+                     left             = Dst,
+                     right            = SrcRight }] = Next,
+    case SrcRight =:= undefined of
+        true ->
+            not_found;
+        false ->
+            case DstValid + SrcValid =< FileSizeLimit of
+                true  -> {Src, Dst};
+                false -> find_files_to_gc(FileSummaryEts, FileSizeLimit,
+                                          Next)
+            end
+    end.
+
+%% Removes File from the store if it holds no valid data. The current
+%% file is never removed (first clause). For any other file, the
+%% summary entry must exist and be unlocked; when its
+%% valid_total_size is 0 the file is unlinked from its neighbours'
+%% left/right pointers, handles on it are marked for closing, its
+%% summary entry is deleted, this process's own handle is closed, the
+%% file is deleted from disk and sum_file_size is reduced by its size.
+%% Otherwise the state is returned unchanged.
+delete_file_if_empty(File, State = #msstate { current_file = File }) ->
+    State;
+delete_file_if_empty(File, State = #msstate {
+                             dir              = Dir,
+                             sum_file_size    = SumFileSize,
+                             file_handles_ets = FileHandlesEts,
+                             file_summary_ets = FileSummaryEts }) ->
+    [#file_summary { valid_total_size = ValidData,
+                     left             = Left,
+                     right            = Right,
+                     file_size        = FileSize,
+                     locked           = false }] =
+        ets:lookup(FileSummaryEts, File),
+    case ValidData of
+        %% we should NEVER find the current file in here hence right
+        %% should always be a file, not undefined
+        0 -> case {Left, Right} of
+                 {undefined, _} when Right =/= undefined ->
+                     %% the eldest file is empty.
+                     true = ets:update_element(
+                              FileSummaryEts, Right,
+                              {#file_summary.left, undefined});
+                 {_, _} when Right =/= undefined ->
+                     %% splice File out from between its neighbours
+                     true = ets:update_element(FileSummaryEts, Right,
+                                               {#file_summary.left, Left}),
+                     true = ets:update_element(FileSummaryEts, Left,
+                                               {#file_summary.right, Right})
+             end,
+             true = mark_handle_to_close(FileHandlesEts, File),
+             true = ets:delete(FileSummaryEts, File),
+             State1 = close_handle(File, State),
+             ok = file:delete(form_filename(Dir, filenum_to_name(File))),
+             State1 #msstate { sum_file_size = SumFileSize - FileSize };
+        _ -> State
+    end.
+
+%%----------------------------------------------------------------------------
+%% garbage collection / compaction / aggregation -- external
+%%----------------------------------------------------------------------------
+
+%% Combines SrcFile into DstFile. Both summary entries must exist, be
+%% marked locked, and point at each other (Src.left = Dst,
+%% Dst.right = Src); anything else is a badmatch.
+%%
+%% If either file still has readers nothing is done and
+%% 'concurrent_readers' is returned. Otherwise the files are combined,
+%% the destination's valid_total_size, contiguous_top and file_size
+%% are all set to the combined valid size, and the number of bytes
+%% reclaimed (SrcFileSize + DstFileSize - TotalValidData) is returned.
+gc(SrcFile, DstFile, State = {FileSummaryEts, _Dir, _Index, _IndexState}) ->
+    [SrcObj = #file_summary {
+       readers          = SrcReaders,
+       left             = DstFile,
+       file_size        = SrcFileSize,
+       locked           = true }] = ets:lookup(FileSummaryEts, SrcFile),
+    [DstObj = #file_summary {
+       readers          = DstReaders,
+       right            = SrcFile,
+       file_size        = DstFileSize,
+       locked           = true }] = ets:lookup(FileSummaryEts, DstFile),
+
+    case SrcReaders =:= 0 andalso DstReaders =:= 0 of
+        true  -> TotalValidData = combine_files(SrcObj, DstObj, State),
+                 %% don't update dest.right, because it could be
+                 %% changing at the same time
+                 true = ets:update_element(
+                          FileSummaryEts, DstFile,
+                          [{#file_summary.valid_total_size, TotalValidData},
+                           {#file_summary.contiguous_top,   TotalValidData},
+                           {#file_summary.file_size,        TotalValidData}]),
+                 SrcFileSize + DstFileSize - TotalValidData;
+        false -> concurrent_readers
+    end.
+
+%% Moves all valid messages of Source onto the end of Destination,
+%% compacting Destination first if it contains holes, and deletes
+%% Source. The two summaries must refer to each other (Source.left =
+%% Destination, Destination.right = Source). Returns the resulting
+%% size of Destination, i.e. SourceValid + DestinationValid.
+combine_files(#file_summary { file             = Source,
+                              valid_total_size = SourceValid,
+                              left             = Destination },
+              #file_summary { file             = Destination,
+                              valid_total_size = DestinationValid,
+                              contiguous_top   = DestinationContiguousTop,
+                              right            = Source },
+              State = {_FileSummaryEts, Dir, _Index, _IndexState}) ->
+    SourceName      = filenum_to_name(Source),
+    DestinationName = filenum_to_name(Destination),
+    {ok, SourceHdl}      = open_file(Dir, SourceName,
+                                     ?READ_AHEAD_MODE),
+    {ok, DestinationHdl} = open_file(Dir, DestinationName,
+                                     ?READ_AHEAD_MODE ++ ?WRITE_MODE),
+    ExpectedSize = SourceValid + DestinationValid,
+    %% if DestinationValid =:= DestinationContiguousTop then we don't
+    %% need a tmp file
+    %% if they're not equal, then we need to write out everything past
+    %%   the DestinationContiguousTop to a tmp file then truncate,
+    %%   copy back in, and then copy over from Source
+    %% otherwise we just truncate straight away and copy over from Source
+    case DestinationContiguousTop =:= DestinationValid of
+        true ->
+            ok = truncate_and_extend_file(
+                   DestinationHdl, DestinationContiguousTop, ExpectedSize);
+        false ->
+            %% DestinationValid is already bound, so this match also
+            %% asserts that the scan agrees with the file summary
+            {DestinationWorkList, DestinationValid} =
+                find_unremoved_messages_in_file(Destination, State),
+            %% skip the messages that already sit inside the contiguous
+            %% block at the start of Destination
+            Worklist =
+                lists:dropwhile(
+                  fun (#msg_location { offset = Offset })
+                      when Offset =/= DestinationContiguousTop ->
+                          %% it cannot be that Offset =:=
+                          %% DestinationContiguousTop because if it
+                          %% was then DestinationContiguousTop would
+                          %% have been extended by TotalSize
+                          Offset < DestinationContiguousTop
+                  end, DestinationWorkList),
+            Tmp = filename:rootname(DestinationName) ++ ?FILE_EXTENSION_TMP,
+            {ok, TmpHdl} = open_file(Dir, Tmp, ?READ_AHEAD_MODE ++ ?WRITE_MODE),
+            ok = copy_messages(
+                   Worklist, DestinationContiguousTop, DestinationValid,
+                   DestinationHdl, TmpHdl, Destination, State),
+            TmpSize = DestinationValid - DestinationContiguousTop,
+            %% so now Tmp contains everything we need to salvage from
+            %% Destination, and index_state has been updated to
+            %% reflect the compaction of Destination so truncate
+            %% Destination and copy from Tmp back to the end
+            {ok, 0} = file_handle_cache:position(TmpHdl, 0),
+            ok = truncate_and_extend_file(
+                   DestinationHdl, DestinationContiguousTop, ExpectedSize),
+            {ok, TmpSize} =
+                file_handle_cache:copy(TmpHdl, DestinationHdl, TmpSize),
+            %% position in DestinationHdl should now be DestinationValid
+            ok = file_handle_cache:sync(DestinationHdl),
+            ok = file_handle_cache:delete(TmpHdl)
+    end,
+    %% SourceValid is already bound, so the scan of Source must agree
+    %% with its file summary too
+    {SourceWorkList, SourceValid} =
+        find_unremoved_messages_in_file(Source, State),
+    ok = copy_messages(SourceWorkList, DestinationValid, ExpectedSize,
+                       SourceHdl, DestinationHdl, Destination, State),
+    %% tidy up
+    ok = file_handle_cache:close(DestinationHdl),
+    ok = file_handle_cache:delete(SourceHdl),
+    ExpectedSize.
+
+find_unremoved_messages_in_file(File,
+                                {_FileSummaryEts, Dir, Index, IndexState}) ->
+    %% the scan lists end-of-file first, so prepending gives ascending offsets
+    {ok, Scanned, _FileSize} =
+        scan_file_for_valid_messages(Dir, filenum_to_name(File)),
+    %% keep a message only if the index still locates it in this File
+    Keep = fun ({Guid, TotalSize, _Offset}, {Entries, Size} = Acc) ->
+                   case Index:lookup(Guid, IndexState) of
+                       #msg_location { file = File } = Entry ->
+                           {[Entry | Entries], Size + TotalSize};
+                       _ -> Acc
+                   end
+           end,
+    lists:foldl(Keep, {[], 0}, Scanned).
+
+%% Copies the messages in WorkList from SourceHdl to DestinationHdl and
+%% re-points their index entries at Destination, whose offsets start at
+%% InitOffset. Messages that are adjacent in the source are coalesced into
+%% a single block per copy. Returns ok if the destination offset ends at
+%% FinalOffset and a {gc_error, Details} tuple otherwise.
+copy_messages(WorkList, InitOffset, FinalOffset, SourceHdl, DestinationHdl,
+              Destination, {_FileSummaryEts, _Dir, Index, IndexState}) ->
+    Copy = fun ({From, To}) ->
+                   Len = To - From,
+                   {ok, From} = file_handle_cache:position(SourceHdl, From),
+                   {ok, Len} =
+                       file_handle_cache:copy(SourceHdl, DestinationHdl, Len)
+           end,
+    Step =
+        fun (#msg_location { guid = Guid, offset = Offset,
+                             total_size = TotalSize }, {DestOffset, Block}) ->
+                %% DestOffset is a position in the destination file; Offset
+                %% and the bounds of Block are positions in the source file.
+                %% The index entry is updated ahead of the copy itself.
+                ok = Index:update_fields(
+                       Guid, [{#msg_location.file, Destination},
+                              {#msg_location.offset, DestOffset}], IndexState),
+                {DestOffset + TotalSize,
+                 case Block of
+                     {undefined, undefined} ->
+                         %% first message: open the initial block
+                         {Offset, Offset + TotalSize};
+                     {Start, Offset} ->
+                         %% starts where the block ends: extend it
+                         {Start, Offset + TotalSize};
+                     _ ->
+                         %% gap: copy the finished block, open a new one
+                         Copy(Block),
+                         {Offset, Offset + TotalSize}
+                 end}
+        end,
+    case lists:foldl(Step, {InitOffset, {undefined, undefined}}, WorkList) of
+        {FinalOffset, _} when WorkList =:= [] ->
+            %% nothing was gathered, so there is nothing to copy or sync
+            ok;
+        {FinalOffset, LastBlock} ->
+            Copy(LastBlock), %% the block still open when the fold ended
+            ok = file_handle_cache:sync(DestinationHdl);
+        {ActualOffset, _} ->
+            {gc_error, [{expected, FinalOffset},
+                        {got, ActualOffset},
+                        {destination, Destination}]}
+    end.
diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl
new file mode 100644
index 00000000..1eb3c11f
--- /dev/null
+++ b/src/rabbit_msg_store_ets_index.erl
@@ -0,0 +1,90 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_msg_store_ets_index).
+
+-behaviour(rabbit_msg_store_index).
+
+-export([new/1, recover/1,
+         lookup/2, insert/2, update/2, update_fields/3, delete/2,
+         delete_by_file/2, terminate/1]).
+
+-define(MSG_LOC_NAME, rabbit_msg_store_ets_index).
+-define(FILENAME, "msg_store_index.ets").
+
+-include("rabbit_msg_store_index.hrl").
+
+-record(state, { table, dir }).
+
+new(Dir) -> % start afresh: any saved index file in Dir is removed
+    _ = file:delete(filename:join(Dir, ?FILENAME)),
+    Tab = ets:new(?MSG_LOC_NAME, [set, public, {keypos, #msg_location.guid}]),
+    #state { dir = Dir, table = Tab }.
+
+recover(Dir) -> % reload the saved index; the file is deleted once loaded
+    Path = filename:join(Dir, ?FILENAME),
+    case ets:file2tab(Path) of
+        {ok, Table} -> _ = file:delete(Path),
+                       {ok, #state { dir = Dir, table = Table }};
+        Error       -> Error
+    end.
+
+lookup(Key, State) -> % the entry stored under Key, or not_found
+    case ets:lookup(State #state.table, Key) of
+        [Entry] -> Entry;
+        []      -> not_found
+    end.
+
+%% Adds Obj; crashes with badmatch if its key is already present.
+insert(Obj, State) ->
+    true = ets:insert_new(State #state.table, Obj), ok.
+
+%% Stores Obj with ets:insert/2 (overwrites a same-key entry in a set).
+update(Obj, State) ->
+    true = ets:insert(State #state.table, Obj), ok.
+
+%% Applies Updates to Key's entry; badmatch if Key has no entry.
+update_fields(Key, Updates, State) ->
+    true = ets:update_element(State #state.table, Key, Updates), ok.
+
+%% Removes Key's entry; ets:delete/2 succeeds even if Key is absent.
+delete(Key, State) ->
+    true = ets:delete(State #state.table, Key), ok.
+
+delete_by_file(File, State) -> % remove every entry whose file field is File
+    Spec = [{#msg_location { file = File, _ = '_' }, [], [true]}],
+    _Deleted = ets:select_delete(State #state.table, Spec),
+    ok.
+
+terminate(#state { table = Tid, dir = Dir }) -> % persist, then drop the table
+    Path = filename:join(Dir, ?FILENAME),
+    ok = ets:tab2file(Tid, Path, [{extended_info, [object_count]}]),
+    ets:delete(Tid).
diff --git a/src/rabbit_msg_store_gc.erl b/src/rabbit_msg_store_gc.erl
new file mode 100644
index 00000000..eaa41173
--- /dev/null
+++ b/src/rabbit_msg_store_gc.erl
@@ -0,0 +1,141 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_msg_store_gc).
+
+-behaviour(gen_server2).
+
+-export([start_link/4, gc/3, no_readers/2, stop/1]).
+
+-export([set_maximum_since_use/2]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+-record(gcstate,
+        {dir,
+         index_state,
+         index_module,
+         parent,
+         file_summary_ets,
+         scheduled
+        }).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-spec(start_link/4 :: (file:filename(), any(), atom(), ets:tid()) ->
+                           'ignore' | rabbit_types:ok_or_error2(pid(), any())).
+-spec(gc/3 :: (pid(), non_neg_integer(), non_neg_integer()) -> 'ok').
+-spec(no_readers/2 :: (pid(), non_neg_integer()) -> 'ok').
+-spec(stop/1 :: (pid()) -> 'ok').
+-spec(set_maximum_since_use/2 :: (pid(), non_neg_integer()) -> 'ok').
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+%% Starts the GC server; the calling process is recorded as its parent.
+start_link(Dir, IndexState, IndexModule, FileSummaryEts) ->
+    InitArgs = [self(), Dir, IndexState, IndexModule, FileSummaryEts],
+    gen_server2:start_link(?MODULE, InitArgs, [{timeout, infinity}]).
+
+gc(Server, Source, Destination) -> % asynchronous: sent as a cast
+    gen_server2:cast(Server, {gc, Source, Destination}).
+
+%% Asynchronously (cast) sends a no_readers notification for File.
+no_readers(Server, File) -> gen_server2:cast(Server, {no_readers, File}).
+
+%% Synchronously asks the GC server to stop; waits indefinitely.
+stop(Server) -> gen_server2:call(Server, stop, infinity).
+
+set_maximum_since_use(Pid, Age) -> % pcast; 8 is presumably the priority
+    gen_server2:pcast(Pid, 8, {set_maximum_since_use, Age}).
+
+%%----------------------------------------------------------------------------
+
+init([Parent, Dir, IndexState, IndexModule, FileSummaryEts]) ->
+    ok = file_handle_cache:register_callback(
+           ?MODULE, set_maximum_since_use, [self()]), % args: our own pid
+    State = #gcstate { parent           = Parent,
+                       dir              = Dir,
+                       index_module     = IndexModule,
+                       index_state      = IndexState,
+                       file_summary_ets = FileSummaryEts,
+                       scheduled        = undefined },
+    {ok, State, hibernate,
+     {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}.
+
+%% stop is the only call handled: reply ok and terminate normally
+handle_call(stop, _From, State) -> {stop, normal, ok, State}.
+
+%% a gc request is only matched while nothing else is scheduled
+handle_cast({gc, Source, Destination},
+            State = #gcstate { scheduled = undefined }) ->
+    Scheduled = State #gcstate { scheduled = {Source, Destination} },
+    {noreply, attempt_gc(Scheduled), hibernate};
+
+handle_cast({no_readers, File}, State = #gcstate { scheduled = {Src, Dst} })
+  when File =:= Src; File =:= Dst ->
+    {noreply, attempt_gc(State), hibernate};
+
+handle_cast({no_readers, _File}, State) ->
+    {noreply, State, hibernate};
+
+handle_cast({set_maximum_since_use, Age}, State) ->
+    ok = file_handle_cache:set_maximum_since_use(Age),
+    {noreply, State, hibernate}.
+
+%% no clause accepts an info message: any that arrives stops the server
+handle_info(Info, State) -> {stop, {unhandled_info, Info}, State}.
+
+%% performs no cleanup; the state is handed back unchanged
+terminate(_Reason, State) -> State.
+
+%% the state is passed through unchanged on a code upgrade
+code_change(_OldVsn, State, _Extra) -> {ok, State}.
+
+%% Runs the scheduled gc. While readers remain the schedule is kept for a
+%% later retry; otherwise the parent is told and the schedule is cleared.
+attempt_gc(State = #gcstate { scheduled = {Source, Destination} }) ->
+    #gcstate { dir = Dir, index_module = Index, index_state = IndexState,
+               parent = Parent, file_summary_ets = FileSummaryEts } = State,
+    GcArgs = {FileSummaryEts, Dir, Index, IndexState},
+    case rabbit_msg_store:gc(Source, Destination, GcArgs) of
+        concurrent_readers -> State;
+        Reclaimed ->
+            ok = rabbit_msg_store:gc_done(
+                   Parent, Reclaimed, Source, Destination),
+            State #gcstate { scheduled = undefined }
+    end.
diff --git a/src/rabbit_msg_store_index.erl b/src/rabbit_msg_store_index.erl
new file mode 100644
index 00000000..0ed64a9d
--- /dev/null
+++ b/src/rabbit_msg_store_index.erl
@@ -0,0 +1,47 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_msg_store_index).
+
+-export([behaviour_info/1]).
+
+%% Behaviour definition for message store index modules: names the
+%% {Function, Arity} pairs that an implementation has to export.
+behaviour_info(callbacks) ->
+    [%% creating or reloading an index
+     {new, 1}, {recover, 1},
+     %% reading and writing entries
+     {lookup, 2}, {insert, 2}, {update, 2}, {update_fields, 3},
+     {delete, 2}, {delete_by_file, 2},
+     %% shutting down
+     {terminate, 1}];
+behaviour_info(_Other) ->
+    undefined.
diff --git a/src/rabbit_multi.erl b/src/rabbit_multi.erl
index 336f74bf..3facef17 100644
--- a/src/rabbit_multi.erl
+++ b/src/rabbit_multi.erl
@@ -111,7 +111,7 @@ action(start_all, [NodeCount], RpcTimeout) ->
 action(status, [], RpcTimeout) ->
     io:format("Status of all running nodes...~n", []),
     call_all_nodes(
-      fun({Node, Pid}) ->
+      fun ({Node, Pid}) ->
               RabbitRunning =
                   case is_rabbit_running(Node, RpcTimeout) of
                       false -> not_running;
@@ -123,7 +123,7 @@ action(status, [], RpcTimeout) ->
 
 action(stop_all, [], RpcTimeout) ->
     io:format("Stopping all nodes...~n", []),
-    call_all_nodes(fun({Node, Pid}) ->
+    call_all_nodes(fun ({Node, Pid}) ->
                            io:format("Stopping node ~p~n", [Node]),
                            rpc:call(Node, rabbit, stop_and_halt, []),
                            case kill_wait(Pid, RpcTimeout, false) of
@@ -309,9 +309,9 @@ is_dead(Pid) ->
              {win32, fun () ->
                              Res = os:cmd("tasklist /nh /fi \"pid eq " ++
                                           PidS ++ "\""),
-                             case regexp:first_match(Res, "erl.exe") of
-                                 {match, _, _} -> false;
-                                 _             -> true
+                             case re:run(Res, "erl\\.exe", [{capture, none}]) of
+                                 match -> false;
+                                 _     -> true
                              end
                      end}]).
 
diff --git a/src/rabbit_net.erl b/src/rabbit_net.erl
index 406977b4..6baa4b88 100644
--- a/src/rabbit_net.erl
+++ b/src/rabbit_net.erl
@@ -31,31 +31,42 @@
 
 -module(rabbit_net).
 -include("rabbit.hrl").
--include_lib("kernel/include/inet.hrl").
 
 -export([async_recv/3, close/1, controlling_process/2,
         getstat/2, peername/1, port_command/2,
         send/2, sockname/1]).
+
 %%---------------------------------------------------------------------------
 
 -ifdef(use_specs).
 
+-export_type([socket/0]).
+
 -type(stat_option() ::
 	'recv_cnt' | 'recv_max' | 'recv_avg' | 'recv_oct' | 'recv_dvi' |
 	'send_cnt' | 'send_max' | 'send_avg' | 'send_oct' | 'send_pend').
--type(error() :: {'error', any()}).
-
--spec(async_recv/3 :: (socket(), integer(), timeout()) -> {'ok', any()}).
--spec(close/1 :: (socket()) -> 'ok' | error()).
--spec(controlling_process/2 :: (socket(), pid()) -> 'ok' | error()).
+-type(error() :: rabbit_types:error(any())).
+-type(socket() :: rabbit_networking:ip_port() | rabbit_types:ssl_socket()).
+
+-spec(async_recv/3 ::
+        (socket(), integer(), timeout()) -> rabbit_types:ok(any())).
+-spec(close/1 :: (socket()) -> rabbit_types:ok_or_error(any())).
+-spec(controlling_process/2 ::
+        (socket(), pid()) -> rabbit_types:ok_or_error(any())).
 -spec(port_command/2 :: (socket(), iolist()) -> 'true').
--spec(send/2 :: (socket(), binary() | iolist()) -> 'ok' | error()).
--spec(peername/1 :: (socket()) ->
-        {'ok', {ip_address(), non_neg_integer()}} | error()).
--spec(sockname/1 :: (socket()) ->
-        {'ok', {ip_address(), non_neg_integer()}} | error()).
--spec(getstat/2 :: (socket(), [stat_option()]) ->
-        {'ok', [{stat_option(), integer()}]} | error()).
+-spec(send/2 ::
+        (socket(), binary() | iolist()) -> rabbit_types:ok_or_error(any())).
+-spec(peername/1 ::
+        (socket())
+        -> rabbit_types:ok({inet:ip_address(), rabbit_networking:ip_port()}) |
+           error()).
+-spec(sockname/1 ::
+        (socket())
+        -> rabbit_types:ok({inet:ip_address(), rabbit_networking:ip_port()}) |
+           error()).
+-spec(getstat/2 ::
+        (socket(), [stat_option()])
+        -> rabbit_types:ok([{stat_option(), integer()}]) | error()).
 
 -endif.
 
@@ -66,7 +77,7 @@ async_recv(Sock, Length, Timeout) when is_record(Sock, ssl_socket) ->
     Pid = self(),
     Ref = make_ref(),
 
-    spawn(fun() -> Pid ! {inet_async, Sock, Ref,
+    spawn(fun () -> Pid ! {inet_async, Sock, Ref,
                     ssl:recv(Sock#ssl_socket.ssl, Length, Timeout)}
         end),
 
diff --git a/src/rabbit_networking.erl b/src/rabbit_networking.erl
index c3d0b7b7..3a3357ba 100644
--- a/src/rabbit_networking.erl
+++ b/src/rabbit_networking.erl
@@ -63,25 +63,29 @@
 
 -ifdef(use_specs).
 
--type(host() :: ip_address() | string() | atom()).
--type(connection() :: pid()).
+-export_type([ip_port/0, hostname/0]).
 
 -spec(start/0 :: () -> 'ok').
--spec(start_tcp_listener/2 :: (host(), ip_port()) -> 'ok').
--spec(start_ssl_listener/3 :: (host(), ip_port(), [info()]) -> 'ok').
--spec(stop_tcp_listener/2 :: (host(), ip_port()) -> 'ok').
--spec(active_listeners/0 :: () -> [listener()]).
--spec(node_listeners/1 :: (erlang_node()) -> [listener()]).
--spec(connections/0 :: () -> [connection()]).
--spec(connection_info_keys/0 :: () -> [info_key()]).
--spec(connection_info/1 :: (connection()) -> [info()]).
--spec(connection_info/2 :: (connection(), [info_key()]) -> [info()]).
--spec(connection_info_all/0 :: () -> [[info()]]).
--spec(connection_info_all/1 :: ([info_key()]) -> [[info()]]).
+-spec(start_tcp_listener/2 :: (hostname(), ip_port()) -> 'ok').
+-spec(start_ssl_listener/3 :: (hostname(), ip_port(), [rabbit_types:info()])
+                              -> 'ok').
+-spec(stop_tcp_listener/2 :: (hostname(), ip_port()) -> 'ok').
+-spec(active_listeners/0 :: () -> [rabbit_types:listener()]).
+-spec(node_listeners/1 :: (node()) -> [rabbit_types:listener()]).
+-spec(connections/0 :: () -> [rabbit_types:connection()]).
+-spec(connection_info_keys/0 :: () -> [rabbit_types:info_key()]).
+-spec(connection_info/1 ::
+        (rabbit_types:connection()) -> [rabbit_types:info()]).
+-spec(connection_info/2 ::
+        (rabbit_types:connection(), [rabbit_types:info_key()])
+        -> [rabbit_types:info()]).
+-spec(connection_info_all/0 :: () -> [[rabbit_types:info()]]).
+-spec(connection_info_all/1 ::
+        ([rabbit_types:info_key()]) -> [[rabbit_types:info()]]).
 -spec(close_connection/2 :: (pid(), string()) -> 'ok').
--spec(on_node_down/1 :: (erlang_node()) -> 'ok').
--spec(check_tcp_listener_address/3 :: (atom(), host(), ip_port()) ->
-             {ip_address(), atom()}).
+-spec(on_node_down/1 :: (node()) -> 'ok').
+-spec(check_tcp_listener_address/3 ::
+        (atom(), hostname(), ip_port()) -> {inet:ip_address(), atom()}).
 
 -endif.
 
@@ -102,7 +106,7 @@ boot_ssl() ->
         {ok, []} ->
             ok;
         {ok, SslListeners} ->
-            ok = rabbit_misc:start_applications([crypto, ssl]),
+            ok = rabbit_misc:start_applications([crypto, public_key, ssl]),
             {ok, SslOpts} = application:get_env(ssl_options),
             [start_ssl_listener(Host, Port, SslOpts) || {Host, Port} <- SslListeners],
             ok
diff --git a/src/rabbit_persister.erl b/src/rabbit_persister.erl
index 3cd42e47..a427b135 100644
--- a/src/rabbit_persister.erl
+++ b/src/rabbit_persister.erl
@@ -65,21 +65,29 @@
 
 -ifdef(use_specs).
 
--type(pmsg() :: {queue_name(), pkey()}).
+-type(pkey() :: rabbit_guid:guid()).
+-type(pmsg() :: {rabbit_amqqueue:name(), pkey()}).
+
 -type(work_item() ::
-      {publish, message(), pmsg()} |
+      {publish, rabbit_types:message(), pmsg()} |
       {deliver, pmsg()} |
       {ack, pmsg()}).
 
--spec(start_link/1 :: ([queue_name()]) ->
-                           {'ok', pid()} | 'ignore' | {'error', any()}).
+-spec(start_link/1 ::
+        ([rabbit_amqqueue:name()])
+        -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())).
 -spec(transaction/1 :: ([work_item()]) -> 'ok').
--spec(extend_transaction/2 :: ({txn(), queue_name()}, [work_item()]) -> 'ok').
+-spec(extend_transaction/2 ::
+        ({rabbit_types:txn(), rabbit_amqqueue:name()}, [work_item()])
+        -> 'ok').
 -spec(dirty_work/1 :: ([work_item()]) -> 'ok').
--spec(commit_transaction/1 :: ({txn(), queue_name()}) -> 'ok').
--spec(rollback_transaction/1 :: ({txn(), queue_name()}) -> 'ok').
+-spec(commit_transaction/1 ::
+        ({rabbit_types:txn(), rabbit_amqqueue:name()}) -> 'ok').
+-spec(rollback_transaction/1 ::
+        ({rabbit_types:txn(), rabbit_amqqueue:name()}) -> 'ok').
 -spec(force_snapshot/0 :: () -> 'ok').
--spec(queue_content/1 :: (queue_name()) -> [{message(), boolean()}]).
+-spec(queue_content/1 ::
+        (rabbit_amqqueue:name()) -> [{rabbit_types:message(), boolean()}]).
 
 -endif.
 
@@ -236,7 +244,7 @@ log_work(CreateWorkUnit, MessageList,
            snapshot = Snapshot = #psnapshot{messages = Messages}}) ->
     Unit = CreateWorkUnit(
              rabbit_misc:map_in_order(
-               fun(M = {publish, Message, QK = {_QName, PKey}}) ->
+               fun (M = {publish, Message, QK = {_QName, PKey}}) ->
                        case ets:lookup(Messages, PKey) of
                            [_] -> {tied, QK};
                            []  -> ets:insert(Messages, {PKey, Message}),
diff --git a/src/rabbit_plugin_activator.erl b/src/rabbit_plugin_activator.erl
index 274981ef..ef3c5cc2 100644
--- a/src/rabbit_plugin_activator.erl
+++ b/src/rabbit_plugin_activator.erl
@@ -108,6 +108,7 @@ start() ->
                                             WApp == stdlib;
                                             WApp == kernel;
                                             WApp == sasl;
+                                            WApp == crypto;
                                             WApp == os_mon -> false;
                                      _ -> true
                                  end]),
diff --git a/src/rabbit_queue_collector.erl b/src/rabbit_queue_collector.erl
new file mode 100644
index 00000000..ea3768d4
--- /dev/null
+++ b/src/rabbit_queue_collector.erl
@@ -0,0 +1,106 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_queue_collector).
+
+-behaviour(gen_server).
+
+-export([start_link/0, register/2, delete_all/1, shutdown/1]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+         terminate/2, code_change/3]).
+
+-record(state, {queues}).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-spec(start_link/0 :: () -> rabbit_types:ok(pid())).
+-spec(register/2 :: (pid(), rabbit_types:amqqueue()) -> 'ok').
+-spec(delete_all/1 :: (pid()) -> 'ok').
+
+-endif.
+
+%%----------------------------------------------------------------------------
+
+%% Starts an unnamed collector process linked to the caller.
+start_link() -> gen_server:start_link(?MODULE, [], []).
+
+register(CollectorPid, Q) -> % synchronous; waits indefinitely for the reply
+    gen_server:call(CollectorPid, {register, Q}, infinity).
+
+delete_all(CollectorPid) -> % synchronous; waits indefinitely for the reply
+    gen_server:call(CollectorPid, delete_all, infinity).
+
+%% Synchronously stops the collector; waits indefinitely for the reply.
+shutdown(CollectorPid) -> gen_server:call(CollectorPid, shutdown, infinity).
+
+%%----------------------------------------------------------------------------
+
+%% the collector starts with no queues registered
+init([]) -> {ok, #state{queues = dict:new()}}.
+
+%%--------------------------------------------------------------------------
+
+%% each registered queue is keyed by the ref of a monitor on its process
+handle_call({register, Q}, _From, State = #state{queues = Queues}) ->
+    MonitorRef = erlang:monitor(process, Q#amqqueue.pid),
+    {reply, ok, State#state{queues = dict:store(MonitorRef, Q, Queues)}};
+
+handle_call(delete_all, _From, State = #state{queues = Queues}) ->
+    lists:foreach(
+      fun ({MonitorRef, Q}) ->
+              rabbit_misc:with_exit_handler(
+                fun () -> ok end,
+                fun () -> erlang:demonitor(MonitorRef),
+                          rabbit_amqqueue:delete(Q, false, false)
+                end)
+      end, dict:to_list(Queues)),
+    {reply, ok, State};
+
+handle_call(shutdown, _From, State) ->
+    {stop, normal, ok, State}.
+
+%% every cast is ignored and leaves the state untouched
+handle_cast(_Msg, State) -> {noreply, State}.
+
+handle_info({'DOWN', MonitorRef, process, _DownPid, _Reason},
+            State = #state{queues = Queues}) -> % a monitored process died
+    {noreply, State#state{queues = dict:erase(MonitorRef, Queues)}}.
+
+%% performs no cleanup on termination
+terminate(_Reason, _State) -> ok.
+
+%% the state is passed through unchanged on a code upgrade
+code_change(_OldVsn, State, _Extra) -> {ok, State}.
diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl
new file mode 100644
index 00000000..d6b8bb28
--- /dev/null
+++ b/src/rabbit_queue_index.erl
@@ -0,0 +1,932 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_queue_index).
+
+-export([init/4, terminate/2, delete_and_terminate/1, publish/4,
+         deliver/2, ack/2, sync/2, flush/1, read/3,
+         next_segment_boundary/1, bounds/1, recover/1]).
+
+-define(CLEAN_FILENAME, "clean.dot").
+
+%%----------------------------------------------------------------------------
+
+%% The queue index is responsible for recording the order of messages
+%% within a queue on disk.
+%%
+%% Because of the fact that the queue can decide at any point to send
+%% a queue entry to disk, you can not rely on publishes appearing in
+%% order. The only thing you can rely on is a message being published,
+%% then delivered, then ack'd.
+%%
+%% In order to be able to clean up ack'd messages, we write to segment
+%% files. These files have a fixed maximum size: ?SEGMENT_ENTRY_COUNT
+%% publishes, delivers and acknowledgements. They are numbered, and so
+%% it is known that the 0th segment contains messages 0 ->
+%% ?SEGMENT_ENTRY_COUNT - 1, the 1st segment contains messages
+%% ?SEGMENT_ENTRY_COUNT -> 2*?SEGMENT_ENTRY_COUNT - 1 and so on. As
+%% such, in the segment files, we only refer to message sequence ids
+%% by the LSBs as SeqId rem ?SEGMENT_ENTRY_COUNT. This gives them a
+%% fixed size.
+%%
+%% However, transient messages which are not sent to disk at any point
+%% will cause gaps to appear in segment files. Therefore, we delete a
+%% segment file whenever the number of publishes == number of acks
+%% (note that although it is not fully enforced, it is assumed that a
+%% message will never be ackd before it is delivered, thus this test
+%% also implies == number of delivers). In practice, this does not
+%% cause disk churn in the pathological case because of the journal
+%% and caching (see below).
+%%
+%% Because of the fact that publishes, delivers and acks can occur all
+%% over, we wish to avoid lots of seeking. Therefore we have a fixed
+%% sized journal to which all actions are appended. When the number of
+%% entries in this journal reaches max_journal_entries, the journal
+%% entries are scattered out to their relevant files, and the journal
+%% is truncated to zero size. Note that entries in the journal must
+%% carry the full sequence id, thus the format of entries in the
+%% journal is different to that in the segments.
+%%
+%% The journal is also kept fully in memory, pre-segmented: the state
+%% contains a mapping from segment numbers to state-per-segment (this
+%% state is held for all segments which have been "seen": thus a
+%% segment which has been read but has no pending entries in the
+%% journal is still held in this mapping. Also note that a dict is
+%% used for this mapping, not an array because with an array, you will
+%% always have entries from 0). Actions are stored directly in this
+%% state. Thus at the point of flushing the journal, firstly no
+%% reading from disk is necessary, but secondly if the known number of
+%% acks and publishes in a segment are equal, given the known state of
+%% the segment file combined with the journal, no writing needs to be
+%% done to the segment file either (in fact it is deleted if it exists
+%% at all). This is safe given that the set of acks is a subset of the
+%% set of publishes. When it's necessary to sync messages because of
+%% transactions, it's only necessary to fsync on the journal: when
+%% entries are distributed from the journal to segment files, those
+%% segments appended to are fsync'd prior to the journal being
+%% truncated.
+%%
+%% This module is also responsible for scanning the queue index files
+%% and seeding the message store on start up.
+%%
+%% Note that in general, the representation of a message's state as
+%% the tuple: {('no_pub'|{Guid, IsPersistent}), ('del'|'no_del'),
+%% ('ack'|'no_ack')} is richer than strictly necessary for most
+%% operations. However, for startup, and to ensure the safe and
+%% correct combination of journal entries with entries read from the
+%% segment on disk, this richer representation vastly simplifies and
+%% clarifies the code.
+%%
+%% For notes on Clean Shutdown and startup, see documentation in
+%% variable_queue.
+%%
+%%----------------------------------------------------------------------------
+
+%% ---- Journal details ----
+
+-define(JOURNAL_FILENAME, "journal.jif").
+
+-define(PUB_PERSIST_JPREFIX, 2#00). %% publish of a persistent message
+-define(PUB_TRANS_JPREFIX,   2#01). %% publish of a transient message
+-define(DEL_JPREFIX,         2#10). %% deliver
+-define(ACK_JPREFIX,         2#11). %% ack
+-define(JPREFIX_BITS, 2).
+-define(SEQ_BYTES, 8). %% the prefix and the seq id share these bytes
+-define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)).
+
+%% ---- Segment details ----
+
+-define(SEGMENT_EXTENSION, ".idx").
+
+%% TODO: The segment size would be configurable, but deriving all the
+%% other values is quite hairy and quite possibly noticeably less
+%% efficient, depending on how clever the compiler is when it comes to
+%% binary generation/matching with constant vs variable lengths.
+
+-define(REL_SEQ_BITS, 14).
+-define(SEGMENT_ENTRY_COUNT, 16384). %% trunc(math:pow(2,?REL_SEQ_BITS))
+
+%% seq only is binary 00 followed by 14 bits of rel seq id
+%% (range: 0 - 16383)
+-define(REL_SEQ_ONLY_PREFIX, 00).
+-define(REL_SEQ_ONLY_PREFIX_BITS, 2).
+-define(REL_SEQ_ONLY_ENTRY_LENGTH_BYTES, 2).
+
+%% publish record is binary 1 followed by a bit for is_persistent,
+%% then 14 bits of rel seq id, and 128 bits of md5sum msg id
+-define(PUBLISH_PREFIX, 1).
+-define(PUBLISH_PREFIX_BITS, 1).
+
+-define(GUID_BYTES, 16). %% md5sum is 128 bit or 16 bytes
+-define(GUID_BITS, (?GUID_BYTES * 8)).
+%% 16 bytes for md5sum + 2 for seq, bits and prefix
+-define(PUBLISH_RECORD_LENGTH_BYTES, (?GUID_BYTES + 2)).
+
+%% 1 publish, 1 deliver, 1 ack per msg (parenthesised: safe in any expression)
+-define(SEGMENT_TOTAL_SIZE, (?SEGMENT_ENTRY_COUNT *
+        (?PUBLISH_RECORD_LENGTH_BYTES +
+         (2 * ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES)))).
+
+%% ---- misc ----
+
+-define(PUB, {_, _}). %% {Guid, IsPersistent}; only usable as a pattern
+%% file modes shared by the journal and segment handles (plus 'write')
+-define(READ_MODE, [binary, raw, read, {read_ahead, ?SEGMENT_TOTAL_SIZE}]).
+
+%%----------------------------------------------------------------------------
+
+-record(qistate, { dir, segments, journal_handle, dirty_count,
+                   max_journal_entries }). %% state of one queue's index
+%% one segment: its file path, its pending journal entries, its unacked count
+-record(segment, { num, path, journal_entries, unacked }).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-type(hdl() :: ('undefined' | any())). %% file handle; 'undefined' until opened
+-type(segment() :: ('undefined' |
+                    #segment { num             :: non_neg_integer(),
+                               path            :: file:filename(),
+                               journal_entries :: array(),
+                               unacked         :: non_neg_integer()
+                              })).
+-type(seq_id() :: integer()). %% message sequence id
+-type(seg_dict() :: {dict:dictionary(), [segment()]}). %% {by number, cached}
+-type(qistate() :: #qistate { dir                 :: file:filename(),
+                              segments            :: 'undefined' | seg_dict(),
+                              journal_handle      :: hdl(),
+                              dirty_count         :: integer(),
+                              max_journal_entries :: non_neg_integer()
+                             }).
+-type(startup_fun_state() ::
+        {(fun ((A) -> 'finished' | {rabbit_guid:guid(), non_neg_integer(), A})),
+         A}).
+
+-spec(init/4 :: (rabbit_amqqueue:name(), boolean(), boolean(),
+                 fun ((rabbit_guid:guid()) -> boolean())) ->
+             {'undefined' | non_neg_integer(), [any()], qistate()}).
+-spec(terminate/2 :: ([any()], qistate()) -> qistate()).
+-spec(delete_and_terminate/1 :: (qistate()) -> qistate()).
+-spec(publish/4 :: (rabbit_guid:guid(), seq_id(), boolean(), qistate()) ->
+                        qistate()).
+-spec(deliver/2 :: ([seq_id()], qistate()) -> qistate()).
+-spec(ack/2 :: ([seq_id()], qistate()) -> qistate()).
+-spec(sync/2 :: ([seq_id()], qistate()) -> qistate()).
+-spec(flush/1 :: (qistate()) -> qistate()).
+-spec(read/3 :: (seq_id(), seq_id(), qistate()) ->
+                     {[{rabbit_guid:guid(), seq_id(), boolean(), boolean()}],
+                      qistate()}).
+-spec(next_segment_boundary/1 :: (seq_id()) -> seq_id()).
+-spec(bounds/1 :: (qistate()) ->
+             {non_neg_integer(), non_neg_integer(), qistate()}).
+-spec(recover/1 ::
+        ([rabbit_amqqueue:name()]) -> {[[any()]], startup_fun_state()}).
+
+-endif.
+
+
+%%----------------------------------------------------------------------------
+%% public API
+%%----------------------------------------------------------------------------
+
+%% Opens the index of queue Name. Returns {Count, Terms, State}; Terms are
+%% the stored shutdown terms, or [] when they cannot be read.
+init(Name, Recover, MsgStoreRecovered, ContainsCheckFun) ->
+    Blank = #qistate { dir = Dir } = blank_state(Name, not Recover),
+    Terms = case read_shutdown_terms(Dir) of {ok, Stored} -> Stored;
+                                             {error, _}   -> []
+            end,
+    Clean = detect_clean_shutdown(Dir), %% also deletes the file just read
+    {Count, State} =
+        case Clean andalso MsgStoreRecovered of
+            true  -> init_clean(proplists:get_value(segments, Terms, []), Blank);
+            false -> init_dirty(Clean, ContainsCheckFun, Blank)
+        end,
+    {Count, Terms, State}.
+
+terminate(Terms, State) -> % clean shutdown: persist Terms + segment counts
+    {SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State),
+    store_clean_shutdown([{segments, SegmentCounts} | Terms], Dir),
+    State1.
+
+delete_and_terminate(State) -> % closes the index, then removes its directory
+    {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State),
+    ok = rabbit_misc:recursive_delete([Dir]),
+    State1.
+
+%% Writes a publish entry to the journal file and to the in-memory journal.
+publish(Guid, SeqId, IsPersistent, State) when is_binary(Guid) ->
+    ?GUID_BYTES = size(Guid),
+    {Hdl, State1} = get_journal_handle(State),
+    Prefix = case IsPersistent of true  -> ?PUB_PERSIST_JPREFIX;
+                                  false -> ?PUB_TRANS_JPREFIX end,
+    ok = file_handle_cache:append(
+           Hdl, [<<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>, Guid]),
+    maybe_flush_journal(add_to_journal(SeqId, {Guid, IsPersistent}, State1)).
+
+%% Journals a 'del' action for every sequence id in SeqIds.
+deliver(SeqIds, State) -> deliver_or_ack(del, SeqIds, State).
+
+%% Journals an 'ack' action for every sequence id in SeqIds.
+ack(SeqIds, State) -> deliver_or_ack(ack, SeqIds, State).
+
+%% Syncs the journal handle. Nothing needs doing when no sequence ids are
+%% given, or when the journal has never been opened. The state is always
+%% returned unchanged.
+sync([], State) -> State;
+sync(_SeqIds, State = #qistate { journal_handle = undefined }) -> State;
+sync(_SeqIds, State = #qistate { journal_handle = Hdl }) ->
+    %% SeqIds holds the seq id of every publish and ack in the
+    %% transaction. Ideally we would only sync if some of those pubs or
+    %% acks appear in the journal, but checking that would be complex.
+    %% Given that the variable queue publishes and acks to the qi and
+    %% then syncs, all in one operation, the seq ids cannot be missing
+    %% from the journal unless the transaction is empty, and that case
+    %% is handled by the first clause.
+    ok = file_handle_cache:sync(Hdl),
+    State.
+
+flush(State = #qistate { dirty_count = 0 }) -> State; % nothing to flush
+flush(State)                                -> flush_journal(State).
+
+%% Reads the range [Start, End): Start is inclusive, End is exclusive.
+read(StartEnd, StartEnd, State) -> {[], State};
+read(Start, End, State = #qistate { segments = Segments, dir = Dir })
+  when Start =< End ->
+    LowerB = {FirstSeg, _} = seq_id_to_seg_and_rel_seq_id(Start),
+    UpperB = {LastSeg,  _} = seq_id_to_seg_and_rel_seq_id(End - 1),
+    ReadSeg = fun (Seg, Acc) ->
+                      read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir)
+              end,
+    {Messages, Segments1} =
+        lists:foldr(ReadSeg, {[], Segments}, lists:seq(FirstSeg, LastSeg)),
+    {Messages, State #qistate { segments = Segments1 }}.
+
+%% First sequence id of the segment following the one that contains SeqId.
+next_segment_boundary(SeqId) ->
+    reconstruct_seq_id(1 + element(1, seq_id_to_seg_and_rel_seq_id(SeqId)), 0).
+
+%% Returns {LowSeqId, NextSeqId, State}.
+%%
+%% This is not particularly efficient, but it only gets invoked on queue
+%% initialisation.
+%%
+%% We don't bother working out the lowest seq_id, merely the seq_id at
+%% the start of the lowest segment. That seq_id may not actually exist,
+%% but that's fine: the important thing is that the segment exists and
+%% the seq_id reported is on a segment boundary. Nor do we care about
+%% the max seq_id: we just start the next segment, which makes life
+%% much easier.
+bounds(State = #qistate { segments = Segments }) ->
+    {LowSeqId, NextSeqId} =
+        case segment_nums(Segments) of
+            []   -> {0, 0};
+            Nums -> LowSeg  = lists:min(Nums),
+                    HighSeg = lists:max(Nums),
+                    {reconstruct_seq_id(LowSeg, 0),
+                     reconstruct_seq_id(HighSeg + 1, 0)}
+        end,
+    {LowSeqId, NextSeqId, State}.
+
+%% Scans the queues directory. Directories belonging to one of DurableQueues
+%% are kept and their shutdown terms (when readable) collected; any other
+%% directory is deleted recursively. Returns the collected terms together
+%% with the fun and start state for queue_index_walker/1.
+recover(DurableQueues) ->
+    DurableDict = dict:from_list([ {queue_name_to_dir_name(Queue), Queue} ||
+                                     Queue <- DurableQueues ]),
+    QueuesDir = queues_dir(),
+    Directories =
+        case file:list_dir(QueuesDir) of
+            {ok, Entries}   -> [ Entry || Entry <- Entries,
+                                          filelib:is_dir(
+                                            filename:join(QueuesDir, Entry)) ];
+            {error, enoent} -> []
+        end,
+    Sift = fun (QueueDir, {DurableAcc, TermsAcc}) ->
+                   Path = filename:join(QueuesDir, QueueDir),
+                   case dict:find(QueueDir, DurableDict) of
+                       {ok, Queue} ->
+                           {[Queue | DurableAcc],
+                            case read_shutdown_terms(Path) of
+                                {error, _}  -> TermsAcc;
+                                {ok, Terms} -> [Terms | TermsAcc]
+                            end};
+                       error ->
+                           ok = rabbit_misc:recursive_delete([Path]),
+                           {DurableAcc, TermsAcc}
+                   end
+           end,
+    {DurableQueueNames, DurableTerms} =
+        lists:foldl(Sift, {[], []}, Directories),
+    {DurableTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}.
+
+%%----------------------------------------------------------------------------
+%% startup and shutdown
+%%----------------------------------------------------------------------------
+
+%% Empty index state for QueueName; creates the queue's directory if needed.
+blank_state(QueueName, EnsureFresh) ->
+    DirName = queue_name_to_dir_name(QueueName),
+    Dir = filename:join(queues_dir(), DirName),
+    case EnsureFresh of
+        true  -> false = filelib:is_file(Dir); %% is_file == is file or dir
+        false -> ok
+    end,
+    ok = filelib:ensure_dir(filename:join(Dir, "nothing")),
+    {ok, MaxJournal} =
+        application:get_env(rabbit, queue_index_max_journal_entries),
+    #qistate { dir                 = Dir,
+               segments            = segments_new(),
+               journal_handle      = undefined,
+               dirty_count         = 0,
+               max_journal_entries = MaxJournal }.
+
+detect_clean_shutdown(Dir) -> % consumes (deletes) the clean-shutdown file
+    case file:delete(filename:join(Dir, ?CLEAN_FILENAME)) of
+        ok              -> true;
+        {error, enoent} -> false % any other error fails with case_clause
+    end.
+
+read_shutdown_terms(Dir) -> % reads the terms in Dir's clean-shutdown file
+    rabbit_misc:read_term_file(filename:join(Dir, ?CLEAN_FILENAME)).
+
+store_clean_shutdown(Terms, Dir) -> % writes Terms as the clean-shutdown file
+    rabbit_misc:write_term_file(filename:join(Dir, ?CLEAN_FILENAME), Terms).
+
+%% Clean recovery: journal entries plus the unacked counts stored at shutdown.
+init_clean(RecoveredCounts, State) ->
+    %% Load the journal. Since this is a clean recovery this (almost)
+    %% gets us back to where we were on shutdown.
+    State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State),
+    %% The journal loading only creates records for segments touched
+    %% by the journal, and the counts are based on the journal entries
+    %% only. We need *complete* counts for *all* segments, which is
+    %% exactly what was stored on shutdown.
+    SetCount = fun ({Seg, UnackedCount}, SegmentsN) ->
+                       Segment = segment_find_or_new(Seg, Dir, SegmentsN),
+                       segment_store(
+                         Segment #segment { unacked = UnackedCount }, SegmentsN)
+               end,
+    Segments1 = lists:foldl(SetCount, Segments, RecoveredCounts),
+    %% the counts above include transient messages, which would be the
+    %% wrong thing to return
+    {undefined, State1 #qistate { segments = Segments1 }}.
+
+%% Dirty recovery: returns {Count, State}, Count being the unacked total.
+init_dirty(CleanShutdown, ContainsCheckFun, State) ->
+    %% Recover the journal completely. This will also load segments
+    %% which have entries in the journal and remove duplicates. The
+    %% counts will correctly reflect the combination of the segment
+    %% and the journal.
+    State1 = #qistate { dir = Dir, segments = Segments } =
+        recover_journal(State),
+    %% Load each segment in turn and filter out messages that are not in
+    %% the msg_store, by adding acks to the journal (RAM only: it doesn't
+    %% matter if we lose them). Also mark delivered if not clean
+    %% shutdown, and count the unacked messages.
+    Recover =
+        fun (Seg, {SegmentsN, CountAcc}) ->
+                Segment = #segment { unacked = UnackedCount } =
+                    recover_segment(ContainsCheckFun, CleanShutdown,
+                                    segment_find_or_new(Seg, Dir, SegmentsN)),
+                {segment_store(Segment, SegmentsN), CountAcc + UnackedCount}
+        end,
+    {Segments1, Count} =
+        lists:foldl(Recover, {Segments, 0}, all_segment_nums(State1)),
+    %% Unconditionally flush since the dirty_count doesn't get updated
+    %% by the above foldl.
+    {Count, flush_journal(State1 #qistate { segments = Segments1 })}.
+
+%% Closes the journal (if open) and returns {[{Seg, UnackedCount}], State}
+%% with the journal handle and the segments cleared from the state.
+terminate(State = #qistate { journal_handle = JournalHdl,
+                             segments = Segments }) ->
+    ok = case JournalHdl of
+             undefined -> ok;
+             _         -> file_handle_cache:close(JournalHdl)
+         end,
+    Collect = fun (#segment { num = Seg, unacked = UnackedCount }, Acc) ->
+                      [{Seg, UnackedCount} | Acc]
+              end,
+    {segment_fold(Collect, [], Segments),
+     State #qistate { journal_handle = undefined, segments = undefined }}.
+
+%% Combines the on-disk segment with its journal entries and lets
+%% recover_message/5 journal extra dels/acks for every remaining entry.
+recover_segment(ContainsCheckFun, CleanShutdown,
+                Segment = #segment { journal_entries = JEntries }) ->
+    {SegEntries, UnackedCount} = load_segment(false, Segment),
+    {Combined, Delta} = segment_plus_journal(SegEntries, JEntries),
+    Fix = fun (RelSeq, {{Guid, _IsPersistent}, Del, no_ack}, SegmentN) ->
+                  recover_message(ContainsCheckFun(Guid), CleanShutdown,
+                                  Del, RelSeq, SegmentN)
+          end,
+    array:sparse_foldl(
+      Fix, Segment #segment { unacked = UnackedCount + Delta }, Combined).
+
+recover_message( true,  true,   _Del, _RelSeq, Segment) -> % in store, clean
+    Segment;
+recover_message( true, false,    del, _RelSeq, Segment) -> % already delivered
+    Segment;
+recover_message( true, false, no_del,  RelSeq, Segment) -> % mark delivered
+    add_to_journal(RelSeq, del, Segment);
+recover_message(false,     _,    del,  RelSeq, Segment) -> % not in store: ack
+    add_to_journal(RelSeq, ack, Segment);
+recover_message(false,     _, no_del,  RelSeq, Segment) -> % ditto: del + ack
+    add_to_journal(RelSeq, ack, add_to_journal(RelSeq, del, Segment)).
+
+queue_name_to_dir_name(Name = #resource { kind = queue }) ->
+    <<Num:128>> = erlang:md5(term_to_binary(Name)), % md5 as one integer
+    lists:flatten(io_lib:format("~.36B", [Num])). % printed in base 36
+
+%% The "queues" directory underneath the mnesia directory.
+queues_dir() -> filename:join(rabbit_mnesia:dir(), "queues").
+
+%%----------------------------------------------------------------------------
+%% msg store startup delta function
+%%----------------------------------------------------------------------------
+
+%% {start, Qs} launches one reader per queue; {next, G} pulls one result.
+queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) ->
+    {ok, Gatherer} = gatherer:start_link(),
+    lists:foreach(
+      fun (QueueName) ->
+              ok = gatherer:fork(Gatherer),
+              ok = worker_pool:submit_async(
+                     fun () -> queue_index_walker_reader(QueueName, Gatherer)
+                     end)
+      end, DurableQueues),
+    queue_index_walker({next, Gatherer});
+queue_index_walker({next, Gatherer}) when is_pid(Gatherer) ->
+    case gatherer:out(Gatherer) of
+        {value, {Guid, Count}} -> {Guid, Count, {next, Gatherer}};
+        empty ->
+            ok = gatherer:stop(Gatherer),
+            ok = rabbit_misc:unlink_and_capture_exit(Gatherer),
+            finished
+    end.
+
+queue_index_walker_reader(QueueName, Gatherer) -> % one job per queue
+    State = #qistate { segments = Segments, dir = Dir } =
+        recover_journal(blank_state(QueueName, false)),
+    [ok = segment_entries_foldr(
+            fun (_RelSeq, {{Guid, true}, _IsDelivered, no_ack}, ok) ->
+                    gatherer:in(Gatherer, {Guid, 1}); % persistent and unacked
+                (_RelSeq, _Value, Acc) ->
+                    Acc
+            end, ok, segment_find_or_new(Seg, Dir, Segments)) ||
+        Seg <- all_segment_nums(State)],
+    {_SegmentCounts, _State} = terminate(State),
+    ok = gatherer:finish(Gatherer).
+
+%%----------------------------------------------------------------------------
+%% journal manipulation
+%%----------------------------------------------------------------------------
+
+add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount,
+                                                 segments = Segments,
+                                                 dir = Dir }) ->
+    {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), % index level
+    Segment = segment_find_or_new(Seg, Dir, Segments),
+    Segment1 = add_to_journal(RelSeq, Action, Segment), % segment-level clause
+    State #qistate { dirty_count = DCount + 1,
+                     segments = segment_store(Segment1, Segments) };
+%% segment level: journal the action and keep the unacked count in step
+add_to_journal(RelSeq, Action,
+               Segment = #segment { journal_entries = JEntries,
+                                    unacked = UnackedCount }) ->
+    Segment1 = Segment #segment {
+                 journal_entries = add_to_journal(RelSeq, Action, JEntries) },
+    case Action of
+        del  -> Segment1;
+        ack  -> Segment1 #segment { unacked = UnackedCount - 1 };
+        ?PUB -> Segment1 #segment { unacked = UnackedCount + 1 }
+    end;
+%% entry level: JEntries is the array of {Pub, Del, Ack} journal entries
+add_to_journal(RelSeq, Action, JEntries) ->
+    Val = case array:get(RelSeq, JEntries) of
+              undefined ->
+                  case Action of
+                      ?PUB -> {Action, no_del, no_ack};
+                      del  -> {no_pub,    del, no_ack};
+                      ack  -> {no_pub, no_del,    ack}
+                  end;
+              ({Pub, no_del, no_ack}) when Action == del ->
+                  {Pub, del, no_ack};
+              ({Pub,    Del, no_ack}) when Action == ack ->
+                  {Pub, Del,    ack}
+          end,
+    array:set(RelSeq, Val, JEntries).
+
+%% Flushes the journal only once dirty_count exceeds max_journal_entries;
+%% otherwise hands the state back untouched.
+maybe_flush_journal(State = #qistate { dirty_count = Dirty,
+                                       max_journal_entries = Max })
+  when Dirty > Max -> flush_journal(State);
+maybe_flush_journal(State) -> State.
+
+%% Journal -> segment files; fully acked segments are deleted and forgotten.
+flush_journal(State = #qistate { segments = Segments }) ->
+    Flush = fun (#segment { unacked = 0, path = Path }, Acc) ->
+                    case filelib:is_file(Path) of
+                        true  -> ok = file:delete(Path);
+                        false -> ok
+                    end,
+                    Acc;
+                (Seg = #segment {}, Acc) ->
+                    segment_store(append_journal_to_segment(Seg), Acc)
+            end,
+    Segments1 = segment_fold(Flush, segments_new(), Segments),
+    {JournalHdl, State1} =
+        get_journal_handle(State #qistate { segments = Segments1 }),
+    ok = file_handle_cache:clear(JournalHdl),
+    State1 #qistate { dirty_count = 0 }.
+
+append_journal_to_segment(#segment { journal_entries = JEntries,
+                                     path = Path } = Segment) ->
+    case array:sparse_size(JEntries) of
+        0 -> Segment; % nothing journalled for this segment
+        _ -> {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE],
+                                                [{write_buffer, infinity}]),
+             array:sparse_foldl(fun write_entry_to_segment/3, Hdl, JEntries),
+             ok = file_handle_cache:close(Hdl),
+             Segment #segment { journal_entries = array_new() } % now empty
+    end.
+
+get_journal_handle(State = #qistate { journal_handle = undefined,
+                                      dir = Dir }) -> % opened on first use
+    Path = filename:join(Dir, ?JOURNAL_FILENAME),
+    {ok, Hdl} = file_handle_cache:open(Path, [write | ?READ_MODE],
+                                       [{write_buffer, infinity}]),
+    {Hdl, State #qistate { journal_handle = Hdl }};
+get_journal_handle(State = #qistate { journal_handle = Hdl }) ->
+    {Hdl, State}. % already open: reuse the handle
+
+%% Reads the whole journal file into the state. Not idempotent: calling it
+%% twice on the same state messes up the counts, which are assumed to be 0
+%% to start with.
+load_journal(State) ->
+    {Hdl, Opened} = get_journal_handle(State),
+    {ok, 0} = file_handle_cache:position(Hdl, 0),
+    load_journal_entries(Opened).
+
+%% Like load_journal/1 this is not idempotent. Loads the journal and, for
+%% every segment held in the state afterwards, removes journal entries that
+%% duplicate the segment file and recomputes the unacked count.
+recover_journal(State) ->
+    State1 = #qistate { segments = Segments } = load_journal(State),
+    Reconcile =
+        fun (Segment = #segment { journal_entries = JEntries,
+                                  unacked = InJournal }) ->
+                %% We want to keep ack'd entries in so that we can
+                %% remove them if duplicates are in the journal. The
+                %% counts here are purely from the segment itself.
+                {SegEntries, InSeg} = load_segment(true, Segment),
+                {JEntries1, Dups} =
+                    journal_minus_segment(JEntries, SegEntries),
+                Segment #segment {
+                  journal_entries = JEntries1,
+                  unacked = InJournal + InSeg - Dups }
+        end,
+    State1 #qistate { segments = segment_map(Reconcile, Segments) }.
+
+load_journal_entries(State = #qistate { journal_handle = Hdl }) ->
+    case file_handle_cache:read(Hdl, ?SEQ_BYTES) of % prefix + seq id
+        {ok, <<Prefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>>} ->
+            case Prefix of
+                ?DEL_JPREFIX ->
+                    load_journal_entries(add_to_journal(SeqId, del, State));
+                ?ACK_JPREFIX ->
+                    load_journal_entries(add_to_journal(SeqId, ack, State));
+                _ -> % a publish: the guid follows
+                    case file_handle_cache:read(Hdl, ?GUID_BYTES) of
+                        {ok, <<GuidNum:?GUID_BITS>>} ->
+                            %% work around for binary data
+                            %% fragmentation. See
+                            %% rabbit_msg_file:read_next/2
+                            <<Guid:?GUID_BYTES/binary>> =
+                                <<GuidNum:?GUID_BITS>>,
+                            Publish = {Guid, case Prefix of
+                                                 ?PUB_PERSIST_JPREFIX -> true;
+                                                 ?PUB_TRANS_JPREFIX   -> false
+                                             end},
+                            load_journal_entries(
+                              add_to_journal(SeqId, Publish, State));
+                        _ErrOrEoF -> %% err, we've lost at least a publish
+                            State
+                    end
+            end;
+        _ErrOrEoF -> State % end of the journal, or a read error
+    end.
+
+%% Journals Kind ('del' or 'ack') for each SeqId: a single append to the
+%% journal file, then the same actions applied to the in-memory journal.
+%% An empty SeqIds list leaves the state (and the journal) untouched.
+deliver_or_ack(_Kind, [], State) -> State;
+deliver_or_ack(Kind, SeqIds, State) ->
+    JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end,
+    {JournalHdl, State1} = get_journal_handle(State),
+    Entries = [<<JPrefix:?JPREFIX_BITS, SeqId:?SEQ_BITS>> || SeqId <- SeqIds],
+    ok = file_handle_cache:append(JournalHdl, Entries),
+    Journal = fun (SeqId, StateN) -> add_to_journal(SeqId, Kind, StateN) end,
+    maybe_flush_journal(lists:foldl(Journal, State1, SeqIds)).
+
+%%----------------------------------------------------------------------------
+%% segment manipulation
+%%----------------------------------------------------------------------------
+
+seq_id_to_seg_and_rel_seq_id(SeqId) -> % {segment number, offset within it}
+    { SeqId div ?SEGMENT_ENTRY_COUNT, SeqId rem ?SEGMENT_ENTRY_COUNT }.
+
+%% Sequence id of the entry at offset RelSeq within segment number Seg.
+reconstruct_seq_id(Seg, RelSeq) -> (Seg * ?SEGMENT_ENTRY_COUNT) + RelSeq.
+
+%% Sorted list, without duplicates, of the numbers of all segments that are
+%% either held in the state or present as a segment file in Dir. The number
+%% of a file is taken from the leading digits of its name; a matching file
+%% whose name does not start with a digit makes list_to_integer/1 fail.
+all_segment_nums(#qistate { dir = Dir, segments = Segments }) ->
+    IsDigit = fun (C) -> $0 =< C andalso C =< $9 end,
+    InMemory = segment_nums(Segments),
+    SegFiles = filelib:wildcard("*" ++ ?SEGMENT_EXTENSION, Dir),
+    OnDisk = [list_to_integer(lists:takewhile(IsDigit, SegName)) ||
+                 SegName <- SegFiles],
+    lists:usort(InMemory ++ OnDisk).
+
+segment_find_or_new(Seg, Dir, Segments) -> % a new segment is not stored here
+    case segment_find(Seg, Segments) of
+        {ok, Segment} -> Segment;
+        error         -> SegName = integer_to_list(Seg)  ++ ?SEGMENT_EXTENSION,
+                         Path = filename:join(Dir, SegName),
+                         #segment { num             = Seg,
+                                    path            = Path,
+                                    journal_entries = array_new(),
+                                    unacked         = 0 }
+    end.
+
+segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) ->
+    {ok, Segment}; %% 1 or (2, matches head)
+segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) ->
+    {ok, Segment}; %% 2, matches tail
+segment_find(Seg, {Segments, _}) -> %% no match
+    dict:find(Seg, Segments). % not in the cached list: consult the dict
+
+segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head)
+              {Segments, [#segment { num = Seg } | Tail]}) ->
+    {Segments, [Segment | Tail]};
+segment_store(Segment = #segment { num = Seg }, %% 2, matches tail
+              {Segments, [SegmentA, #segment { num = Seg }]}) ->
+    {Segments, [Segment, SegmentA]};
+segment_store(Segment = #segment { num = Seg }, {Segments, []}) -> % empty cache
+    {dict:erase(Seg, Segments), [Segment]};
+segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) ->
+    {dict:erase(Seg, Segments), [Segment, SegmentA]}; % becomes the cache head
+segment_store(Segment = #segment { num = Seg },
+              {Segments, [SegmentA, SegmentB]}) -> % cache full: evict SegmentB
+    {dict:store(SegmentB#segment.num, SegmentB, dict:erase(Seg, Segments)),
+     [Segment, SegmentA]}.
+
+segment_fold(Fun, Acc, {Segments, CachedSegments}) -> %% cached ones first
+    AccCached = lists:foldl(Fun, Acc, CachedSegments),
+    dict:fold(fun (_Num, S, AccIn) -> Fun(S, AccIn) end, AccCached, Segments).
+
+segment_map(Fun, {Segments, CachedSegments}) -> %% shape of the pair is kept
+    Mapped = dict:map(fun (_Num, S) -> Fun(S) end, Segments),
+    {Mapped, [Fun(S) || S <- CachedSegments]}.
+
+segment_nums({Segments, CachedSegments}) -> %% cached numbers, then dict keys
+    CachedNums = [S#segment.num || S <- CachedSegments],
+    CachedNums ++ dict:fetch_keys(Segments).
+
+segments_new() -> %% {dict: Seg -> #segment{}, cache: =< 2 #segment{}}
+    {dict:new(), []}.
+
+write_entry_to_segment(_RelSeq, {?PUB, del, ack}, Hdl) ->
+    Hdl; %% published, delivered and acked: nothing is written
+write_entry_to_segment(RelSeq, {Pub, Del, Ack}, Hdl) ->
+    %% First the publish record (prefix, persistence bit, RelSeq, then
+    %% the guid), if the entry carries a publish at all.
+    case Pub of
+        no_pub ->
+            ok;
+        {Guid, IsPersistent} ->
+            PubHeader = <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
+                         (bool_to_int(IsPersistent)):1,
+                         RelSeq:?REL_SEQ_BITS>>,
+            ok = file_handle_cache:append(Hdl, [PubHeader, Guid])
+    end,
+    %% Then one rel-seq-only record for a deliver or an ack, or two
+    %% identical ones when the entry has both.
+    MkRelSeqOnly = fun () -> <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+                              RelSeq:?REL_SEQ_BITS>> end,
+    case {Del, Ack} of
+        {no_del, no_ack} -> ok;
+        {del, ack}       -> Bin = MkRelSeqOnly(),
+                            ok = file_handle_cache:append(Hdl, [Bin, Bin]);
+        _                -> ok = file_handle_cache:append(Hdl, MkRelSeqOnly())
+    end,
+    Hdl.
+
+read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq},
+                     {Messages, Segments}, Dir) -> %% prepends to Messages
+    Segment = segment_find_or_new(Seg, Dir, Segments), %% maybe a fresh one
+    {segment_entries_foldr(
+       fun (RelSeq, {{Guid, IsPersistent}, IsDelivered, no_ack}, Acc)
+             when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso
+                  (Seg < EndSeg   orelse EndRelSeq   >= RelSeq) ->
+               [ {Guid, reconstruct_seq_id(Seg, RelSeq), %% Seg, not StartSeg
+                  IsPersistent, IsDelivered == del} | Acc ];
+           (_RelSeq, _Value, Acc) -> %% acked or out of bounds: skip
+               Acc
+       end, Messages, Segment),
+     segment_store(Segment, Segments)}. %% Seg goes to the front of the cache
+
+segment_entries_foldr(Fun, Init,
+                      Segment = #segment { journal_entries = Journal }) ->
+    {OnDisk, _Unacked} = load_segment(false, Segment), %% acked ones dropped
+    {Merged, _Delta}   = segment_plus_journal(OnDisk, Journal),
+    array:sparse_foldr(Fun, Init, Merged).
+
+%% Read a segment file into an array indexed by RelSeq, returning
+%% {SegEntries, UnackedCount}; a missing file reads as empty. Does
+%% not do any combining with the journal at all.
+load_segment(KeepAcked, #segment { path = Path }) ->
+    case filelib:is_file(Path) of
+        false -> {array_new(), 0};
+        true  -> {ok, Hdl} = file_handle_cache:open(Path, ?READ_MODE, []),
+                 {ok, 0} = file_handle_cache:position(Hdl, bof),
+                 Res = load_segment_entries(KeepAcked, Hdl, array_new(), 0),
+                 ok = file_handle_cache:close(Hdl),
+                 Res
+    end.
+
+load_segment_entries(KeepAcked, Hdl, SegEntries, UnackedCount) ->
+    case file_handle_cache:read(Hdl, ?REL_SEQ_ONLY_ENTRY_LENGTH_BYTES) of
+        {ok, <<?PUBLISH_PREFIX:?PUBLISH_PREFIX_BITS,
+              IsPersistentNum:1, RelSeq:?REL_SEQ_BITS>>} ->
+            %% a publish record: the header just read is followed by
+            %% the guid, which is ?GUID_BYTES long (bytes, not bits).
+            {ok, Guid} = file_handle_cache:read(Hdl, ?GUID_BYTES),
+            Obj = {{Guid, 1 == IsPersistentNum}, no_del, no_ack},
+            SegEntries1 = array:set(RelSeq, Obj, SegEntries),
+            load_segment_entries(KeepAcked, Hdl, SegEntries1,
+                                 UnackedCount + 1);
+        {ok, <<?REL_SEQ_ONLY_PREFIX:?REL_SEQ_ONLY_PREFIX_BITS,
+              RelSeq:?REL_SEQ_BITS>>} ->
+            {UnackedCountDelta, SegEntries1} =
+                case array:get(RelSeq, SegEntries) of
+                    {Pub, no_del, no_ack} -> %% 1st such record: the delivery
+                        { 0, array:set(RelSeq, {Pub, del, no_ack}, SegEntries)};
+                    {Pub, del, no_ack} when KeepAcked -> %% 2nd: the ack
+                        {-1, array:set(RelSeq, {Pub, del, ack}, SegEntries)};
+                    {_Pub, del, no_ack} -> %% acked, not kept: drop the entry
+                        {-1, array:reset(RelSeq, SegEntries)}
+                end,
+            load_segment_entries(KeepAcked, Hdl, SegEntries1,
+                                 UnackedCount + UnackedCountDelta);
+        _ErrOrEoF -> %% eof, error or unrecognised data: stop
+            {SegEntries, UnackedCount}
+    end.
+
+array_new() -> %% fixed-size, one slot per RelSeq; unset slots are 'undefined'
+    array:new([{default, undefined}, fixed, {size, ?SEGMENT_ENTRY_COUNT}]).
+
+bool_to_int(true ) -> 1; %% used for the 1-bit persistence flag on disk
+bool_to_int(false) -> 0.
+
+%%----------------------------------------------------------------------------
+%% journal & segment combination
+%%----------------------------------------------------------------------------
+
+%% Apply the in-memory journal entries for a segment on top of the
+%% entries just loaded from its file, giving {SegEntries, UnackedDelta}.
+%% The two must not hold duplicates of each other.
+segment_plus_journal(SegEntries, JEntries) ->
+    Apply = fun (RelSeq, JObj, {Entries, Unacked}) ->
+                    OnDisk = array:get(RelSeq, Entries),
+                    case segment_plus_journal1(OnDisk, JObj) of
+                        {undefined, Delta} ->
+                            {array:reset(RelSeq, Entries), Unacked + Delta};
+                        {Obj, Delta} ->
+                            {array:set(RelSeq, Obj, Entries), Unacked + Delta}
+                    end
+            end,
+    array:sparse_foldl(Apply, {SegEntries, 0}, JEntries).
+
+%% Here, the result is a tuple with the first element containing the
+%% item which we may be adding to (for items only in the journal),
+%% modifying in (bits in both), or, when returning 'undefined',
+%% erasing from (ack in journal, not segment) the segment array. The
+%% other element of the tuple is the delta for AdditionalUnacked.
+segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) ->
+    {Obj, 1}; %% only in the journal: one more unacked entry
+segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) ->
+    {Obj, 1}; %% ditto, already delivered
+segment_plus_journal1(undefined, {?PUB, del, ack}) ->
+    {undefined, 0}; %% whole lifecycle in the journal: nothing to add
+
+segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) ->
+    {{Pub, del, no_ack}, 0}; %% delivery recorded, still unacked
+segment_plus_journal1({?PUB, no_del, no_ack},       {no_pub, del, ack}) ->
+    {undefined, -1}; %% acked by the journal: erase, one fewer unacked
+segment_plus_journal1({?PUB, del, no_ack},          {no_pub, no_del, ack}) ->
+    {undefined, -1}. %% ditto
+
+%% Strip from a segment's journal entries whatever the segment file
+%% already records, giving {JEntries, UnackedRemoved}. Used on start
+%% up to clean up the journal.
+journal_minus_segment(JEntries, SegEntries) ->
+    Strip = fun (RelSeq, JObj, {Journal, Removed}) ->
+                    OnDisk = array:get(RelSeq, SegEntries),
+                    {Verdict, Delta} = journal_minus_segment1(JObj, OnDisk),
+                    Journal1 = case Verdict of
+                                   keep      -> Journal;
+                                   undefined -> array:reset(RelSeq, Journal);
+                                   Reduced   -> array:set(RelSeq, Reduced,
+                                                          Journal)
+                               end,
+                    {Journal1, Removed + Delta}
+            end,
+    array:sparse_foldl(Strip, {JEntries, 0}, JEntries).
+
+%% Here, the result is a tuple whose first element says what to do
+%% with the journal entry: 'keep' leaves it alone, 'undefined' erases
+%% it from the journal array, and anything else is the reduced entry
+%% to store in its place. The other element of the tuple is the delta
+%% for UnackedRemoved.
+
+%% Both the same. Must be at least the publish
+journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) ->
+    {undefined, 1};
+journal_minus_segment1({?PUB, _Del, ack} = Obj,    Obj) ->
+    {undefined, 0};
+
+%% Just publish in journal
+journal_minus_segment1({?PUB, no_del, no_ack},     undefined) ->
+    {keep, 0};
+
+%% Publish and deliver in journal
+journal_minus_segment1({?PUB, del, no_ack},        undefined) ->
+    {keep, 0};
+journal_minus_segment1({?PUB = Pub, del, no_ack},  {Pub, no_del, no_ack}) ->
+    {{no_pub, del, no_ack}, 1};
+
+%% Publish, deliver and ack in journal
+journal_minus_segment1({?PUB, del, ack},           undefined) ->
+    {keep, 0};
+journal_minus_segment1({?PUB = Pub, del, ack},     {Pub, no_del, no_ack}) ->
+    {{no_pub, del, ack}, 1};
+journal_minus_segment1({?PUB = Pub, del, ack},     {Pub, del, no_ack}) ->
+    {{no_pub, no_del, ack}, 1};
+
+%% Just deliver in journal
+journal_minus_segment1({no_pub, del, no_ack},      {?PUB, no_del, no_ack}) ->
+    {keep, 0};
+journal_minus_segment1({no_pub, del, no_ack},      {?PUB, del, no_ack}) ->
+    {undefined, 0};
+
+%% Just ack in journal
+journal_minus_segment1({no_pub, no_del, ack},      {?PUB, del, no_ack}) ->
+    {keep, 0};
+journal_minus_segment1({no_pub, no_del, ack},      {?PUB, del, ack}) ->
+    {undefined, -1};
+
+%% Deliver and ack in journal
+journal_minus_segment1({no_pub, del, ack},         {?PUB, no_del, no_ack}) ->
+    {keep, 0};
+journal_minus_segment1({no_pub, del, ack},         {?PUB, del, no_ack}) ->
+    {{no_pub, no_del, ack}, 0};
+journal_minus_segment1({no_pub, del, ack},         {?PUB, del, ack}) ->
+    {undefined, -1}.
diff --git a/src/rabbit_reader.erl b/src/rabbit_reader.erl
index 5cf519b7..a8b2ae54 100644
--- a/src/rabbit_reader.erl
+++ b/src/rabbit_reader.erl
@@ -41,7 +41,7 @@
 
 -export([server_properties/0]).
 
--export([analyze_frame/2]).
+-export([analyze_frame/3]).
 
 -import(gen_tcp).
 -import(fprof).
@@ -52,15 +52,18 @@
 -define(NORMAL_TIMEOUT, 3).
 -define(CLOSING_TIMEOUT, 1).
 -define(CHANNEL_TERMINATION_TIMEOUT, 3).
+-define(SILENT_CLOSE_DELAY, 3).
+-define(FRAME_MAX, 131072). %% set to zero once QPid fix their negotiation
 
 %---------------------------------------------------------------------------
 
--record(v1, {sock, connection, callback, recv_ref, connection_state}).
+-record(v1, {sock, connection, callback, recv_ref, connection_state,
+             queue_collector}).
 
 -define(INFO_KEYS,
         [pid, address, port, peer_address, peer_port,
-         recv_oct, recv_cnt, send_oct, send_cnt, send_pend,
-         state, channels, user, vhost, timeout, frame_max, client_properties]).
+         recv_oct, recv_cnt, send_oct, send_cnt, send_pend, state, channels,
+         protocol, user, vhost, timeout, frame_max, client_properties]).
 
 %% connection lifecycle
 %%
@@ -100,6 +103,8 @@
 %%   heartbeat timeout -> *throw*
 %% closing:
 %%   socket close -> *terminate*
+%%   receive connection.close -> send connection.close_ok,
+%%     *closing*
 %%   receive frame -> ignore, *closing*
 %%   handshake_timeout -> ignore, *closing*
 %%   heartbeat timeout -> *throw*
@@ -116,6 +121,8 @@
 %%      start terminate_connection timer, *closed*
 %% closed:
 %%   socket close -> *terminate*
+%%   receive connection.close -> send connection.close_ok,
+%%     *closed*
 %%   receive connection.close_ok -> self() ! terminate_connection,
 %%     *closed*
 %%   receive frame -> ignore, *closed*
@@ -131,11 +138,11 @@
 
 -ifdef(use_specs).
 
--spec(info_keys/0 :: () -> [info_key()]).
--spec(info/1 :: (pid()) -> [info()]).
--spec(info/2 :: (pid(), [info_key()]) -> [info()]).
+-spec(info_keys/0 :: () -> [rabbit_types:info_key()]).
+-spec(info/1 :: (pid()) -> [rabbit_types:info()]).
+-spec(info/2 :: (pid(), [rabbit_types:info_key()]) -> [rabbit_types:info()]).
 -spec(shutdown/2 :: (pid(), string()) -> 'ok').
--spec(server_properties/0 :: () -> amqp_table()).
+-spec(server_properties/0 :: () -> rabbit_framing:amqp_table()).
 
 -endif.
 
@@ -233,6 +240,7 @@ start_connection(Parent, Deb, Sock, SockTransform) ->
     erlang:send_after(?HANDSHAKE_TIMEOUT * 1000, self(),
                       handshake_timeout),
     ProfilingValue = setup_profiling(),
+    {ok, Collector} = rabbit_queue_collector:start_link(),
     try
         mainloop(Parent, Deb, switch_callback(
                                 #v1{sock = ClientSock,
@@ -241,10 +249,12 @@ start_connection(Parent, Deb, Sock, SockTransform) ->
                                       timeout_sec = ?HANDSHAKE_TIMEOUT,
                                       frame_max = ?FRAME_MIN_SIZE,
                                       vhost = none,
-                                      client_properties = none},
+                                      client_properties = none,
+                                      protocol = none},
                                     callback = uninitialized_callback,
                                     recv_ref = none,
-                                    connection_state = pre_init},
+                                    connection_state = pre_init,
+                                    queue_collector = Collector},
                                 handshake, 8))
     catch
         Ex -> (if Ex == connection_closed_abruptly ->
@@ -262,7 +272,9 @@ start_connection(Parent, Deb, Sock, SockTransform) ->
         %% output to be sent, which results in unnecessary delays.
         %%
         %% gen_tcp:close(ClientSock),
-        teardown_profiling(ProfilingValue)
+        teardown_profiling(ProfilingValue),
+        rabbit_queue_collector:shutdown(Collector),
+        rabbit_misc:unlink_and_capture_exit(Collector)
     end,
     done.
 
@@ -425,19 +437,29 @@ wait_for_channel_termination(N, TimerRef) ->
             exit(channel_termination_timeout)
     end.
 
-maybe_close(State = #v1{connection_state = closing}) ->
+maybe_close(State = #v1{connection_state = closing,
+                        queue_collector = Collector,
+                        connection = #connection{protocol = Protocol},
+                        sock = Sock}) ->
     case all_channels() of
-        [] -> ok = send_on_channel0(
-                     State#v1.sock, #'connection.close_ok'{}),
-              close_connection(State);
+        [] ->
+            %% Spec says "Exclusive queues may only be accessed by the current
+            %% connection, and are deleted when that connection closes."
+            %% This does not strictly imply synchrony, but in practice it seems
+            %% to be what people assume.
+            rabbit_queue_collector:delete_all(Collector),
+            ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol),
+            close_connection(State);
         _  -> State
     end;
 maybe_close(State) ->
     State.
 
-handle_frame(Type, 0, Payload, State = #v1{connection_state = CS})
+handle_frame(Type, 0, Payload,
+             State = #v1{connection_state = CS,
+                         connection = #connection{protocol = Protocol}})
   when CS =:= closing; CS =:= closed ->
-    case analyze_frame(Type, Payload) of
+    case analyze_frame(Type, Payload, Protocol) of
         {method, MethodName, FieldsBin} ->
             handle_method0(MethodName, FieldsBin, State);
         _Other -> State
@@ -445,20 +467,20 @@ handle_frame(Type, 0, Payload, State = #v1{connection_state = CS})
 handle_frame(_Type, _Channel, _Payload, State = #v1{connection_state = CS})
   when CS =:= closing; CS =:= closed ->
     State;
-handle_frame(Type, 0, Payload, State) ->
-    case analyze_frame(Type, Payload) of
+handle_frame(Type, 0, Payload,
+             State = #v1{connection = #connection{protocol = Protocol}}) ->
+    case analyze_frame(Type, Payload, Protocol) of
         error     -> throw({unknown_frame, 0, Type, Payload});
         heartbeat -> State;
-        trace     -> State;
         {method, MethodName, FieldsBin} ->
             handle_method0(MethodName, FieldsBin, State);
         Other -> throw({unexpected_frame_on_channel0, Other})
     end;
-handle_frame(Type, Channel, Payload, State) ->
-    case analyze_frame(Type, Payload) of
+handle_frame(Type, Channel, Payload,
+             State = #v1{connection = #connection{protocol = Protocol}}) ->
+    case analyze_frame(Type, Payload, Protocol) of
         error         -> throw({unknown_frame, Channel, Type, Payload});
         heartbeat     -> throw({unexpected_heartbeat_frame, Channel});
-        trace         -> throw({unexpected_trace_frame, Channel});
         AnalyzedFrame ->
             %%?LOGDEBUG("Ch ~p Frame ~p~n", [Channel, AnalyzedFrame]),
             case get({channel, Channel}) of
@@ -473,10 +495,18 @@ handle_frame(Type, Channel, Payload, State) ->
                 closing ->
                     %% According to the spec, after sending a
                     %% channel.close we must ignore all frames except
+                    %% channel.close and channel.close_ok.  In the
+                    %% event of a channel.close, we should send back a
                     %% channel.close_ok.
                     case AnalyzedFrame of
                         {method, 'channel.close_ok', _} ->
                             erase({channel, Channel});
+                        {method, 'channel.close', _} ->
+                            %% We're already closing this channel, so there's
+                            %% no cleanup to do; just reply on the socket.
+                            ok = rabbit_writer:internal_send_command(
+                                   State#v1.sock, Channel,
+                                   #'channel.close_ok'{}, Protocol);
                         _ -> ok
                     end,
                     State;
@@ -491,17 +521,20 @@ handle_frame(Type, Channel, Payload, State) ->
             end
     end.
 
-analyze_frame(?FRAME_METHOD, <<ClassId:16, MethodId:16, MethodFields/binary>>) ->
-    {method, rabbit_framing:lookup_method_name({ClassId, MethodId}), MethodFields};
-analyze_frame(?FRAME_HEADER, <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>) ->
+analyze_frame(?FRAME_METHOD,
+              <<ClassId:16, MethodId:16, MethodFields/binary>>,
+              Protocol) ->
+    MethodName = Protocol:lookup_method_name({ClassId, MethodId}),
+    {method, MethodName, MethodFields};
+analyze_frame(?FRAME_HEADER,
+              <<ClassId:16, Weight:16, BodySize:64, Properties/binary>>,
+              _Protocol) ->
     {content_header, ClassId, Weight, BodySize, Properties};
-analyze_frame(?FRAME_BODY, Body) ->
+analyze_frame(?FRAME_BODY, Body, _Protocol) ->
     {content_body, Body};
-analyze_frame(?FRAME_TRACE, _Body) ->
-    trace;
-analyze_frame(?FRAME_HEARTBEAT, <<>>) ->
+analyze_frame(?FRAME_HEARTBEAT, <<>>, _Protocol) ->
     heartbeat;
-analyze_frame(_Type, _Body) ->
+analyze_frame(_Type, _Body, _Protocol) ->
     error.
 
 handle_input(frame_header, <<Type:8,Channel:16,PayloadSize:32>>, State) ->
@@ -518,54 +551,70 @@ handle_input({frame_payload, Type, Channel, PayloadSize}, PayloadAndMarker, Stat
             throw({bad_payload, PayloadAndMarker})
     end;
 
-handle_input(handshake, <<"AMQP",1,1,ProtocolMajor,ProtocolMinor>>,
-             State = #v1{sock = Sock, connection = Connection}) ->
-    case check_version({ProtocolMajor, ProtocolMinor},
-                       {?PROTOCOL_VERSION_MAJOR, ?PROTOCOL_VERSION_MINOR}) of
-        true ->
-            ok = send_on_channel0(
-                   Sock,
-                   #'connection.start'{
-                     version_major = ?PROTOCOL_VERSION_MAJOR,
-                     version_minor = ?PROTOCOL_VERSION_MINOR,
-                     server_properties = server_properties(),
-                     mechanisms = <<"PLAIN AMQPLAIN">>,
-                     locales = <<"en_US">> }),
-            {State#v1{connection = Connection#connection{
-                                     timeout_sec = ?NORMAL_TIMEOUT},
-                      connection_state = starting},
-             frame_header, 7};
-        false ->
-            throw({bad_version, ProtocolMajor, ProtocolMinor})
-    end;
+%% The two rules pertaining to version negotiation:
+%%
+%% * If the server cannot support the protocol specified in the
+%% protocol header, it MUST respond with a valid protocol header and
+%% then close the socket connection.
+%%
+%% * The server MUST provide a protocol version that is lower than or
+%% equal to that requested by the client in the protocol header.
+handle_input(handshake, <<"AMQP", 0, 0, 9, 1>>, State) ->
+    start_connection({0, 9, 1}, rabbit_framing_amqp_0_9_1, State);
+
+%% This is the protocol header for 0-9, which we can safely treat as
+%% though it were 0-9-1.
+handle_input(handshake, <<"AMQP", 1, 1, 0, 9>>, State) ->
+    start_connection({0, 9, 0}, rabbit_framing_amqp_0_9_1, State);
+
+%% This is what most clients send for 0-8.  The 0-8 spec, confusingly,
+%% defines the version as 8-0.
+handle_input(handshake, <<"AMQP", 1, 1, 8, 0>>, State) ->
+    start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State);
+
+%% The 0-8 spec as on the AMQP web site actually has this as the
+%% protocol header; some libraries e.g., py-amqplib, send it when they
+%% want 0-8.
+handle_input(handshake, <<"AMQP", 1, 1, 9, 1>>, State) ->
+    start_connection({8, 0, 0}, rabbit_framing_amqp_0_8, State);
+
+handle_input(handshake, <<"AMQP", A, B, C, D>>, #v1{sock = Sock}) ->
+    refuse_connection(Sock, {bad_version, A, B, C, D});
 
 handle_input(handshake, Other, #v1{sock = Sock}) ->
-    ok = inet_op(fun () -> rabbit_net:send(
-                             Sock, <<"AMQP",1,1,
-                                    ?PROTOCOL_VERSION_MAJOR,
-                                    ?PROTOCOL_VERSION_MINOR>>) end),
-    throw({bad_header, Other});
+    refuse_connection(Sock, {bad_header, Other});
 
 handle_input(Callback, Data, _State) ->
     throw({bad_input, Callback, Data}).
 
-%% the 0-8 spec, confusingly, defines the version as 8-0
-adjust_version({8,0})   -> {0,8};
-adjust_version(Version) -> Version.
-check_version(ClientVersion, ServerVersion) ->
-    {ClientMajor, ClientMinor} = adjust_version(ClientVersion),
-    {ServerMajor, ServerMinor} = adjust_version(ServerVersion),
-    ClientMajor > ServerMajor
-        orelse
-          (ClientMajor == ServerMajor andalso
-           ClientMinor >= ServerMinor).
+%% Offer a protocol version to the client.  Connection.start only
+%% carries a major and a minor version number.  Luckily 0-9 and 0-9-1
+%% are similar enough that clients will be happy with either.
+start_connection({Major, Minor, _Revision}, Protocol,
+                 State = #v1{sock = Sock, connection = Connection}) ->
+    ok = send_on_channel0(
+           Sock,
+           #'connection.start'{ version_major     = Major,
+                                version_minor     = Minor,
+                                server_properties = server_properties(),
+                                mechanisms        = <<"PLAIN AMQPLAIN">>,
+                                locales           = <<"en_US">> },
+           Protocol),
+    Connection1 = Connection#connection{ timeout_sec = ?NORMAL_TIMEOUT,
+                                         protocol    = Protocol },
+    {State#v1{connection = Connection1, connection_state = starting},
+     frame_header, 7}.
+
+refuse_connection(Sock, Exception) -> %% reply with the header we do support
+    ok = inet_op(fun () -> rabbit_net:send(Sock, <<"AMQP",0,0,9,1>>) end),
+    throw(Exception).
 
 %%--------------------------------------------------------------------------
 
-handle_method0(MethodName, FieldsBin, State) ->
+handle_method0(MethodName, FieldsBin,
+               State = #v1{connection = #connection{protocol = Protocol}}) ->
     try
-        handle_method0(rabbit_framing:decode_method_fields(
-                         MethodName, FieldsBin),
+        handle_method0(Protocol:decode_method_fields(MethodName, FieldsBin),
                        State)
     catch exit:Reason ->
             CompleteReason = case Reason of
@@ -575,7 +624,11 @@ handle_method0(MethodName, FieldsBin, State) ->
                              end,
             case State#v1.connection_state of
                 running -> send_exception(State, 0, CompleteReason);
-                Other   -> throw({channel0_error, Other, CompleteReason})
+                %% We don't trust the client at this point - force
+                %% them to wait for a bit so they can't DOS us with
+                %% repeated failed logins etc.
+                Other   -> timer:sleep(?SILENT_CLOSE_DELAY * 1000),
+                           throw({channel0_error, Other, CompleteReason})
             end
     end.
 
@@ -583,66 +636,64 @@ handle_method0(#'connection.start_ok'{mechanism = Mechanism,
                                       response = Response,
                                       client_properties = ClientProperties},
                State = #v1{connection_state = starting,
-                           connection = Connection,
+                           connection = Connection =
+                               #connection{protocol = Protocol},
                            sock = Sock}) ->
     User = rabbit_access_control:check_login(Mechanism, Response),
-    ok = send_on_channel0(
-           Sock,
-           #'connection.tune'{channel_max = 0,
-                              %% set to zero once QPid fix their negotiation
-                              frame_max = 131072,
-                              heartbeat = 0}),
+    Tune = #'connection.tune'{channel_max = 0,
+                              frame_max = ?FRAME_MAX,
+                              heartbeat = 0},
+    ok = send_on_channel0(Sock, Tune, Protocol),
     State#v1{connection_state = tuning,
              connection = Connection#connection{
                             user = User,
                             client_properties = ClientProperties}};
-handle_method0(#'connection.tune_ok'{channel_max = _ChannelMax,
-                                     frame_max = FrameMax,
+handle_method0(#'connection.tune_ok'{frame_max = FrameMax,
                                      heartbeat = ClientHeartbeat},
                State = #v1{connection_state = tuning,
                            connection = Connection,
                            sock = Sock}) ->
-    %% if we have a channel_max limit that the client wishes to
-    %% exceed, die as per spec.  Not currently a problem, so we ignore
-    %% the client's channel_max parameter.
-    rabbit_heartbeat:start_heartbeat(Sock, ClientHeartbeat),
-    State#v1{connection_state = opening,
-             connection = Connection#connection{
-                            timeout_sec = ClientHeartbeat,
-                            frame_max = FrameMax}};
-handle_method0(#'connection.open'{virtual_host = VHostPath,
-                                  insist = Insist},
+    if (FrameMax /= 0) and (FrameMax < ?FRAME_MIN_SIZE) ->
+            rabbit_misc:protocol_error(
+              not_allowed, "frame_max=~w < ~w min size",
+              [FrameMax, ?FRAME_MIN_SIZE]);
+       (?FRAME_MAX /= 0) and (FrameMax > ?FRAME_MAX) ->
+            rabbit_misc:protocol_error(
+              not_allowed, "frame_max=~w > ~w max size",
+              [FrameMax, ?FRAME_MAX]);
+       true ->
+            rabbit_heartbeat:start_heartbeat(Sock, ClientHeartbeat),
+            State#v1{connection_state = opening,
+                     connection = Connection#connection{
+                                    timeout_sec = ClientHeartbeat,
+                                    frame_max = FrameMax}}
+    end;
+
+handle_method0(#'connection.open'{virtual_host = VHostPath},
+
                State = #v1{connection_state = opening,
                            connection = Connection = #connection{
-                                          user = User},
+                                          user = User,
+                                          protocol = Protocol},
                            sock = Sock}) ->
     ok = rabbit_access_control:check_vhost_access(User, VHostPath),
     NewConnection = Connection#connection{vhost = VHostPath},
-    KnownHosts = format_listeners(rabbit_networking:active_listeners()),
-    Redirects = compute_redirects(Insist),
-    if Redirects == [] ->
-            ok = send_on_channel0(
-                   Sock,
-                   #'connection.open_ok'{known_hosts = KnownHosts}),
-            State#v1{connection_state = running,
-                     connection = NewConnection};
-       true ->
-            %% FIXME: 'host' is supposed to only contain one
-            %% address; but which one do we pick? This is
-            %% really a problem with the spec.
-            Host = format_listeners(Redirects),
-            rabbit_log:info("connection ~p redirecting to ~p~n",
-                            [self(), Host]),
-            ok = send_on_channel0(
-                   Sock,
-                   #'connection.redirect'{host = Host,
-                                          known_hosts = KnownHosts}),
-            close_connection(State#v1{connection = NewConnection})
-    end;
+    ok = send_on_channel0(Sock, #'connection.open_ok'{}, Protocol),
+    State#v1{connection_state = running,
+             connection = NewConnection};
 handle_method0(#'connection.close'{},
                State = #v1{connection_state = running}) ->
     lists:foreach(fun rabbit_framing_channel:shutdown/1, all_channels()),
     maybe_close(State#v1{connection_state = closing});
+handle_method0(#'connection.close'{},
+               State = #v1{connection_state = CS,
+                           connection = #connection{protocol = Protocol},
+                           sock = Sock})
+  when CS =:= closing; CS =:= closed ->
+    %% We're already closed or closing, so we don't need to cleanup
+    %% anything.
+    ok = send_on_channel0(Sock, #'connection.close_ok'{}, Protocol),
+    State;
 handle_method0(#'connection.close_ok'{},
                State = #v1{connection_state = closed}) ->
     self() ! terminate_connection,
@@ -654,23 +705,8 @@ handle_method0(_Method, #v1{connection_state = S}) ->
     rabbit_misc:protocol_error(
       channel_error, "unexpected method in connection state ~w", [S]).
 
-send_on_channel0(Sock, Method) ->
-    ok = rabbit_writer:internal_send_command(Sock, 0, Method).
-
-format_listeners(Listeners) ->
-    list_to_binary(
-      rabbit_misc:intersperse(
-        $,,
-        [io_lib:format("~s:~w", [Host, Port]) ||
-            #listener{host = Host, port = Port} <- Listeners])).
-
-compute_redirects(true) -> [];
-compute_redirects(false) ->
-    Node = node(),
-    LNode = rabbit_load:pick(),
-    if Node == LNode -> [];
-       true -> rabbit_networking:node_listeners(LNode)
-    end.
+send_on_channel0(Sock, Method, Protocol) -> %% write Method on channel 0
+    ok = rabbit_writer:internal_send_command(Sock, 0, Method, Protocol).
 
 %%--------------------------------------------------------------------------
 
@@ -704,6 +740,10 @@ i(state, #v1{connection_state = S}) ->
     S;
 i(channels, #v1{}) ->
     length(all_channels());
+i(protocol, #v1{connection = #connection{protocol = none}}) ->
+    none;
+i(protocol, #v1{connection = #connection{protocol = Protocol}}) ->
+    Protocol:version();
 i(user, #v1{connection = #connection{user = #user{username = Username}}}) ->
     Username;
 i(user, #v1{connection = #connection{user = none}}) ->
@@ -722,15 +762,18 @@ i(Item, #v1{}) ->
 
 %%--------------------------------------------------------------------------
 
-send_to_new_channel(Channel, AnalyzedFrame, State) ->
+send_to_new_channel(Channel, AnalyzedFrame,
+                    State = #v1{queue_collector = Collector}) ->
     #v1{sock = Sock, connection = #connection{
                        frame_max = FrameMax,
                        user = #user{username = Username},
-                       vhost = VHost}} = State,
-    WriterPid = rabbit_writer:start(Sock, Channel, FrameMax),
-    ChPid = rabbit_framing_channel:start_link(
-              fun rabbit_channel:start_link/5,
-              [Channel, self(), WriterPid, Username, VHost]),
+                       vhost = VHost,
+                       protocol = Protocol}} = State,
+    {ok, WriterPid} = rabbit_writer:start(Sock, Channel, FrameMax, Protocol),
+    {ok, ChPid} = rabbit_framing_channel:start_link(
+                    fun rabbit_channel:start_link/6,
+                    [Channel, self(), WriterPid, Username, VHost, Collector],
+                    Protocol),
     put({channel, Channel}, {chpid, ChPid}),
     put({chpid, ChPid}, {channel, Channel}),
     ok = rabbit_framing_channel:process(ChPid, AnalyzedFrame).
@@ -746,25 +789,27 @@ handle_exception(State = #v1{connection_state = CS}, Channel, Reason) ->
     log_channel_error(CS, Channel, Reason),
     send_exception(State, Channel, Reason).
 
-send_exception(State, Channel, Reason) ->
-    {ShouldClose, CloseChannel, CloseMethod} = map_exception(Channel, Reason),
+send_exception(State = #v1{connection = #connection{protocol = Protocol}},
+               Channel, Reason) ->
+    {ShouldClose, CloseChannel, CloseMethod} =
+        map_exception(Channel, Reason, Protocol),
     NewState = case ShouldClose of
                    true  -> terminate_channels(),
                             close_connection(State);
                    false -> close_channel(Channel, State)
                end,
     ok = rabbit_writer:internal_send_command(
-           NewState#v1.sock, CloseChannel, CloseMethod),
+           NewState#v1.sock, CloseChannel, CloseMethod, Protocol),
     NewState.
 
-map_exception(Channel, Reason) ->
+map_exception(Channel, Reason, Protocol) ->
     {SuggestedClose, ReplyCode, ReplyText, FailedMethod} =
-        lookup_amqp_exception(Reason),
+        lookup_amqp_exception(Reason, Protocol),
     ShouldClose = SuggestedClose or (Channel == 0),
     {ClassId, MethodId} = case FailedMethod of
                               {_, _} -> FailedMethod;
-                              none -> {0, 0};
-                              _ -> rabbit_framing:method_id(FailedMethod)
+                              none   -> {0, 0};
+                              _      -> Protocol:method_id(FailedMethod)
                           end,
     {CloseChannel, CloseMethod} =
         case ShouldClose of
@@ -779,22 +824,16 @@ map_exception(Channel, Reason) ->
         end,
     {ShouldClose, CloseChannel, CloseMethod}.
 
-%% FIXME: this clause can go when we move to AMQP spec >=8.1
-lookup_amqp_exception(#amqp_error{name        = precondition_failed,
-                                  explanation = Expl,
-                                  method      = Method}) ->
-    ExplBin = amqp_exception_explanation(<<"PRECONDITION_FAILED">>, Expl),
-    {false, 406, ExplBin, Method};
 lookup_amqp_exception(#amqp_error{name        = Name,
                                   explanation = Expl,
-                                  method      = Method}) ->
-    {ShouldClose, Code, Text} = rabbit_framing:lookup_amqp_exception(Name),
+                                  method      = Method},
+                      Protocol) ->
+    {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(Name),
     ExplBin = amqp_exception_explanation(Text, Expl),
     {ShouldClose, Code, ExplBin, Method};
-lookup_amqp_exception(Other) ->
+lookup_amqp_exception(Other, Protocol) ->
     rabbit_log:warning("Non-AMQP exit reason '~p'~n", [Other]),
-    {ShouldClose, Code, Text} =
-        rabbit_framing:lookup_amqp_exception(internal_error),
+    {ShouldClose, Code, Text} = Protocol:lookup_amqp_exception(internal_error),
     {ShouldClose, Code, Text, none}.
 
 amqp_exception_explanation(Text, Expl) ->
diff --git a/src/rabbit_router.erl b/src/rabbit_router.erl
index 03979d6c..d50b9f31 100644
--- a/src/rabbit_router.erl
+++ b/src/rabbit_router.erl
@@ -41,7 +41,13 @@
 
 -ifdef(use_specs).
 
--spec(deliver/2 :: ([pid()], delivery()) -> {routing_result(), [pid()]}).
+-export_type([routing_key/0, routing_result/0]).
+
+-type(routing_key() :: binary()).
+-type(routing_result() :: 'routed' | 'unroutable' | 'not_delivered').
+
+-spec(deliver/2 ::
+        ([pid()], rabbit_types:delivery()) -> {routing_result(), [pid()]}).
 
 -endif.
 
@@ -57,14 +63,17 @@ deliver(QPids, Delivery = #delivery{mandatory = false,
     %% is preserved. This scales much better than the non-immediate
     %% case below.
     delegate:invoke_no_result(
-      QPids, fun(Pid) -> rabbit_amqqueue:deliver(Pid, Delivery) end),
+      QPids, fun (Pid) -> rabbit_amqqueue:deliver(Pid, Delivery) end),
     {routed, QPids};
 
 deliver(QPids, Delivery) ->
     {Success, _} =
         delegate:invoke(QPids,
-                        fun(Pid) -> rabbit_amqqueue:deliver(Pid, Delivery) end),
-    {Routed, Handled} = lists:foldl(fun fold_deliveries/2, {false, []}, Success),
+                        fun (Pid) ->
+                                rabbit_amqqueue:deliver(Pid, Delivery)
+                        end),
+    {Routed, Handled} =
+        lists:foldl(fun fold_deliveries/2, {false, []}, Success),
     check_delivery(Delivery#delivery.mandatory, Delivery#delivery.immediate,
                    {Routed, Handled}).
 
@@ -87,13 +96,13 @@ match_routing_key(Name, RoutingKey) ->
     lookup_qpids(mnesia:dirty_select(rabbit_route, [{MatchHead, [], ['$1']}])).
 
 lookup_qpids(Queues) ->
-    sets:fold(
-      fun(Key, Acc) ->
+    lists:foldl(
+      fun (Key, Acc) ->
               case mnesia:dirty_read({rabbit_queue, Key}) of
                   [#amqqueue{pid = QPid}] -> [QPid | Acc];
                   []                      -> Acc
               end
-      end, [], sets:from_list(Queues)).
+      end, [], lists:usort(Queues)).
 
 %%--------------------------------------------------------------------
 
diff --git a/src/rabbit_sasl_report_file_h.erl b/src/rabbit_sasl_report_file_h.erl
index 434cdae0..eb2037c2 100644
--- a/src/rabbit_sasl_report_file_h.erl
+++ b/src/rabbit_sasl_report_file_h.erl
@@ -33,7 +33,8 @@
 
 -behaviour(gen_event).
 
--export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2, code_change/3]).
+-export([init/1, handle_event/2, handle_call/2, handle_info/2, terminate/2,
+         code_change/3]).
 
 %% rabbit_sasl_report_file_h is a wrapper around the sasl_report_file_h
 %% module because the original's init/1 does not match properly
diff --git a/src/rabbit_sup.erl b/src/rabbit_sup.erl
index 2c5e5112..97613d17 100644
--- a/src/rabbit_sup.erl
+++ b/src/rabbit_sup.erl
@@ -34,7 +34,7 @@
 -behaviour(supervisor).
 
 -export([start_link/0, start_child/1, start_child/2, start_child/3,
-         start_restartable_child/1, start_restartable_child/2]).
+         start_restartable_child/1, start_restartable_child/2, stop_child/1]).
 
 -export([init/1]).
 
@@ -69,5 +69,11 @@ start_restartable_child(Mod, Args) ->
                  transient, infinity, supervisor, [rabbit_restartable_sup]}),
     ok.
 
+%% Terminates ChildId and, only if that succeeded, deletes its child spec.
+stop_child(ChildId) ->
+    delete_terminated(supervisor:terminate_child(?SERVER, ChildId), ChildId).
+delete_terminated(ok, ChildId) -> supervisor:delete_child(?SERVER, ChildId);
+delete_terminated(Error, _)    -> Error.
+
 init([]) ->
     {ok, {{one_for_all, 0, 1}, []}}.
diff --git a/src/rabbit_tests.erl b/src/rabbit_tests.erl
index 76ebd982..56aca1d6 100644
--- a/src/rabbit_tests.erl
+++ b/src/rabbit_tests.erl
@@ -41,8 +41,12 @@
 -import(lists).
 
 -include("rabbit.hrl").
+-include("rabbit_framing.hrl").
 -include_lib("kernel/include/file.hrl").
 
+-define(PERSISTENT_MSG_STORE,     msg_store_persistent).
+-define(TRANSIENT_MSG_STORE,      msg_store_transient).
+
 test_content_prop_roundtrip(Datum, Binary) ->
     Types =  [element(1, E) || E <- Datum],
     Values = [element(2, E) || E <- Datum],
@@ -50,14 +54,20 @@ test_content_prop_roundtrip(Datum, Binary) ->
     Binary = rabbit_binary_generator:encode_properties(Types, Values). %% assertion
 
 all_tests() ->
+    application:set_env(rabbit, file_handles_high_watermark, 10, infinity),
+    passed = test_backing_queue(),
     passed = test_priority_queue(),
+    passed = test_bpqueue(),
     passed = test_pg_local(),
     passed = test_unfold(),
+    passed = test_supervisor_delayed_restart(),
     passed = test_parsing(),
+    passed = test_content_framing(),
     passed = test_topic_matching(),
     passed = test_log_management(),
     passed = test_app_management(),
     passed = test_log_management_during_startup(),
+    passed = test_memory_pressure(),
     passed = test_cluster_management(),
     passed = test_user_management(),
     passed = test_server_status(),
@@ -204,6 +214,143 @@ test_priority_queue(Q) ->
      priority_queue:to_list(Q),
      priority_queue_out_all(Q)}.
 
+test_bpqueue() ->
+    Q = bpqueue:new(),
+    true = bpqueue:is_empty(Q),
+    0 = bpqueue:len(Q),
+    [] = bpqueue:to_list(Q),
+
+    Q1 = bpqueue_test(fun bpqueue:in/3, fun bpqueue:out/1,
+                      fun bpqueue:to_list/1,
+                      fun bpqueue:foldl/3, fun bpqueue:map_fold_filter_l/4),
+    Q2 = bpqueue_test(fun bpqueue:in_r/3, fun bpqueue:out_r/1,
+                      fun (QR) -> lists:reverse(
+                                    [{P, lists:reverse(L)} ||
+                                        {P, L} <- bpqueue:to_list(QR)])
+                      end,
+                      fun bpqueue:foldr/3, fun bpqueue:map_fold_filter_r/4),
+
+    [{foo, [1, 2]}, {bar, [3]}] = bpqueue:to_list(bpqueue:join(Q, Q1)),
+    [{bar, [3]}, {foo, [2, 1]}] = bpqueue:to_list(bpqueue:join(Q2, Q)),
+    [{foo, [1, 2]}, {bar, [3, 3]}, {foo, [2,1]}] =
+        bpqueue:to_list(bpqueue:join(Q1, Q2)),
+
+    [{foo, [1, 2]}, {bar, [3]}, {foo, [1, 2]}, {bar, [3]}] =
+        bpqueue:to_list(bpqueue:join(Q1, Q1)),
+
+    [{foo, [1, 2]}, {bar, [3]}] =
+        bpqueue:to_list(
+          bpqueue:from_list(
+            [{x, []}, {foo, [1]}, {y, []}, {foo, [2]}, {bar, [3]}, {z, []}])),
+
+    [{undefined, [a]}] = bpqueue:to_list(bpqueue:from_list([{undefined, [a]}])),
+
+    {4, [a,b,c,d]} =
+        bpqueue:foldl(
+          fun (Prefix, Value, {Prefix, Acc}) ->
+                  {Prefix + 1, [Value | Acc]}
+          end,
+          {0, []}, bpqueue:from_list([{0,[d]}, {1,[c]}, {2,[b]}, {3,[a]}])),
+
+    [{bar,3}, {foo,2}, {foo,1}] =
+        bpqueue:foldr(fun (P, V, I) -> [{P,V} | I] end, [], Q2),
+
+    BPQL = [{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}],
+    BPQ = bpqueue:from_list(BPQL),
+
+    %% no effect
+    {BPQL, 0} = bpqueue_mffl([none], {none, []}, BPQ),
+    {BPQL, 0} = bpqueue_mffl([foo,bar], {none, [1]}, BPQ),
+    {BPQL, 0} = bpqueue_mffl([bar], {none, [3]}, BPQ),
+    {BPQL, 0} = bpqueue_mffr([bar], {foo, [5]}, BPQ),
+
+    %% process 1 item
+    {[{foo,[-1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 1} =
+        bpqueue_mffl([foo,bar], {foo, [2]}, BPQ),
+    {[{foo,[1,2,2]}, {bar,[-3,4,5]}, {foo,[5,6,7]}], 1} =
+        bpqueue_mffl([bar], {bar, [4]}, BPQ),
+    {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[5,6,-7]}], 1} =
+        bpqueue_mffr([foo,bar], {foo, [6]}, BPQ),
+    {[{foo,[1,2,2]}, {bar,[3,4]}, {baz,[-5]}, {foo,[5,6,7]}], 1} =
+        bpqueue_mffr([bar], {baz, [4]}, BPQ),
+
+    %% change prefix
+    {[{bar,[-1,-2,-2,-3,-4,-5,-5,-6,-7]}], 9} =
+        bpqueue_mffl([foo,bar], {bar, []}, BPQ),
+    {[{bar,[-1,-2,-2,3,4,5]}, {foo,[5,6,7]}], 3} =
+        bpqueue_mffl([foo], {bar, [5]}, BPQ),
+    {[{bar,[-1,-2,-2,3,4,5,-5,-6]}, {foo,[7]}], 5} =
+        bpqueue_mffl([foo], {bar, [7]}, BPQ),
+    {[{foo,[1,2,2,-3,-4]}, {bar,[5]}, {foo,[5,6,7]}], 2} =
+        bpqueue_mffl([bar], {foo, [5]}, BPQ),
+    {[{bar,[-1,-2,-2,3,4,5,-5,-6,-7]}], 6} =
+        bpqueue_mffl([foo], {bar, []}, BPQ),
+    {[{foo,[1,2,2,-3,-4,-5,5,6,7]}], 3} =
+        bpqueue_mffl([bar], {foo, []}, BPQ),
+
+    %% edge cases
+    {[{foo,[-1,-2,-2]}, {bar,[3,4,5]}, {foo,[5,6,7]}], 3} =
+        bpqueue_mffl([foo], {foo, [5]}, BPQ),
+    {[{foo,[1,2,2]}, {bar,[3,4,5]}, {foo,[-5,-6,-7]}], 3} =
+        bpqueue_mffr([foo], {foo, [2]}, BPQ),
+
+    passed.
+
+bpqueue_test(In, Out, List, Fold, MapFoldFilter) ->
+    Q = bpqueue:new(),
+    {empty, _Q} = Out(Q),
+    %% on an empty queue neither the fold fun nor the filter funs may run
+    ok = Fold(fun (Prefix, Value, ok) -> {error, Prefix, Value} end, ok, Q),
+    {Q1M, 0} = MapFoldFilter(fun (_P)     -> throw(explosion) end,
+                             fun (_V, _N) -> throw(explosion) end, 0, Q),
+    [] = bpqueue:to_list(Q1M),
+
+    Q1 = In(bar, 3, In(foo, 2, In(foo, 1, Q))),
+    false = bpqueue:is_empty(Q1),
+    3 = bpqueue:len(Q1),
+    [{foo, [1, 2]}, {bar, [3]}] = List(Q1),
+
+    {{value, foo, 1}, Q3}  = Out(Q1),
+    {{value, foo, 2}, Q4}  = Out(Q3),
+    {{value, bar, 3}, _Q5} = Out(Q4),
+    %% F: only prefix foo; stop at value 2, else yield {bar, -V, V - Num}
+    F = fun (QN) ->
+                MapFoldFilter(fun (foo) -> true;
+                                  (_)   -> false
+                              end,
+                              fun (2, _Num) -> stop;
+                                  (V, Num)  -> {bar, -V, V - Num} end,
+                              0, QN)
+        end,
+    {Q6, 0} = F(Q),
+    [] = bpqueue:to_list(Q6),
+    {Q7, 1} = F(Q1),
+    [{bar, [-1]}, {foo, [2]}, {bar, [3]}] = List(Q7),
+
+    Q1.
+
+bpqueue_mffl(FF1A, FF2A, BPQ) ->
+    bpqueue_mff(fun bpqueue:map_fold_filter_l/4, FF1A, FF2A, BPQ).
+
+bpqueue_mffr(FF1A, FF2A, BPQ) ->
+    bpqueue_mff(fun bpqueue:map_fold_filter_r/4, FF1A, FF2A, BPQ).
+
+%% Calls Fold on BPQ with a prefix filter accepting the members of FF1A
+%% and, FF2A being {NewPrefix, Stoppers}, a value fun that answers stop
+%% for any value in Stoppers and {NewPrefix, -Val, Count + 1} otherwise.
+%% The queue in Fold's result is converted with bpqueue:to_list/1.
+bpqueue_mff(Fold, FF1A, FF2A, BPQ) ->
+    {NewPrefix, Stoppers} = FF2A,
+    PrefixFilter = fun (P) -> lists:member(P, FF1A) end,
+    Mapper = fun (Val, Count) ->
+                     case lists:member(Val, Stoppers) of
+                         true  -> stop;
+                         false -> {NewPrefix, -Val, 1 + Count}
+                     end
+             end,
+    {ResultQ, Total} = Fold(PrefixFilter, Mapper, 0, BPQ),
+    {bpqueue:to_list(ResultQ), Total}.
+
 test_simple_n_element_queue(N) ->
     Items = lists:seq(1, N),
     Q = priority_queue_in_all(priority_queue:new(), Items),
@@ -351,6 +498,43 @@ test_field_values() ->
        >>),
     passed.
 
+%% Test that content frames don't exceed frame-max
+test_content_framing(FrameMax, BodyBin) ->
+    [Header | Frames] =
+        rabbit_binary_generator:build_simple_content_frames(
+          1,
+          rabbit_binary_generator:ensure_content_encoded(
+            rabbit_basic:build_content(#'P_basic'{}, BodyBin),
+            rabbit_framing_amqp_0_9_1),
+          FrameMax,
+          rabbit_framing_amqp_0_9_1),
+    %% header is formatted correctly and advertises the full body size
+    %% (7-byte frame header, 4 bytes class + weight, 64-bit body size)
+    <<_FrameHeader:7/binary, _ClassAndWeight:4/binary,
+      BodySize:64/unsigned, _Rest/binary>> = list_to_binary(Header),
+    BodySize = size(BodyBin),
+    true = lists:all(
+             fun (ContentFrame) ->
+                     FrameBinary = list_to_binary(ContentFrame),
+                     %% assert: type/channel, 32-bit size, payload, 16#CE
+                     <<_TypeAndChannel:3/binary,
+                       Size:32/unsigned, _Payload:Size/binary, 16#CE>> =
+                         FrameBinary,
+                     size(FrameBinary) =< FrameMax
+             end, Frames),
+    passed.
+
+test_content_framing() ->
+    %% no content
+    passed = test_content_framing(4096, <<>>),
+    %% easily fit in one frame
+    passed = test_content_framing(4096, <<"Easy">>),
+    %% exactly one frame (empty frame = 8 bytes)
+    passed = test_content_framing(11, <<"One">>),
+    %% more than one frame
+    passed = test_content_framing(11, <<"More than one frame">>),
+    passed.
+
 test_topic_match(P, R) ->
     test_topic_match(P, R, true).
 
@@ -559,19 +743,19 @@ test_cluster_management() ->
 
     ok = control_action(reset, []),
     lists:foreach(fun (Arg) ->
-                          ok = control_action(cluster, Arg),
+                          ok = control_action(force_cluster, Arg),
                           ok
                   end,
                   ClusteringSequence),
     lists:foreach(fun (Arg) ->
                           ok = control_action(reset, []),
-                          ok = control_action(cluster, Arg),
+                          ok = control_action(force_cluster, Arg),
                           ok
                   end,
                   ClusteringSequence),
     ok = control_action(reset, []),
     lists:foreach(fun (Arg) ->
-                          ok = control_action(cluster, Arg),
+                          ok = control_action(force_cluster, Arg),
                           ok = control_action(start_app, []),
                           ok = control_action(stop_app, []),
                           ok
@@ -579,7 +763,7 @@ test_cluster_management() ->
                   ClusteringSequence),
     lists:foreach(fun (Arg) ->
                           ok = control_action(reset, []),
-                          ok = control_action(cluster, Arg),
+                          ok = control_action(force_cluster, Arg),
                           ok = control_action(start_app, []),
                           ok = control_action(stop_app, []),
                           ok
@@ -590,13 +774,13 @@ test_cluster_management() ->
     ok = control_action(reset, []),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
-    ok = control_action(cluster, ["invalid1@invalid",
-                                  "invalid2@invalid"]),
+    ok = control_action(force_cluster, ["invalid1@invalid",
+                                        "invalid2@invalid"]),
 
     %% join a non-existing cluster as a ram node
     ok = control_action(reset, []),
-    ok = control_action(cluster, ["invalid1@invalid",
-                                  "invalid2@invalid"]),
+    ok = control_action(force_cluster, ["invalid1@invalid",
+                                        "invalid2@invalid"]),
 
     SecondaryNode = rabbit_misc:makenode("hare"),
     case net_adm:ping(SecondaryNode) of
@@ -621,18 +805,26 @@ test_cluster_management2(SecondaryNode) ->
 
     %% join cluster as a ram node
     ok = control_action(reset, []),
-    ok = control_action(cluster, [SecondaryNodeS, "invalid1@invalid"]),
+    ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
 
     %% change cluster config while remaining in same cluster
-    ok = control_action(cluster, ["invalid2@invalid", SecondaryNodeS]),
+    ok = control_action(force_cluster, ["invalid2@invalid", SecondaryNodeS]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
 
     %% join non-existing cluster as a ram node
-    ok = control_action(cluster, ["invalid1@invalid",
-                                  "invalid2@invalid"]),
+    ok = control_action(force_cluster, ["invalid1@invalid",
+                                        "invalid2@invalid"]),
+    ok = control_action(start_app, []),
+    ok = control_action(stop_app, []),
+
+    %% join empty cluster as a ram node
+    ok = control_action(cluster, []),
+    ok = control_action(start_app, []),
+    ok = control_action(stop_app, []),
+
     %% turn ram node into disk node
     ok = control_action(reset, []),
     ok = control_action(cluster, [SecondaryNodeS, NodeS]),
@@ -640,8 +832,8 @@ test_cluster_management2(SecondaryNode) ->
     ok = control_action(stop_app, []),
 
     %% convert a disk node into a ram node
-    ok = control_action(cluster, ["invalid1@invalid",
-                                  "invalid2@invalid"]),
+    ok = control_action(force_cluster, ["invalid1@invalid",
+                                        "invalid2@invalid"]),
 
     %% turn a disk node into a ram node
     ok = control_action(reset, []),
@@ -746,17 +938,17 @@ test_user_management() ->
     passed.
 
 test_server_status() ->
-
     %% create a few things so there is some useful information to list
     Writer = spawn(fun () -> receive shutdown -> ok end end),
-    Ch = rabbit_channel:start_link(1, self(), Writer, <<"user">>, <<"/">>),
-    [Q, Q2] = [#amqqueue{} = rabbit_amqqueue:declare(
+    {ok, Ch} = rabbit_channel:start_link(1, self(), Writer,
+                                         <<"user">>, <<"/">>, self()),
+    [Q, Q2] = [Queue || Name <- [<<"foo">>, <<"bar">>],
+                        {new, Queue = #amqqueue{}} <-
+                            [rabbit_amqqueue:declare(
                                rabbit_misc:r(<<"/">>, queue, Name),
-                               false, false, []) ||
-                  Name <- [<<"foo">>, <<"bar">>]],
+                               false, false, [], none)]],
 
-    ok = rabbit_amqqueue:claim_queue(Q, self()),
-    ok = rabbit_amqqueue:basic_consume(Q, true, self(), Ch, undefined,
+    ok = rabbit_amqqueue:basic_consume(Q, true, Ch, undefined,
                                        <<"ctag">>, true, undefined),
 
     %% list queues
@@ -823,7 +1015,7 @@ test_hooks() ->
     {[arg1, arg2], 1, 3} = get(arg_hook_test_fired),
 
     %% Invoking Pids
-    Remote = fun() ->
+    Remote = fun () ->
         receive
             {rabbitmq_hook,[remote_test,test,[],Target]} ->
                 Target ! invoked
@@ -840,11 +1032,143 @@ test_hooks() ->
     end,
     passed.
 
+test_memory_pressure_receiver(Pid) ->
+    receive
+        shutdown ->
+            ok;
+        {send_command, Method} ->
+            ok = case Method of
+                     #'channel.flow'{}    -> ok;
+                     #'basic.qos_ok'{}    -> ok;
+                     #'channel.open_ok'{} -> ok
+                 end,
+            Pid ! Method,
+            test_memory_pressure_receiver(Pid);
+        sync ->
+            Pid ! sync,
+            test_memory_pressure_receiver(Pid)
+    end.
+
+test_memory_pressure_receive_flow(Active) ->
+    receive #'channel.flow'{active = Active} -> ok
+    after 1000 -> throw(failed_to_receive_channel_flow)
+    end,
+    receive #'channel.flow'{} ->
+            throw(pipelining_sync_commands_detected)
+    after 0 ->
+            ok
+    end.
+
+test_memory_pressure_sync(Ch, Writer) ->
+    ok = rabbit_channel:do(Ch, #'basic.qos'{}),
+    Writer ! sync,
+    receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end,
+    receive #'basic.qos_ok'{} -> ok
+    after 1000 -> throw(failed_to_receive_basic_qos_ok)
+    end.
+
+test_memory_pressure_spawn() ->
+    Me = self(),
+    Writer = spawn(fun () -> test_memory_pressure_receiver(Me) end),
+    {ok, Ch} = rabbit_channel:start_link(1, self(), Writer,
+                                         <<"user">>, <<"/">>, self()),
+    ok = rabbit_channel:do(Ch, #'channel.open'{}),
+    MRef = erlang:monitor(process, Ch),
+    receive #'channel.open_ok'{} -> ok
+    after 1000 -> throw(failed_to_receive_channel_open_ok)
+    end,
+    {Writer, Ch, MRef}.
+
+expect_normal_channel_termination(MRef, Ch) ->
+    receive {'DOWN', MRef, process, Ch, normal} -> ok
+    after 1000 -> throw(channel_failed_to_exit)
+    end.
+
+gobble_channel_exit() ->
+    receive {channel_exit, _, _} -> ok
+    after 1000 -> throw(channel_exit_not_received)
+    end.
+
+test_memory_pressure() ->
+    {Writer0, Ch0, MRef0} = test_memory_pressure_spawn(),
+    [ok = rabbit_channel:conserve_memory(Ch0, Conserve) ||
+        Conserve <- [false, false, true, false, true, true, false]],
+    ok = test_memory_pressure_sync(Ch0, Writer0),
+    receive {'DOWN', MRef0, process, Ch0, Info0} ->
+            throw({channel_died_early, Info0})
+    after 0 -> ok
+    end,
+
+    %% we should have just 1 active=false waiting for us
+    ok = test_memory_pressure_receive_flow(false),
+
+    %% if we reply with flow_ok, we should immediately get an
+    %% active=true back
+    ok = rabbit_channel:do(Ch0, #'channel.flow_ok'{active = false}),
+    ok = test_memory_pressure_receive_flow(true),
+
+    %% if we publish at this point, the channel should die
+    Content = rabbit_basic:build_content(#'P_basic'{}, <<>>),
+    ok = rabbit_channel:do(Ch0, #'basic.publish'{}, Content),
+    expect_normal_channel_termination(MRef0, Ch0),
+    gobble_channel_exit(),
+
+    {Writer1, Ch1, MRef1} = test_memory_pressure_spawn(),
+    ok = rabbit_channel:conserve_memory(Ch1, true),
+    ok = test_memory_pressure_receive_flow(false),
+    ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}),
+    ok = test_memory_pressure_sync(Ch1, Writer1),
+    ok = rabbit_channel:conserve_memory(Ch1, false),
+    ok = test_memory_pressure_receive_flow(true),
+    %% send back the wrong flow_ok. Channel should die.
+    ok = rabbit_channel:do(Ch1, #'channel.flow_ok'{active = false}),
+    expect_normal_channel_termination(MRef1, Ch1),
+    gobble_channel_exit(),
+
+    {_Writer2, Ch2, MRef2} = test_memory_pressure_spawn(),
+    %% just out of the blue, send a flow_ok. Life should end.
+    ok = rabbit_channel:do(Ch2, #'channel.flow_ok'{active = true}),
+    expect_normal_channel_termination(MRef2, Ch2),
+    gobble_channel_exit(),
+
+    {_Writer3, Ch3, MRef3} = test_memory_pressure_spawn(),
+    ok = rabbit_channel:conserve_memory(Ch3, true),
+    ok = test_memory_pressure_receive_flow(false),
+    receive {'DOWN', MRef3, process, Ch3, _} ->
+            ok
+    after 12000 ->
+            throw(channel_failed_to_exit)
+    end,
+    gobble_channel_exit(),
+
+    alarm_handler:set_alarm({vm_memory_high_watermark, []}),
+    Me = self(),
+    Writer4 = spawn(fun () -> test_memory_pressure_receiver(Me) end),
+    {ok, Ch4} = rabbit_channel:start_link(1, self(), Writer4,
+                                          <<"user">>, <<"/">>, self()),
+    ok = rabbit_channel:do(Ch4, #'channel.open'{}),
+    MRef4 = erlang:monitor(process, Ch4),
+    Writer4 ! sync,
+    receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end,
+    receive #'channel.open_ok'{} -> throw(unexpected_channel_open_ok)
+    after 0 -> ok
+    end,
+    alarm_handler:clear_alarm(vm_memory_high_watermark),
+    Writer4 ! sync,
+    receive sync -> ok after 1000 -> throw(failed_to_receive_writer_sync) end,
+    receive #'channel.open_ok'{} -> ok
+    after 1000 -> throw(failed_to_receive_channel_open_ok)
+    end,
+    rabbit_channel:shutdown(Ch4),
+    expect_normal_channel_termination(MRef4, Ch4),
+
+    passed.
+
 test_delegates_async(SecondaryNode) ->
     Self = self(),
-    Sender = fun(Pid) -> Pid ! {invoked, Self} end,
+    Sender = fun (Pid) -> Pid ! {invoked, Self} end,
 
-    Responder = make_responder(fun({invoked, Pid}) -> Pid ! response end),
+    Responder = make_responder(fun ({invoked, Pid}) -> Pid ! response end),
 
     ok = delegate:invoke_no_result(spawn(Responder), Sender),
     ok = delegate:invoke_no_result(spawn(SecondaryNode, Responder), Sender),
@@ -857,10 +1181,11 @@ test_delegates_async(SecondaryNode) ->
 
     passed.
 
-make_responder(FMsg) ->
-    fun() ->
+make_responder(FMsg) -> make_responder(FMsg, timeout).
+make_responder(FMsg, Throw) ->
+    fun () ->
         receive Msg -> FMsg(Msg)
-        after 1000 -> throw(timeout)
+        after 1000 -> throw(Throw)
         end
     end.
 
@@ -887,24 +1212,28 @@ must_exit(Fun) ->
     end.
 
 test_delegates_sync(SecondaryNode) ->
-    Sender = fun(Pid) -> gen_server:call(Pid, invoked) end,
-    BadSender = fun(_Pid) -> exit(exception) end,
+    Sender = fun (Pid) -> gen_server:call(Pid, invoked) end,
+    BadSender = fun (_Pid) -> exit(exception) end,
 
-    Responder = make_responder(fun({'$gen_call', From, invoked}) ->
+    Responder = make_responder(fun ({'$gen_call', From, invoked}) ->
                                    gen_server:reply(From, response)
                                end),
 
+    BadResponder = make_responder(fun ({'$gen_call', From, invoked}) ->
+                                          gen_server:reply(From, response)
+                                  end, bad_responder_died),
+
     response = delegate:invoke(spawn(Responder), Sender),
     response = delegate:invoke(spawn(SecondaryNode, Responder), Sender),
 
-    must_exit(fun() -> delegate:invoke(spawn(Responder), BadSender) end),
-    must_exit(fun() ->
-        delegate:invoke(spawn(SecondaryNode, Responder), BadSender) end),
+    must_exit(fun () -> delegate:invoke(spawn(BadResponder), BadSender) end),
+    must_exit(fun () ->
+        delegate:invoke(spawn(SecondaryNode, BadResponder), BadSender) end),
 
     LocalGoodPids = spawn_responders(node(), Responder, 2),
     RemoteGoodPids = spawn_responders(SecondaryNode, Responder, 2),
-    LocalBadPids = spawn_responders(node(), Responder, 2),
-    RemoteBadPids = spawn_responders(SecondaryNode, Responder, 2),
+    LocalBadPids = spawn_responders(node(), BadResponder, 2),
+    RemoteBadPids = spawn_responders(SecondaryNode, BadResponder, 2),
 
     {GoodRes, []} = delegate:invoke(LocalGoodPids ++ RemoteGoodPids, Sender),
     true = lists:all(fun ({_, response}) -> true end, GoodRes),
@@ -1010,6 +1339,586 @@ handle_hook(HookName, Handler, Args) ->
     A = atom_to_list(HookName) ++ "_" ++ atom_to_list(Handler) ++ "_fired",
     put(list_to_atom(A), Args).
 bad_handle_hook(_, _, _) ->
-    bad:bad().
+    exit(bad_handle_hook_called).
 extra_arg_hook(Hookname, Handler, Args, Extra1, Extra2) ->
     handle_hook(Hookname, Handler, {Args, Extra1, Extra2}).
+
+%% Thin wrapper: the delayed-restart supervisor test lives in test_sup.
+test_supervisor_delayed_restart() -> test_sup:test_supervisor_delayed_restart().
+
+%% Runs the backing queue suites with shrunken limits; 'after' restores them.
+test_backing_queue() ->
+    case application:get_env(rabbit, backing_queue_module) of
+        {ok, rabbit_variable_queue} ->
+            Set = fun (K, V) -> application:set_env(rabbit, K, V, infinity) end,
+            {ok, FileSizeLimit} =
+                application:get_env(rabbit, msg_store_file_size_limit),
+            {ok, MaxJournal} =
+                application:get_env(rabbit, queue_index_max_journal_entries),
+            Set(msg_store_file_size_limit, 512),
+            Set(queue_index_max_journal_entries, 128),
+            try
+                passed = test_msg_store(),
+                Set(msg_store_file_size_limit, FileSizeLimit),
+                passed = test_queue_index(),
+                passed = test_variable_queue(),
+                passed = test_queue_recover()
+            after
+                Set(msg_store_file_size_limit, FileSizeLimit),
+                Set(queue_index_max_journal_entries, MaxJournal)
+            end;
+        _ -> passed
+    end.
+
+restart_msg_store_empty() ->
+    EmptyGen = {fun (ok) -> finished end, ok}, % yields 'finished' at once
+    ok = rabbit_variable_queue:stop_msg_store(),
+    ok = rabbit_variable_queue:start_msg_store(undefined, EmptyGen).
+
+%% Deterministic 16-byte guid: the MD5 digest of X's external term format.
+guid_bin(X) -> erlang:md5(term_to_binary(X)).
+
+%% Assert that contains/2 answers Atom for every guid; returns Atom.
+msg_store_contains(Atom, Guids) ->
+    Check = fun (Guid, Seen) when Seen =:= Atom ->
+                    rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end,
+    Atom = lists:foldl(Check, Atom, Guids).
+
+%% Ask the persistent msg store to sync Guids and block until its callback
+%% fires; throws 'timeout' if nothing arrives within 10 seconds.
+msg_store_sync(Guids) ->
+    Parent = self(),
+    Tag = make_ref(),
+    Notify = fun () -> Parent ! {sync, Tag} end,
+    ok = rabbit_msg_store:sync(?PERSISTENT_MSG_STORE, Guids, Notify),
+    receive {sync, Tag} -> ok
+    after 10000 ->
+            io:format("Sync from msg_store missing for guids ~p~n", [Guids]),
+            throw(timeout)
+    end.
+
+%% Reads each guid, asserting its payload is the guid; returns final state.
+msg_store_read(Guids, MSCState) ->
+    ReadOne = fun (Guid, S0) ->
+                  {{ok, Guid}, S1} =
+                      rabbit_msg_store:read(?PERSISTENT_MSG_STORE, Guid, S0),
+                  S1 end,
+    lists:foldl(ReadOne, MSCState, Guids).
+
+%% Writes each guid with itself as payload, threading {ok, State} through.
+msg_store_write(Guids, MSCState) ->
+    Put = fun (Guid, {ok, S}) ->
+              rabbit_msg_store:write(?PERSISTENT_MSG_STORE, Guid, Guid, S) end,
+    lists:foldl(Put, {ok, MSCState}, Guids).
+
+msg_store_remove(Guids) ->
+    rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids). % result returned
+
+foreach_with_msg_store_client(MsgStore, Ref, Fun, L) ->
+    Step = fun (Guid, State) -> Fun(Guid, MsgStore, State) end,
+    Client = rabbit_msg_store:client_init(MsgStore, Ref), % terminated below
+    rabbit_msg_store:client_terminate(lists:foldl(Step, Client, L)).
+
+%% Exercises the persistent msg_store: write, sync, read, remove, restart, GC.
+test_msg_store() ->
+    restart_msg_store_empty(),
+    Self = self(),
+    Guids = [guid_bin(M) || M <- lists:seq(1,100)],
+    {Guids1stHalf, Guids2ndHalf} = lists:split(50, Guids),
+    %% check we don't contain any of the msgs we're about to publish
+    false = msg_store_contains(false, Guids),
+    Ref = rabbit_guid:guid(),
+    MSCState = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref),
+    %% publish the first half
+    {ok, MSCState1} = msg_store_write(Guids1stHalf, MSCState),
+    %% sync on the first half
+    ok = msg_store_sync(Guids1stHalf),
+    %% publish the second half
+    {ok, MSCState2} = msg_store_write(Guids2ndHalf, MSCState1),
+    %% sync on the first half again - the msg_store will be dirty, but
+    %% we won't need the fsync
+    ok = msg_store_sync(Guids1stHalf),
+    %% check they're all in there
+    true = msg_store_contains(true, Guids),
+    %% publish the latter half twice so we hit the caching and ref count code
+    {ok, MSCState3} = msg_store_write(Guids2ndHalf, MSCState2),
+    %% check they're still all in there
+    true = msg_store_contains(true, Guids),
+    %% sync on the 2nd half, but do lots of individual syncs to try
+    %% and cause coalescing to happen
+    ok = lists:foldl(
+           fun (Guid, ok) -> rabbit_msg_store:sync(
+                                ?PERSISTENT_MSG_STORE,
+                                [Guid], fun () -> Self ! {sync, Guid} end)
+           end, ok, Guids2ndHalf),
+    lists:foldl(
+      fun (Guid, ok) ->
+              receive
+                  {sync, Guid} -> ok
+              after
+                  10000 ->
+                      io:format("Sync from msg_store missing (guid: ~p)~n",
+                                [Guid]),
+                      throw(timeout)
+              end
+      end, ok, Guids2ndHalf),
+    %% it's very likely we're not dirty here, so the 1st half sync
+    %% should hit a different code path
+    ok = msg_store_sync(Guids1stHalf),
+    %% read them all
+    MSCState4 = msg_store_read(Guids, MSCState3),
+    %% read them all again - this will hit the cache, not disk
+    MSCState5 = msg_store_read(Guids, MSCState4),
+    %% remove them all
+    ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids),
+    %% check first half doesn't exist
+    false = msg_store_contains(false, Guids1stHalf),
+    %% check second half does exist
+    true = msg_store_contains(true, Guids2ndHalf),
+    %% read the second half again
+    MSCState6 = msg_store_read(Guids2ndHalf, MSCState5),
+    %% release the second half, just for fun (aka code coverage)
+    ok = rabbit_msg_store:release(?PERSISTENT_MSG_STORE, Guids2ndHalf),
+    %% read the second half again, just for fun (aka code coverage)
+    MSCState7 = msg_store_read(Guids2ndHalf, MSCState6),
+    ok = rabbit_msg_store:client_terminate(MSCState7),
+    %% stop and restart, preserving every other msg in 2nd half
+    ok = rabbit_variable_queue:stop_msg_store(),
+    ok = rabbit_variable_queue:start_msg_store(
+           [], {fun ([]) -> finished;
+                    ([Guid|GuidsTail])
+                      when length(GuidsTail) rem 2 == 0 ->
+                        {Guid, 1, GuidsTail};
+                    ([Guid|GuidsTail]) ->
+                        {Guid, 0, GuidsTail}
+                end, Guids2ndHalf}),
+    %% check we have the right msgs left
+    lists:foldl(
+      fun (Guid, Bool) ->
+              not(Bool = rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid))
+      end, false, Guids2ndHalf),
+    %% restart empty
+    restart_msg_store_empty(),
+    %% check we don't contain any of the msgs
+    false = msg_store_contains(false, Guids),
+    %% publish the first half again
+    MSCState8 = rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, Ref),
+    {ok, MSCState9} = msg_store_write(Guids1stHalf, MSCState8),
+    %% reading back at once should force an internal sync, else we'd misread
+    ok = rabbit_msg_store:client_terminate(
+           msg_store_read(Guids1stHalf, MSCState9)),
+    ok = rabbit_msg_store:remove(?PERSISTENT_MSG_STORE, Guids1stHalf),
+    %% restart empty
+    restart_msg_store_empty(), %% now safe to reuse guids
+    %% push a lot of msgs in... at least 100 files worth
+    {ok, FileSize} = application:get_env(rabbit, msg_store_file_size_limit),
+    PayloadSizeBits = 65536,
+    BigCount = trunc(100 * FileSize / (PayloadSizeBits div 8)),
+    GuidsBig = [guid_bin(X) || X <- lists:seq(1, BigCount)],
+    Payload = << 0:PayloadSizeBits >>,
+    ok = foreach_with_msg_store_client(
+           ?PERSISTENT_MSG_STORE, Ref,
+           fun (Guid, MsgStore, MSCStateM) ->
+                   {ok, MSCStateN} = rabbit_msg_store:write(
+                                       MsgStore, Guid, Payload, MSCStateM),
+                   MSCStateN
+           end, GuidsBig),
+    %% now read them to ensure we hit the fast client-side reading
+    ok = foreach_with_msg_store_client(
+           ?PERSISTENT_MSG_STORE, Ref,
+           fun (Guid, MsgStore, MSCStateM) ->
+                   {{ok, Payload}, MSCStateN} = rabbit_msg_store:read(
+                                                  MsgStore, Guid, MSCStateM),
+                   MSCStateN
+           end, GuidsBig),
+    %% now remove every 3rd msg, starting from the young end,
+    ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount, 1, -3)]),
+    %% .., then the next third, from the young end first. This hits
+    %% GC (under 50% good data left, but no empty files, so must GC).
+    ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-1, 1, -3)]),
+    %% .., then the final third, from the young end first. This hits
+    %% GC...
+    ok = msg_store_remove([guid_bin(X) || X <- lists:seq(BigCount-2, 1, -3)]),
+    %% ensure empty, then restart empty
+    false = msg_store_contains(false, GuidsBig),
+    restart_msg_store_empty(),
+    passed.
+
+%% Resource of kind 'queue' for Name, built by rabbit_misc:r/3 with <<"/">>.
+queue_name(Name) -> rabbit_misc:r(<<"/">>, queue, Name).
+
+%% The one queue resource shared by these tests: queue_name(<<"test">>).
+test_queue() -> queue_name(<<"test">>).
+
+%% Init the queue index for test_queue(); the callback asks the persistent
+%% msg store whether it holds a given guid.
+init_test_queue() ->
+    InStore = fun (Guid) ->
+                  rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid) end,
+    rabbit_queue_index:init(test_queue(), true, false, InStore).
+
+restart_test_queue(Qi) ->
+    _ = rabbit_queue_index:terminate([], Qi), % result deliberately ignored
+    ok = rabbit_variable_queue:stop(),
+    ok = rabbit_variable_queue:start([test_queue()]), % names the test queue
+    init_test_queue(). % returns whatever rabbit_queue_index:init/4 returns
+
+empty_test_queue() ->
+    ok = rabbit_variable_queue:stop(),
+    ok = rabbit_variable_queue:start([]), % started with an empty list
+    {0, _Terms, Qi} = init_test_queue(), % asserts the first element is 0
+    _ = rabbit_queue_index:delete_and_terminate(Qi), % result ignored
+    ok.
+
+with_empty_test_queue(Fun) ->
+    ok = empty_test_queue(),
+    {0, _Terms, Qi} = init_test_queue(), % asserts the first element is 0
+    rabbit_queue_index:delete_and_terminate(Fun(Qi)). % Fun returns index state
+
+%% Publish a fresh guid per SeqId; returns {Qi, [{SeqId, Guid}]}, last first.
+queue_index_publish(SeqIds, Persistent, Qi) ->
+    Ref = rabbit_guid:guid(),
+    MsgStore = case Persistent of
+                   true  -> ?PERSISTENT_MSG_STORE;
+                   false -> ?TRANSIENT_MSG_STORE
+               end,
+    Publish =
+        fun (SeqId, {QiN, Pairs, MSCStateN}) ->
+                Guid = rabbit_guid:guid(),
+                QiM = rabbit_queue_index:publish(Guid, SeqId, Persistent, QiN),
+                {ok, MSCStateM} =
+                    rabbit_msg_store:write(MsgStore, Guid, Guid, MSCStateN),
+                {QiM, [{SeqId, Guid} | Pairs], MSCStateM}
+        end,
+    Acc0 = {Qi, [], rabbit_msg_store:client_init(MsgStore, Ref)},
+    {QiEnd, SeqIdsGuids, MSCEnd} = lists:foldl(Publish, Acc0, SeqIds),
+    ok = rabbit_msg_store:client_delete_and_terminate(MSCEnd, MsgStore, Ref),
+    {QiEnd, SeqIdsGuids}.
+
+%% ok once Read is exhausted (surplus published pairs are ignored); ko as soon
+%% as an entry's guid, seq id or flags disagree with the expected values.
+verify_read_with_published(_Delivered, _Persistent, [], _) -> ok;
+verify_read_with_published(Delivered, Persistent,
+                           [{Guid, SeqId, Persistent, Delivered} | ReadTail],
+                           [{SeqId, Guid} | PublishedTail]) ->
+    verify_read_with_published(Delivered, Persistent, ReadTail, PublishedTail);
+verify_read_with_published(_Delivered, _Persistent, _Read, _Published) -> ko.
+
+test_queue_index() -> % index publish/read/deliver/ack/flush/restart paths
+    SegmentSize = rabbit_queue_index:next_segment_boundary(0),
+    TwoSegs = SegmentSize + SegmentSize,
+    MostOfASegment = trunc(SegmentSize*0.75),
+    SeqIdsA = lists:seq(0, MostOfASegment-1),
+    SeqIdsB = lists:seq(MostOfASegment, 2*MostOfASegment),
+    SeqIdsC = lists:seq(0, trunc(SegmentSize/2)),
+    SeqIdsD = lists:seq(0, SegmentSize*4),
+
+    with_empty_test_queue(
+      fun (Qi0) ->
+              {0, 0, Qi1} = rabbit_queue_index:bounds(Qi0),
+              {Qi2, SeqIdsGuidsA} = queue_index_publish(SeqIdsA, false, Qi1),
+              {0, SegmentSize, Qi3} = rabbit_queue_index:bounds(Qi2),
+              {ReadA, Qi4} = rabbit_queue_index:read(0, SegmentSize, Qi3),
+              ok = verify_read_with_published(false, false, ReadA,
+                                              lists:reverse(SeqIdsGuidsA)),
+              %% should get length back as 0, as all the msgs were transient
+              {0, _Terms1, Qi6} = restart_test_queue(Qi4),
+              {0, 0, Qi7} = rabbit_queue_index:bounds(Qi6),
+              {Qi8, SeqIdsGuidsB} = queue_index_publish(SeqIdsB, true, Qi7),
+              {0, TwoSegs, Qi9} = rabbit_queue_index:bounds(Qi8),
+              {ReadB, Qi10} = rabbit_queue_index:read(0, SegmentSize, Qi9),
+              ok = verify_read_with_published(false, true, ReadB,
+                                              lists:reverse(SeqIdsGuidsB)),
+              %% should get length back as length(SeqIdsB) (MostOfASegment + 1)
+              LenB = length(SeqIdsB),
+              {LenB, _Terms2, Qi12} = restart_test_queue(Qi10),
+              {0, TwoSegs, Qi13} = rabbit_queue_index:bounds(Qi12),
+              Qi14 = rabbit_queue_index:deliver(SeqIdsB, Qi13),
+              {ReadC, Qi15} = rabbit_queue_index:read(0, SegmentSize, Qi14),
+              ok = verify_read_with_published(true, true, ReadC,
+                                              lists:reverse(SeqIdsGuidsB)),
+              Qi16 = rabbit_queue_index:ack(SeqIdsB, Qi15),
+              Qi17 = rabbit_queue_index:flush(Qi16),
+              %% Everything will have gone now because #pubs == #acks
+              {0, 0, Qi18} = rabbit_queue_index:bounds(Qi17),
+              %% should get length back as 0 because all persistent
+              %% msgs have been acked
+              {0, _Terms3, Qi19} = restart_test_queue(Qi18),
+              Qi19
+      end),
+
+    %% These next bits are just to hit the auto deletion of segment files.
+    %% First, partials:
+    %% a) partial pub+del+ack, then move to new segment
+    with_empty_test_queue(
+      fun (Qi0) ->
+              {Qi1, _SeqIdsGuidsC} = queue_index_publish(SeqIdsC,
+                                                         false, Qi0),
+              Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1),
+              Qi3 = rabbit_queue_index:ack(SeqIdsC, Qi2),
+              Qi4 = rabbit_queue_index:flush(Qi3),
+              {Qi5, _SeqIdsGuidsC1} = queue_index_publish([SegmentSize],
+                                                          false, Qi4),
+              Qi5
+      end),
+
+    %% b) partial pub+del, then move to new segment, then ack all in old segment
+    with_empty_test_queue(
+      fun (Qi0) ->
+              {Qi1, _SeqIdsGuidsC2} = queue_index_publish(SeqIdsC,
+                                                          false, Qi0),
+              Qi2 = rabbit_queue_index:deliver(SeqIdsC, Qi1),
+              {Qi3, _SeqIdsGuidsC3} = queue_index_publish([SegmentSize],
+                                                          false, Qi2),
+              Qi4 = rabbit_queue_index:ack(SeqIdsC, Qi3),
+              rabbit_queue_index:flush(Qi4)
+      end),
+
+    %% c) just fill up several segments of all pubs, then +dels, then +acks
+    with_empty_test_queue(
+      fun (Qi0) ->
+              {Qi1, _SeqIdsGuidsD} = queue_index_publish(SeqIdsD,
+                                                          false, Qi0),
+              Qi2 = rabbit_queue_index:deliver(SeqIdsD, Qi1),
+              Qi3 = rabbit_queue_index:ack(SeqIdsD, Qi2),
+              rabbit_queue_index:flush(Qi3)
+      end),
+
+    %% d) get messages in all states to a segment, then flush, then do
+    %% the same again, don't flush and read. This will hit all
+    %% possibilities in combining the segment with the journal.
+    with_empty_test_queue(
+      fun (Qi0) ->
+              {Qi1, [Seven,Five,Four|_]} = queue_index_publish([0,1,2,4,5,7],
+                                                               false, Qi0),
+              Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1),
+              Qi3 = rabbit_queue_index:ack([0], Qi2),
+              Qi4 = rabbit_queue_index:flush(Qi3),
+              {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4),
+              Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5),
+              Qi7 = rabbit_queue_index:ack([1,2,3], Qi6),
+              {[], Qi8} = rabbit_queue_index:read(0, 4, Qi7),
+              {ReadD, Qi9} = rabbit_queue_index:read(4, 7, Qi8),
+              ok = verify_read_with_published(true, false, ReadD,
+                                              [Four, Five, Six]),
+              {ReadE, Qi10} = rabbit_queue_index:read(7, 9, Qi9),
+              ok = verify_read_with_published(false, false, ReadE,
+                                              [Seven, Eight]),
+              Qi10
+      end),
+
+    %% e) as for (d), but use terminate instead of read, which will
+    %% exercise journal_minus_segment, not segment_plus_journal.
+    with_empty_test_queue(
+      fun (Qi0) ->
+              {Qi1, _SeqIdsGuidsE} = queue_index_publish([0,1,2,4,5,7],
+                                                         true, Qi0),
+              Qi2 = rabbit_queue_index:deliver([0,1,4], Qi1),
+              Qi3 = rabbit_queue_index:ack([0], Qi2),
+              {5, _Terms9, Qi4} = restart_test_queue(Qi3),
+              {Qi5, _SeqIdsGuidsF} = queue_index_publish([3,6,8], true, Qi4),
+              Qi6 = rabbit_queue_index:deliver([2,3,5,6], Qi5),
+              Qi7 = rabbit_queue_index:ack([1,2,3], Qi6),
+              {5, _Terms10, Qi8} = restart_test_queue(Qi7),
+              Qi8
+      end),
+
+    ok = rabbit_variable_queue:stop(),
+    ok = rabbit_variable_queue:start([]),
+
+    passed.
+
+%% Publish Count messages in turn; delivery_mode 2 if IsPersistent, else 1.
+variable_queue_publish(IsPersistent, Count, VQ) ->
+    PublishOne =
+        fun (_N, VQN) ->
+                Mode = case IsPersistent of true -> 2; false -> 1 end,
+                Msg = rabbit_basic:message(
+                        rabbit_misc:r(<<>>, exchange, <<>>), <<>>,
+                        #'P_basic'{delivery_mode = Mode}, <<>>),
+                rabbit_variable_queue:publish(Msg, VQN)
+        end,
+    lists:foldl(PublishOne, VQ, lists:seq(1, Count)).
+
+variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) ->
+    lists:foldl(
+      fun (N, {VQN, Tags}) -> % N-th fetch must report Len - N msgs remaining
+              Left = Len - N,
+              {{#basic_message { is_persistent = IsPersistent }, IsDelivered,
+                Tag, Left}, VQM} = rabbit_variable_queue:fetch(true, VQN),
+              {VQM, [Tag | Tags]}
+      end, {VQ, []}, lists:seq(1, Count)).
+
+%% Crash with badmatch unless List maps Prop to exactly Value; returns Value.
+assert_prop(List, Prop, Value) -> Value = proplists:get_value(Prop, List).
+
+%% Check each {Prop, Value} pair against List; non-pair entries are skipped.
+assert_props(List, PropVals) -> [assert_prop(List, P, V) || {P, V} <- PropVals].
+
+%% Run Fun against a freshly initialised variable queue whose status has been
+%% checked to be all zeroes, then delete and terminate the state Fun returns.
+with_fresh_variable_queue(Fun) ->
+    ok = empty_test_queue(),
+    VQ = rabbit_variable_queue:init(test_queue(), true, false),
+    Expected = [{q1, 0}, {q2, 0}, {delta, {delta, undefined, 0, undefined}},
+                {q3, 0}, {q4, 0}, {len, 0}],
+    assert_props(rabbit_variable_queue:status(VQ), Expected),
+    _ = rabbit_variable_queue:delete_and_terminate(Fun(VQ)),
+    passed.
+
+test_variable_queue() -> % each scenario runs, in order, on a fresh queue
+    Scenarios = [fun test_variable_queue_dynamic_duration_change/1,
+                 fun test_variable_queue_partial_segments_delta_thing/1,
+                 fun test_variable_queue_all_the_bits_not_covered_elsewhere1/1,
+                 fun test_variable_queue_all_the_bits_not_covered_elsewhere2/1],
+    [passed = with_fresh_variable_queue(Scenario) || Scenario <- Scenarios],
+    passed.
+
+test_variable_queue_dynamic_duration_change(VQ0) ->
+    SegmentSize = rabbit_queue_index:next_segment_boundary(0),
+
+    %% start by publishing two segments' worth of non-persistent msgs
+    Len = 2*SegmentSize,
+    VQ1 = variable_queue_publish(false, Len, VQ0),
+
+    %% squeeze and relax queue. NOTE(review): Duration / 4 assumes a number
+    Churn = Len div 32,
+    VQ2 = publish_fetch_and_ack(Churn, Len, VQ1),
+    {Duration, VQ3} = rabbit_variable_queue:ram_duration(VQ2),
+    VQ7 = lists:foldl(
+            fun (Duration1, VQ4) ->
+                    {_Duration, VQ5} = rabbit_variable_queue:ram_duration(VQ4),
+                    io:format("~p:~n~p~n",
+                              [Duration1, rabbit_variable_queue:status(VQ5)]),
+                    VQ6 = rabbit_variable_queue:set_ram_duration_target(
+                            Duration1, VQ5),
+                    publish_fetch_and_ack(Churn, Len, VQ6)
+            end, VQ3, [Duration / 4, 0, Duration / 4, infinity]),
+
+    %% drain: fetch and ack all Len msgs, then expect the queue to be empty
+    {VQ8, AckTags} = variable_queue_fetch(Len, false, false, Len, VQ7),
+    VQ9 = rabbit_variable_queue:ack(AckTags, VQ8),
+    {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9),
+
+    VQ10.
+
+%% N times: publish one non-persistent msg, fetch one (Len must remain), ack.
+publish_fetch_and_ack(0, _Len, VQ0) -> VQ0;
+publish_fetch_and_ack(N, Len, VQ0) ->
+    VQ1 = variable_queue_publish(false, 1, VQ0),
+    {{_Msg, false, Tag, Len}, VQ2} = rabbit_variable_queue:fetch(true, VQ1),
+    publish_fetch_and_ack(N - 1, Len, rabbit_variable_queue:ack([Tag], VQ2)).
+
+test_variable_queue_partial_segments_delta_thing(VQ0) ->
+    SegmentSize = rabbit_queue_index:next_segment_boundary(0),
+    HalfSegment = SegmentSize div 2,
+    OneAndAHalfSegment = SegmentSize + HalfSegment,
+    VQ1 = variable_queue_publish(true, OneAndAHalfSegment, VQ0), % persistent
+    {_Duration, VQ2} = rabbit_variable_queue:ram_duration(VQ1),
+    VQ3 = check_variable_queue_status(
+            rabbit_variable_queue:set_ram_duration_target(0, VQ2),
+            %% one segment in q3 as betas, and half a segment in delta
+            [{delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}},
+             {q3, SegmentSize},
+             {len, SegmentSize + HalfSegment}]),
+    VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3),
+    VQ5 = check_variable_queue_status(
+            variable_queue_publish(true, 1, VQ4),
+            %% one alpha, but it's in the same segment as the deltas
+            [{q1, 1},
+             {delta, {delta, SegmentSize, HalfSegment, OneAndAHalfSegment}},
+             {q3, SegmentSize},
+             {len, SegmentSize + HalfSegment + 1}]),
+    {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false,
+                                          SegmentSize + HalfSegment + 1, VQ5),
+    VQ7 = check_variable_queue_status(
+            VQ6,
+            %% the half segment should now be in q3 as betas
+            [{q1, 1},
+             {delta, {delta, undefined, 0, undefined}},
+             {q3, HalfSegment},
+             {len, HalfSegment + 1}]),
+    {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false,
+                                           HalfSegment + 1, VQ7),
+    VQ9 = rabbit_variable_queue:ack(AckTags ++ AckTags1, VQ8),
+    %% both batches are acked together, so the queue should be empty now
+    {empty, VQ10} = rabbit_variable_queue:fetch(true, VQ9),
+    VQ10.
+
+check_variable_queue_status(VQ0, Props) -> % returns the settled queue state
+    Settled = variable_queue_wait_for_shuffling_end(VQ0),
+    Status = rabbit_variable_queue:status(Settled),
+    io:format("~p~n", [Status]),
+    assert_props(Status, Props),
+    Settled.
+
+variable_queue_wait_for_shuffling_end(VQ) ->
+    case rabbit_variable_queue:needs_idle_timeout(VQ) of
+        false -> VQ; % nothing left for idle_timeout/1 to do
+        true  -> VQ1 = rabbit_variable_queue:idle_timeout(VQ),
+                 variable_queue_wait_for_shuffling_end(VQ1)
+    end.
+
+test_variable_queue_all_the_bits_not_covered_elsewhere1(VQ0) ->
+    Count = 2 * rabbit_queue_index:next_segment_boundary(0),
+    VQ1 = variable_queue_publish(true, Count, VQ0), % persistent first
+    VQ2 = variable_queue_publish(false, Count, VQ1), % then non-persistent
+    VQ3 = rabbit_variable_queue:set_ram_duration_target(0, VQ2),
+    {VQ4, _AckTags}  = variable_queue_fetch(Count, true, false,
+                                            Count + Count, VQ3),
+    {VQ5, _AckTags1} = variable_queue_fetch(Count, false, false,
+                                            Count, VQ4),
+    _VQ6 = rabbit_variable_queue:terminate(VQ5), % nothing was acked
+    VQ7 = rabbit_variable_queue:init(test_queue(), true, true), % re-init
+    {{_Msg1, true, _AckTag1, Count1}, VQ8} =
+        rabbit_variable_queue:fetch(true, VQ7), % 2nd element must be true
+    VQ9 = variable_queue_publish(false, 1, VQ8),
+    VQ10 = rabbit_variable_queue:set_ram_duration_target(0, VQ9),
+    {VQ11, _AckTags2} = variable_queue_fetch(Count1, true, true, Count, VQ10),
+    {VQ12, _AckTags3} = variable_queue_fetch(1, false, false, 1, VQ11),
+    VQ12.
+
+test_variable_queue_all_the_bits_not_covered_elsewhere2(VQ0) ->
+    VQ1 = rabbit_variable_queue:set_ram_duration_target(0, VQ0),
+    VQ2 = variable_queue_publish(false, 4, VQ1), % 4 non-persistent msgs
+    {VQ3, AckTags} = variable_queue_fetch(2, false, false, 4, VQ2),
+    VQ4 = rabbit_variable_queue:requeue(AckTags, VQ3), % the 2 fetched ones
+    VQ5 = rabbit_variable_queue:idle_timeout(VQ4),
+    _VQ6 = rabbit_variable_queue:terminate(VQ5),
+    VQ7 = rabbit_variable_queue:init(test_queue(), true, true), % re-init
+    {empty, VQ8} = rabbit_variable_queue:fetch(false, VQ7), % must be empty
+    VQ8.
+
+test_queue_recover() ->
+    Count = 2 * rabbit_queue_index:next_segment_boundary(0),
+    TxID = rabbit_guid:guid(),
+    {new, #amqqueue { pid = QPid, name = QName }} =
+        rabbit_amqqueue:declare(test_queue(), true, false, [], none),
+    Msg = rabbit_basic:message(rabbit_misc:r(<<>>, exchange, <<>>),
+                               <<>>, #'P_basic'{delivery_mode = 2}, <<>>),
+    Delivery = #delivery{mandatory = false, immediate = false, txn = TxID,
+                         sender = self(), message = Msg},
+    [true = rabbit_amqqueue:deliver(QPid, Delivery) ||
+        _ <- lists:seq(1, Count)],
+    rabbit_amqqueue:commit_all([QPid], TxID, self()), % result not checked
+    exit(QPid, kill), % 'kill' cannot be trapped by the queue process
+    MRef = erlang:monitor(process, QPid), % a dead pid still yields 'DOWN'
+    receive {'DOWN', MRef, process, QPid, _Info} -> ok
+    after 10000 -> exit(timeout_waiting_for_queue_death)
+    end,
+    rabbit_amqqueue:stop(), % result not checked
+    ok = rabbit_amqqueue:start(),
+    rabbit_amqqueue:with_or_die(
+      QName,
+      fun (Q1 = #amqqueue { pid = QPid1 }) ->
+              CountMinusOne = Count - 1,
+              {ok, CountMinusOne, {QName, QPid1, _AckTag, true, _Msg}} =
+                  rabbit_amqqueue:basic_get(Q1, self(), false),
+              exit(QPid1, shutdown),
+              VQ1 = rabbit_variable_queue:init(QName, true, true),
+              {{_Msg1, true, _AckTag1, CountMinusOne}, VQ2} =
+                  rabbit_variable_queue:fetch(true, VQ1),
+              _VQ3 = rabbit_variable_queue:delete_and_terminate(VQ2),
+              rabbit_amqqueue:internal_delete(QName)
+      end),
+    passed.
diff --git a/src/rabbit_types.erl b/src/rabbit_types.erl
new file mode 100644
index 00000000..3aaf1917
--- /dev/null
+++ b/src/rabbit_types.erl
@@ -0,0 +1,147 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+%% Type-only module: shared dialyzer types, all guarded by use_specs.
+-module(rabbit_types).
+-include("rabbit.hrl").
+
+-ifdef(use_specs).
+
+-export_type([txn/0, maybe/1, info/0, info_key/0, message/0, basic_message/0,
+              delivery/0, content/0, decoded_content/0, undecoded_content/0,
+              unencoded_content/0, encoded_content/0, vhost/0, ctag/0,
+              amqp_error/0, r/1, r2/2, r3/3, ssl_socket/0, listener/0,
+              binding/0, amqqueue/0, exchange/0, connection/0, protocol/0,
+              user/0, error/1, ok_or_error/1, ok_or_error2/2, ok/1]).
+
+-type(maybe(T) :: T | 'none'). % 'none' stands for "no value"
+-type(vhost() :: binary()).
+-type(ctag() :: binary()).
+
+%% TODO: make this more precise by tying specific class_ids to
+%% specific properties
+-type(undecoded_content() ::
+      #content{class_id              :: rabbit_framing:amqp_class_id(),
+               properties            :: 'none',
+               properties_bin        :: binary(),
+               payload_fragments_rev :: [binary()]} |
+      #content{class_id              :: rabbit_framing:amqp_class_id(),
+               properties            :: rabbit_framing:amqp_property_record(),
+               properties_bin        :: 'none',
+               payload_fragments_rev :: [binary()]}).
+-type(unencoded_content() :: undecoded_content()). % alias
+-type(decoded_content() ::
+      #content{class_id              :: rabbit_framing:amqp_class_id(),
+               properties            :: rabbit_framing:amqp_property_record(),
+               properties_bin        :: maybe(binary()),
+               payload_fragments_rev :: [binary()]}).
+-type(encoded_content() ::
+      #content{class_id       :: rabbit_framing:amqp_class_id(),
+               properties     :: maybe(rabbit_framing:amqp_property_record()),
+               properties_bin        :: binary(),
+               payload_fragments_rev :: [binary()]}).
+-type(content() :: undecoded_content() | decoded_content()).
+-type(basic_message() ::
+      #basic_message{exchange_name  :: rabbit_exchange:name(),
+                     routing_key    :: rabbit_router:routing_key(),
+                     content        :: content(),
+                     guid           :: rabbit_guid:guid(),
+                     is_persistent  :: boolean()}).
+-type(message() :: basic_message()). % alias
+-type(delivery() ::
+      #delivery{mandatory :: boolean(),
+                immediate :: boolean(),
+                txn       :: maybe(txn()),
+                sender    :: pid(),
+                message   :: message()}).
+
+%% this is really an abstract type, but dialyzer does not support them
+-type(txn() :: rabbit_guid:guid()).
+
+-type(info_key() :: atom()).
+-type(info() :: {info_key(), any()}).
+
+-type(amqp_error() ::
+      #amqp_error{name        :: rabbit_framing:amqp_exception(),
+                  explanation :: string(),
+                  method      :: rabbit_framing:amqp_method_name()}).
+
+-type(r(Kind) ::
+        r2(vhost(), Kind)).
+-type(r2(VirtualHost, Kind) ::
+        r3(VirtualHost, Kind, rabbit_misc:resource_name())).
+-type(r3(VirtualHost, Kind, Name) ::
+        #resource{virtual_host :: VirtualHost,
+                  kind         :: Kind,
+                  name         :: Name}).
+
+-type(ssl_socket() :: #ssl_socket{}).
+
+-type(listener() ::
+      #listener{node     :: node(),
+                protocol :: atom(),
+                host     :: rabbit_networking:hostname(),
+                port     :: rabbit_networking:ip_port()}).
+
+-type(binding() ::
+      #binding{exchange_name    :: rabbit_exchange:name(),
+               queue_name       :: rabbit_amqqueue:name(),
+               key              :: rabbit_exchange:binding_key()}).
+
+-type(amqqueue() ::
+      #amqqueue{name            :: rabbit_amqqueue:name(),
+                durable         :: boolean(),
+                auto_delete     :: boolean(),
+                exclusive_owner :: maybe(pid()),
+                arguments       :: rabbit_framing:amqp_table(),
+                pid             :: maybe(pid())}).
+
+-type(exchange() ::
+      #exchange{name        :: rabbit_exchange:name(),
+                type        :: rabbit_exchange:type(),
+                durable     :: boolean(),
+                auto_delete :: boolean(),
+                arguments   :: rabbit_framing:amqp_table()}).
+
+-type(connection() :: pid()).
+
+-type(protocol() :: atom()).
+
+-type(user() ::
+      #user{username :: rabbit_access_control:username(),
+            password :: rabbit_access_control:password()}).
+
+-type(ok(A) :: {'ok', A}).
+-type(error(A) :: {'error', A}).
+-type(ok_or_error(A) :: 'ok' | error(A)).
+-type(ok_or_error2(A, B) :: ok(A) | error(B)).
+
+-endif. % use_specs
diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl
new file mode 100644
index 00000000..0f52eee8
--- /dev/null
+++ b/src/rabbit_variable_queue.erl
@@ -0,0 +1,1433 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(rabbit_variable_queue).
+
+-export([init/3, terminate/1, delete_and_terminate/1,
+         purge/1, publish/2, publish_delivered/3, fetch/2, ack/2,
+         tx_publish/3, tx_ack/3, tx_rollback/2, tx_commit/3,
+         requeue/2, len/1, is_empty/1,
+         set_ram_duration_target/2, ram_duration/1,
+         needs_idle_timeout/1, idle_timeout/1, handle_pre_hibernate/1,
+         status/1]).
+
+-export([start/1, stop/0]).
+
+%% exported for testing only
+-export([start_msg_store/2, stop_msg_store/0]).
+
+%%----------------------------------------------------------------------------
+%% Definitions:
+
+%% alpha: this is a message where both the message itself, and its
+%%        position within the queue are held in RAM
+%%
+%% beta: this is a message where the message itself is only held on
+%%        disk, but its position within the queue is held in RAM.
+%%
+%% gamma: this is a message where the message itself is only held on
+%%        disk, but its position is both in RAM and on disk.
+%%
+%% delta: this is a collection of messages, represented by a single
+%%        term, where the messages and their position are only held on
+%%        disk.
+%%
+%% Note that for persistent messages, the message and its position
+%% within the queue are always held on disk, *in addition* to being in
+%% one of the above classifications.
+%%
+%% Also note that within this code, the term gamma never
+%% appears. Instead, gammas are defined by betas who have had their
+%% queue position recorded on disk.
+%%
+%% In general, messages move q1 -> q2 -> delta -> q3 -> q4, though
+%% many of these steps are frequently skipped. q1 and q4 only hold
+%% alphas, q2 and q3 hold both betas and gammas (as queues of queues,
+%% using the bpqueue module where the block prefix determines whether
+%% they're betas or gammas). When a message arrives, its
+%% classification is determined. It is then added to the rightmost
+%% appropriate queue.
+%%
+%% If a new message is determined to be a beta or gamma, q1 is
+%% empty. If a new message is determined to be a delta, q1 and q2 are
+%% empty (and actually q4 too).
+%%
+%% When removing messages from a queue, if q4 is empty then q3 is read
+%% directly. If q3 becomes empty then the next segment's worth of
+%% messages from delta are read into q3, reducing the size of
+%% delta. If the queue is non empty, either q4 or q3 contain
+%% entries. It is never permitted for delta to hold all the messages
+%% in the queue.
+%%
+%% The duration indicated to us by the memory_monitor is used to
+%% calculate, given our current ingress and egress rates, how many
+%% messages we should hold in RAM. When we need to push alphas to
+%% betas or betas to gammas, we favour writing out messages that are
+%% further from the head of the queue. This minimises writes to disk,
+%% as the messages closer to the tail of the queue stay in the queue
+%% for longer, thus do not need to be replaced as quickly by sending
+%% other messages to disk.
+%%
+%% Whilst messages are pushed to disk and forgotten from RAM as soon
+%% as requested by a new setting of the queue RAM duration, the
+%% inverse is not true: we only load messages back into RAM as
+%% demanded as the queue is read from. Thus only publishes to the
+%% queue will take up available spare capacity.
+%%
+%% When we report our duration to the memory monitor, we calculate
+%% average ingress and egress rates over the last two samples, and
+%% then calculate our duration based on the sum of the ingress and
+%% egress rates. More than two samples could be used, but it's a
+%% balance between responding quickly enough to changes in
+%% producers/consumers versus ignoring temporary blips. The problem
+%% with temporary blips is that with just a few queues, they can have
+%% substantial impact on the calculation of the average duration and
+%% hence cause unnecessary I/O. Another alternative is to increase the
+%% amqqueue_process:RAM_DURATION_UPDATE_PERIOD to beyond 5
+%% seconds. However, that then runs the risk of being too slow to
+%% inform the memory monitor of changes. Thus a 5 second interval,
+%% plus a rolling average over the last two samples seems to work
+%% well in practice.
+%%
+%% The sum of the ingress and egress rates is used because the egress
+%% rate alone is not sufficient. Adding in the ingress rate means that
+%% queues which are being flooded by messages are given more memory,
+%% resulting in them being able to process the messages faster (by
+%% doing less I/O, or at least deferring it) and thus helping keep
+%% their mailboxes empty and thus the queue as a whole is more
+%% responsive. If such a queue also has fast but previously idle
+%% consumers, the consumer can then start to be driven as fast as it
+%% can go, whereas if only egress rate was being used, the incoming
+%% messages may have to be written to disk and then read back in,
+%% resulting in the hard disk being a bottleneck in driving the
+%% consumers. Generally, we want to give Rabbit every chance of
+%% getting rid of messages as fast as possible and remaining
+%% responsive, and using only the egress rate impacts that goal.
+%%
+%% If a queue is full of transient messages, then the transition from
+%% betas to deltas will be potentially very expensive as millions of
+%% entries must be written to disk by the queue_index module. This can
+%% badly stall the queue. In order to avoid this, the proportion of
+%% gammas / (betas+gammas) must not be lower than (betas+gammas) /
+%% (alphas+betas+gammas). As the queue grows or available memory
+%% shrinks, the latter ratio increases, requiring the conversion of
+%% more gammas to betas in order to maintain the invariant. At the
+%% point at which betas and gammas must be converted to deltas, there
+%% should be very few betas remaining, thus the transition is fast (no
+%% work needs to be done for the gamma -> delta transition).
+%%
+%% The conversion of betas to gammas is done in batches of exactly
+%% ?IO_BATCH_SIZE. This value should not be too small, otherwise the
+%% frequent operations on the queues of q2 and q3 will not be
+%% effectively amortised (switching the direction of queue access
+%% defeats amortisation), nor should it be too big, otherwise
+%% converting a batch stalls the queue for too long. Therefore, it
+%% must be just right. ram_index_count is used here and is the number
+%% of betas.
+%%
+%% The conversion from alphas to betas is also chunked, but only to
+%% ensure no more than ?IO_BATCH_SIZE alphas are converted to betas at
+%% any one time. This further smooths the effects of changes to the
+%% target_ram_msg_count and ensures the queue remains responsive
+%% even when there is a large amount of IO work to do. The
+%% idle_timeout callback is utilised to ensure that conversions are
+%% done as promptly as possible whilst ensuring the queue remains
+%% responsive.
+%%
+%% In the queue we keep track of both messages that are pending
+%% delivery and messages that are pending acks. This ensures that
+%% purging (deleting the former) and deletion (deleting the former and
+%% the latter) are both cheap and do not require any scanning through
+%% qi segments.
+%%
+%% Notes on Clean Shutdown
+%% (This documents behaviour in variable_queue, queue_index and
+%% msg_store.)
+%%
+%% In order to try to achieve as fast a start-up as possible, if a
+%% clean shutdown occurs, we try to save out state to disk to reduce
+%% work on startup. In the msg_store this takes the form of the
+%% index_module's state, plus the file_summary ets table, and client
+%% refs. In the VQ, this takes the form of the count of persistent
+%% messages in the queue and references into the msg_stores. The
+%% queue_index adds to these terms the details of its segments and
+%% stores the terms in the queue directory.
+%%
+%% Two message stores are used. One is created for persistent messages
+%% to durable queues that must survive restarts, and the other is used
+%% for all other messages that just happen to need to be written to
+%% disk. On start up we can therefore nuke the transient message
+%% store, and be sure that the messages in the persistent store are
+%% all that we need.
+%%
+%% The references to the msg_stores are there so that the msg_store
+%% knows to only trust its saved state if all of the queues it was
+%% previously talking to come up cleanly. Likewise, the queues
+%% themselves (esp queue_index) skip work in init if all the queues
+%% and msg_store were shut down cleanly. This gives both good speed
+%% improvements and also robustness so that if anything possibly went
+%% wrong in shutdown (or there was subsequent manual tampering), all
+%% messages and queues that can be recovered are recovered, safely.
+%%
+%% To delete transient messages lazily, the variable_queue, on
+%% startup, stores the next_seq_id reported by the queue_index as the
+%% transient_threshold. From that point on, whenever it's reading a
+%% message off disk via the queue_index, if the seq_id is below this
+%% threshold and the message is transient then it drops the message
+%% (the message itself won't exist on disk because it would have been
+%% stored in the transient msg_store which would have had its saved
+%% state nuked on startup). This avoids the expensive operation of
+%% scanning the entire queue on startup in order to delete transient
+%% messages that were only pushed to disk to save memory.
+%%
+%%----------------------------------------------------------------------------
+
+-behaviour(rabbit_backing_queue).
+
+-record(vqstate,
+        { q1,                   % queue of alphas (see Definitions)
+          q2,                   % bpqueue of betas and gammas
+          delta,                % #delta{}: messages held only on disk
+          q3,                   % bpqueue of betas and gammas
+          q4,                   % queue of alphas; fetch/2 reads from here
+          next_seq_id,          % seq id given to the next published msg
+          pending_ack,          % dict of delivered but unacked entries
+          index_state,          % rabbit_queue_index state
+          msg_store_clients,    % {{PClient, PRef}, {TClient, TRef}}
+          on_sync,              % #sync{}
+          durable,              % IsDurable as passed to init/3
+          transient_threshold,  % next_seq_id as of init/3
+
+          len,                  % reported by len/1
+          persistent_count,     % saved by terminate/1, restored by init/3
+
+          duration_target,      % last value given to set_ram_duration_target/2
+          target_ram_msg_count, % derived from duration_target and the rates
+          ram_msg_count,
+          ram_msg_count_prev,   % ram_msg_count at the last ram_duration/1
+          ram_index_count,      % number of betas (see Definitions)
+          out_counter,          % reset to 0 by ram_duration/1
+          in_counter,           % reset to 0 by ram_duration/1
+          rates                 % #rates{}
+         }).
+
+-record(rates, { egress, ingress, avg_egress, avg_ingress, timestamp }).
+
+-record(msg_status,
+        { seq_id,
+          guid,
+          msg,
+          is_persistent,
+          is_delivered,   % set to true by publish_delivered/3 and fetch/2
+          msg_on_disk,    % boolean, matched on in fetch/2
+          index_on_disk   % boolean, matched on in fetch/2
+         }).
+
+-record(delta,
+        { start_seq_id, %% start_seq_id is inclusive
+          count,
+          end_seq_id    %% end_seq_id is exclusive
+         }).
+
+-record(tx, { pending_messages, pending_acks }). % newest-first lists
+
+-record(sync, { acks_persistent, acks_all, pubs, funs }). % all start as []
+
+%% When we discover, on publish, that we should write some indices to
+%% disk for some betas, IO_BATCH_SIZE sets the number of betas that
+%% we must be due to write indices for before we do any work at
+%% all. This is both a minimum and a maximum - we don't write fewer
+%% than IO_BATCH_SIZE indices out in one go, and we don't write
+%% more - we can always come back on the next publish to do
+%% more.
+-define(IO_BATCH_SIZE, 64).
+-define(PERSISTENT_MSG_STORE, msg_store_persistent).
+-define(TRANSIENT_MSG_STORE,  msg_store_transient).
+
+-include("rabbit.hrl").
+
+%%----------------------------------------------------------------------------
+
+-ifdef(use_specs).
+
+-type(timestamp() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}).
+-type(seq_id()  :: non_neg_integer()).
+-type(ack()     :: seq_id() | 'blank_ack').  % 'blank_ack': no ack is expected
+
+-type(rates() :: #rates { egress      :: {timestamp(), non_neg_integer()},
+                          ingress     :: {timestamp(), non_neg_integer()},
+                          avg_egress  :: float(),     % msgs/sec
+                          avg_ingress :: float(),     % msgs/sec
+                          timestamp   :: timestamp() }).
+
+-type(delta() :: #delta { start_seq_id :: non_neg_integer() | 'undefined',
+                          count        :: non_neg_integer(), % 0 when blank
+                          end_seq_id   :: non_neg_integer() | 'undefined' }).
+
+-type(sync() :: #sync { acks_persistent :: [[seq_id()]],
+                        acks_all        :: [[seq_id()]],
+                        pubs            :: [[rabbit_guid:guid()]],
+                        funs            :: [fun (() -> any())] }).
+
+-type(state() :: #vqstate {     % field-by-field types for #vqstate{}
+             q1                   :: queue(),
+             q2                   :: bpqueue:bpqueue(),
+             delta                :: delta(),
+             q3                   :: bpqueue:bpqueue(),
+             q4                   :: queue(),
+             next_seq_id          :: seq_id(),
+             pending_ack          :: dict:dictionary(),
+             index_state          :: any(),  % rabbit_queue_index state
+             msg_store_clients    :: 'undefined' | {{any(), binary()},
+                                                    {any(), binary()}},
+             on_sync              :: sync(),
+             durable              :: boolean(),
+
+             len                  :: non_neg_integer(),
+             persistent_count     :: non_neg_integer(),
+
+             transient_threshold  :: non_neg_integer(),
+             duration_target      :: number() | 'infinity',  % seconds
+             target_ram_msg_count :: non_neg_integer() | 'infinity',
+             ram_msg_count        :: non_neg_integer(),
+             ram_msg_count_prev   :: non_neg_integer(),
+             ram_index_count      :: non_neg_integer(),
+             out_counter          :: non_neg_integer(),
+             in_counter           :: non_neg_integer(),
+             rates                :: rates() }).
+
+-include("rabbit_backing_queue_spec.hrl").
+
+-endif.
+
+-define(BLANK_DELTA, #delta { start_seq_id = undefined, % no seq id range
+                              count        = 0,
+                              end_seq_id   = undefined }).
+-define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, % Z for both bounds
+                                         count        = 0,
+                                         end_seq_id   = Z }).
+
+-define(BLANK_SYNC, #sync { acks_persistent = [], % all four lists empty
+                            acks_all        = [],
+                            pubs            = [],
+                            funs            = [] }).
+
+%%----------------------------------------------------------------------------
+%% Public API
+%%----------------------------------------------------------------------------
+
+%% Recovers the queue indices of DurableQueues and starts both message
+%% stores, passing on each persistent_ref found in the recovered
+%% terms.
+start(DurableQueues) ->
+    {AllTerms, StartFunState} = rabbit_queue_index:recover(DurableQueues),
+    Candidates = [proplists:get_value(persistent_ref, Terms)
+                  || Terms <- AllTerms],
+    PersistentRefs = [Ref || Ref <- Candidates, Ref =/= undefined],
+    start_msg_store(PersistentRefs, StartFunState).
+
+stop() -> stop_msg_store(). % stops both message stores
+
+start_msg_store(Refs, StartFunState) -> % transient store, then persistent
+    ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store,
+                                [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(),
+                                 undefined,  {fun (ok) -> finished end, ok}]),
+    ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store,
+                                [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(),
+                                 Refs, StartFunState]). % recovered refs
+
+stop_msg_store() -> % persistent store first, then transient
+    ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE),
+    ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE).
+
+init(QueueName, IsDurable, Recover) -> % builds the initial #vqstate{}
+    {DeltaCount, Terms, IndexState} =
+        rabbit_queue_index:init(
+          QueueName, Recover,
+          rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE),
+          fun (Guid) ->
+                  rabbit_msg_store:contains(?PERSISTENT_MSG_STORE, Guid)
+          end),
+    {LowSeqId, NextSeqId, IndexState1} = rabbit_queue_index:bounds(IndexState),
+    %% saved terms are used only if they hold both refs, else fresh guids
+    {PRef, TRef, Terms1} =
+        case [persistent_ref, transient_ref] -- proplists:get_keys(Terms) of
+            [] -> {proplists:get_value(persistent_ref, Terms),
+                   proplists:get_value(transient_ref, Terms),
+                   Terms};
+            _  -> {rabbit_guid:guid(), rabbit_guid:guid(), []}
+        end,
+    DeltaCount1 = proplists:get_value(persistent_count, Terms1, DeltaCount),
+    Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of
+                true  -> ?BLANK_DELTA;
+                false -> #delta { start_seq_id = LowSeqId,
+                                  count        = DeltaCount1,
+                                  end_seq_id   = NextSeqId }
+            end,
+    Now = now(),
+    PersistentClient =
+        case IsDurable of
+            true  -> rabbit_msg_store:client_init(?PERSISTENT_MSG_STORE, PRef);
+            false -> undefined % non-durable: no persistent client
+        end,
+    TransientClient  = rabbit_msg_store:client_init(?TRANSIENT_MSG_STORE, TRef),
+    State = #vqstate {
+      q1                   = queue:new(),
+      q2                   = bpqueue:new(),
+      delta                = Delta,
+      q3                   = bpqueue:new(),
+      q4                   = queue:new(),
+      next_seq_id          = NextSeqId,
+      pending_ack          = dict:new(),
+      index_state          = IndexState1,
+      msg_store_clients    = {{PersistentClient, PRef},
+                              {TransientClient, TRef}},
+      on_sync              = ?BLANK_SYNC,
+      durable              = IsDurable,
+      transient_threshold  = NextSeqId,
+
+      len                  = DeltaCount1, % same start value as below
+      persistent_count     = DeltaCount1,
+
+      duration_target      = infinity,
+      target_ram_msg_count = infinity,
+      ram_msg_count        = 0,
+      ram_msg_count_prev   = 0,
+      ram_index_count      = 0,
+      out_counter          = 0,
+      in_counter           = 0,
+      rates                = #rates { egress      = {Now, 0},
+                                      ingress     = {Now, DeltaCount1},
+                                      avg_egress  = 0.0,
+                                      avg_ingress = 0.0,
+                                      timestamp   = Now } },
+    a(maybe_deltas_to_betas(State)).
+
+%% Runs tx_commit_index/1 and remove_pending_ack/2, closes both client
+%% states and passes the refs and count to rabbit_queue_index:terminate/2.
+terminate(State) ->
+    S1 = remove_pending_ack(true, tx_commit_index(State)),
+    #vqstate { persistent_count  = PCount,
+               index_state       = IndexState,
+               msg_store_clients = {{PClient, PRef}, {TClient, TRef}} } = S1,
+    case PClient of
+        undefined -> ok;
+        _         -> rabbit_msg_store:client_terminate(PClient)
+    end,
+    rabbit_msg_store:client_terminate(TClient),
+    Terms = [{persistent_ref, PRef}, {transient_ref, TRef},
+             {persistent_count, PCount}],
+    IndexState1 = rabbit_queue_index:terminate(Terms, IndexState),
+    a(S1 #vqstate { index_state       = IndexState1,
+                    msg_store_clients = undefined }).
+
+%% the only difference between purge and delete is that delete also
+%% needs to delete everything that's been delivered and not ack'd.
+delete_and_terminate(State) ->
+    %% TODO: there is no need to interact with qi at all - which we do
+    %% as part of 'purge' and 'remove_pending_ack', other than
+    %% deleting it.
+    {_PurgeCount, S1} = purge(State),
+    S2 = remove_pending_ack(false, S1),
+    #vqstate { index_state       = IndexState,
+               msg_store_clients = {{PClient, PRef}, {TClient, TRef}} } = S2,
+    IndexState1 = rabbit_queue_index:delete_and_terminate(IndexState),
+    %% NOTE(review): the persistent client is passed to client_terminate
+    %% right after client_delete_and_terminate - confirm both are needed.
+    case PClient of
+        undefined -> ok;
+        _         -> rabbit_msg_store:client_delete_and_terminate(
+                       PClient, ?PERSISTENT_MSG_STORE, PRef),
+                     rabbit_msg_store:client_terminate(PClient)
+    end,
+    rabbit_msg_store:client_delete_and_terminate(
+      TClient, ?TRANSIENT_MSG_STORE, TRef),
+    a(S2 #vqstate { index_state       = IndexState1,
+                    msg_store_clients = undefined }).
+
+%% Removes the entries of q4, then the betas and deltas, then q1, and
+%% resets the message counters. Returns the len the queue had on
+%% entry together with the new state.
+purge(State = #vqstate { q4 = Q4, index_state = IndexState, len = Len }) ->
+    %% TODO: when there are no pending acks, which is a common case,
+    %% we could simply wipe the qi instead of issuing delivers and
+    %% acks for all the messages.
+    Fold = fun rabbit_misc:queue_fold/3,
+    AfterQ4 = remove_queue_entries(Fold, Q4, IndexState),
+    State1 = purge_betas_and_deltas(
+               State #vqstate { q4 = queue:new(), index_state = AfterQ4 }),
+    #vqstate { q1 = Q1, index_state = AfterBetas } = State1,
+    AfterQ1 = remove_queue_entries(Fold, Q1, AfterBetas),
+    {Len, a(State1 #vqstate { q1               = queue:new(),
+                              index_state      = AfterQ1,
+                              len              = 0, persistent_count = 0,
+                              ram_msg_count    = 0, ram_index_count  = 0 })}.
+
+publish(Msg, State) -> % publish/4, then reduce_memory_use/1
+    {_SeqId, State1} = publish(Msg, false, false, State),
+    a(reduce_memory_use(State1)).
+
+%% Both clauses require len = 0. With 'false' the state is returned
+%% untouched with 'blank_ack'; with 'true' the message takes the next
+%% seq id, is marked delivered and is recorded in pending_ack.
+publish_delivered(false, _Msg, State = #vqstate { len = 0 }) ->
+    {blank_ack, a(State)};
+publish_delivered(true, Msg = #basic_message { is_persistent = IsPersistent },
+                  State = #vqstate { len = 0, durable = IsDurable,
+                                     next_seq_id = SeqId, pending_ack = PA,
+                                     persistent_count = PCount,
+                                     in_counter = InCount,
+                                     out_counter = OutCount }) ->
+    Persist = IsDurable andalso IsPersistent,
+    Delivered = (msg_status(Persist, SeqId, Msg))
+        #msg_status { is_delivered = true },
+    {Written, State1} = maybe_write_to_disk(false, false, Delivered, State),
+    PA1 = record_pending_ack(m(Written), PA),
+    {SeqId, a(State1 #vqstate { next_seq_id      = SeqId + 1,
+                                out_counter      = OutCount + 1,
+                                in_counter       = InCount + 1,
+                                persistent_count = PCount + one_if(Persist),
+                                pending_ack      = PA1 })}.
+
+fetch(AckRequired, State = #vqstate { q4               = Q4,
+                                      ram_msg_count    = RamMsgCount,
+                                      out_counter      = OutCount,
+                                      index_state      = IndexState,
+                                      len              = Len,
+                                      persistent_count = PCount,
+                                      pending_ack      = PA }) ->
+    case queue:out(Q4) of
+        {empty, _Q4} -> % q4 is empty: try to refill it from q3
+            case fetch_from_q3_to_q4(State) of
+                {empty, State1} = Result -> a(State1), Result;
+                {loaded, State1}         -> fetch(AckRequired, State1)
+            end;
+        {{value, MsgStatus = #msg_status {
+                   msg = Msg, guid = Guid, seq_id = SeqId,
+                   is_persistent = IsPersistent, is_delivered = IsDelivered,
+                   msg_on_disk = MsgOnDisk, index_on_disk = IndexOnDisk }},
+         Q4a} ->
+            %% Head of q4: deliver it and update index, store and counters.
+            %% 1. Mark it delivered if necessary
+            IndexState1 = maybe_write_delivered(
+                            IndexOnDisk andalso not IsDelivered,
+                            SeqId, IndexState),
+
+            %% 2. Remove from msg_store and queue index, if necessary
+            MsgStore = find_msg_store(IsPersistent),
+            Rem = fun () -> ok = rabbit_msg_store:remove(MsgStore, [Guid]) end,
+            Ack = fun () -> rabbit_queue_index:ack([SeqId], IndexState1) end,
+            IndexState2 =
+                case {AckRequired, MsgOnDisk, IndexOnDisk, IsPersistent} of
+                    {false, true, false,     _} -> Rem(), IndexState1;
+                    {false, true,  true,     _} -> Rem(), Ack();
+                    { true, true,  true, false} -> Ack();
+                    _                           -> IndexState1
+                end,
+
+            %% 3. If an ack is required, add something sensible to PA
+            {AckTag, PA1} = case AckRequired of
+                                true  -> PA2 = record_pending_ack(
+                                                 MsgStatus #msg_status {
+                                                   is_delivered = true }, PA),
+                                         {SeqId, PA2};
+                                false -> {blank_ack, PA}
+                            end,
+            %% persistent_count only drops here if no ack is awaited
+            PCount1 = PCount - one_if(IsPersistent andalso not AckRequired),
+            Len1 = Len - 1,
+            {{Msg, IsDelivered, AckTag, Len1},
+             a(State #vqstate { q4               = Q4a,
+                                ram_msg_count    = RamMsgCount - 1,
+                                out_counter      = OutCount + 1,
+                                index_state      = IndexState2,
+                                len              = Len1,
+                                persistent_count = PCount1,
+                                pending_ack      = PA1 })}
+    end.
+
+ack(AckTags, State) -> %% ack/4 with msg_store removal and a no-op entry fun
+    Remove = fun rabbit_msg_store:remove/2,
+    Ignore = fun (_AckEntry, State1) -> State1 end,
+    a(ack(Remove, Ignore, AckTags, State)).
+
+tx_publish(Txn, Msg = #basic_message { is_persistent = IsPersistent },
+           State = #vqstate { durable           = IsDurable,
+                              msg_store_clients = MSCState }) ->
+    Tx = #tx { pending_messages = Pubs } = lookup_tx(Txn),
+    store_tx(Txn, Tx #tx { pending_messages = [Msg | Pubs] }), % newest first
+    a(case IsPersistent andalso IsDurable of
+          true  -> MsgStatus = msg_status(true, undefined, Msg), % no seq id
+                   {#msg_status { msg_on_disk = true }, MSCState1} =
+                       maybe_write_msg_to_disk(false, MsgStatus, MSCState),
+                   State #vqstate { msg_store_clients = MSCState1 };
+          false -> State % only buffered in the tx record
+      end).
+
+tx_ack(Txn, AckTags, State) -> % buffers the tags; State is unchanged
+    Tx = #tx { pending_acks = Acks } = lookup_tx(Txn),
+    store_tx(Txn, Tx #tx { pending_acks = [AckTags | Acks] }),
+    State.
+
+tx_rollback(Txn, State = #vqstate { durable = IsDurable }) ->
+    #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn),
+    erase_tx(Txn), % forget the txn, then undo its msg_store writes
+    ok = case IsDurable of
+             true  -> rabbit_msg_store:remove(?PERSISTENT_MSG_STORE,
+                                              persistent_guids(Pubs));
+             false -> ok % tx_publish/3 wrote nothing for this queue
+         end,
+    {lists:append(AckTags), a(State)}. % flattened ack tags for the caller
+
+%% With persistent pubs on a durable queue a callback is handed to
+%% rabbit_msg_store:sync/3; else tx_commit_post_msg_store/5 runs now.
+tx_commit(Txn, Fun, State = #vqstate { durable = IsDurable }) ->
+    #tx { pending_acks = AckTags, pending_messages = Pubs } = lookup_tx(Txn),
+    erase_tx(Txn),
+    Msgs = lists:reverse(Pubs),
+    Acks = lists:append(AckTags),
+    Guids = persistent_guids(Msgs),
+    HasGuids = Guids =/= [],
+    {Acks,
+     a(case IsDurable andalso HasGuids of
+           false -> tx_commit_post_msg_store(HasGuids, Msgs, Acks, Fun, State);
+           true  -> ok = rabbit_msg_store:sync(
+                           ?PERSISTENT_MSG_STORE, Guids,
+                           msg_store_callback(Guids, Msgs, Acks, Fun)),
+                    State
+       end)}.
+
+%% Re-publishes the entries behind AckTags, reading from disk if needed.
+requeue(AckTags, State) ->
+    Republish =
+        fun (#msg_status { msg = Msg }, S) ->
+                {_SeqId, S1} = publish(Msg, true, false, S),
+                S1;
+            ({IsPersistent, Guid}, S = #vqstate { msg_store_clients = C }) ->
+                {{ok, Msg = #basic_message{}}, C1} =
+                    read_from_msg_store(C, IsPersistent, Guid),
+                S1 = S #vqstate { msg_store_clients = C1 },
+                {_SeqId, S2} = publish(Msg, true, true, S1),
+                S2
+        end,
+    a(reduce_memory_use(
+        ack(fun rabbit_msg_store:release/2, Republish, AckTags, State))).
+
+len(#vqstate { len = Len }) -> Len. % value of the len field
+
+is_empty(State) -> 0 == len(State). % true when len/1 is 0
+
+%% Turns DurationTarget (seconds) into a message-count target using
+%% the sum of the average rates. reduce_memory_use/1 is only called
+%% when the new target is finite and the old one was 'infinity' or
+%% larger than the new one.
+set_ram_duration_target(
+  DurationTarget,
+  State = #vqstate { rates = #rates { avg_egress  = AvgEgress,
+                                      avg_ingress = AvgIngress },
+                     target_ram_msg_count = OldTarget }) ->
+    Rate = AvgEgress + AvgIngress,
+    NewTarget = case DurationTarget of
+                    infinity -> infinity;
+                    _        -> trunc(DurationTarget * Rate) %% sec * msgs/sec
+                end,
+    State1 = State #vqstate { target_ram_msg_count = NewTarget,
+                              duration_target      = DurationTarget },
+    Keep = NewTarget == infinity orelse
+        (OldTarget =/= infinity andalso NewTarget >= OldTarget),
+    a(case Keep of true -> State1; false -> reduce_memory_use(State1) end).
+
+%% Updates the average rates from the in/out counters, derives the
+%% duration from the current and previous ram_msg_count, resets the
+%% counters and re-applies the stored duration target. Returns
+%% {Duration, State} where Duration is a number or 'infinity'.
+ram_duration(State = #vqstate { rates              = Rates,
+                                in_counter         = InCount,
+                                out_counter        = OutCount,
+                                ram_msg_count      = RamMsgCount,
+                                duration_target    = DurationTarget,
+                                ram_msg_count_prev = RamMsgCountPrev }) ->
+    #rates { egress = Egress, ingress = Ingress, timestamp = Then } = Rates,
+    Now = now(),
+    {AvgEgress,  Egress1}  = update_rate(Now, Then, OutCount, Egress),
+    {AvgIngress, Ingress1} = update_rate(Now, Then, InCount, Ingress),
+
+    Duration = %% msgs / (msgs/sec) == sec
+        case AvgEgress == 0 andalso AvgIngress == 0 of
+            true  -> infinity;
+            false -> (RamMsgCountPrev + RamMsgCount) /
+                         (2 * (AvgEgress + AvgIngress))
+        end,
+
+    Rates1 = Rates #rates { egress      = Egress1,
+                            ingress     = Ingress1,
+                            avg_egress  = AvgEgress,
+                            avg_ingress = AvgIngress,
+                            timestamp   = Now },
+    State1 = State #vqstate { rates              = Rates1,
+                              in_counter         = 0,
+                              out_counter        = 0,
+                              ram_msg_count_prev = RamMsgCount },
+    {Duration, set_ram_duration_target(DurationTarget, State1)}.
+
+%% Reports whether an idle timeout is wanted. With nothing pending in
+%% on_sync, reduce_memory_use/4 is run with phase-change funs that leave
+%% the state untouched, and only its boolean verdict is returned. With
+%% anything pending in on_sync the answer is always true.
+needs_idle_timeout(State = #vqstate { on_sync = ?BLANK_SYNC }) ->
+    KeepWithQuota = fun (_Quota, StateN) -> StateN end,
+    Keep          = fun (StateN)         -> StateN end,
+    {Needed, _Unchanged} =
+        reduce_memory_use(KeepWithQuota, KeepWithQuota, Keep, State),
+    Needed;
+needs_idle_timeout(_State) ->
+    true.
+
+%% Idle-timeout handler: commits any transactions waiting in on_sync, then
+%% reduces memory use, and checks the resulting state with a/1.
+idle_timeout(State) ->
+    Committed = tx_commit_index(State),
+    Reduced   = reduce_memory_use(Committed),
+    a(Reduced).
+
+%% Flushes the queue index via rabbit_queue_index:flush/1 and stores the
+%% returned index state.
+handle_pre_hibernate(State = #vqstate { index_state = IndexState }) ->
+    Flushed = rabbit_queue_index:flush(IndexState),
+    State #vqstate { index_state = Flushed }.
+
+%% Returns a proplist snapshot of the queue state: the lengths of q1-q4,
+%% the raw delta record, the various counters, the number of funs waiting
+%% in on_sync (reported as outstanding_txns) and the average rates.
+status(#vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4,
+                  len                  = Len,
+                  pending_ack          = PendingAcks,
+                  on_sync              = #sync { funs = SyncFuns },
+                  target_ram_msg_count = TargetRamMsgCount,
+                  ram_msg_count        = RamMsgCount,
+                  ram_index_count      = RamIndexCount,
+                  next_seq_id          = NextSeqId,
+                  persistent_count     = PersistentCount,
+                  rates                = #rates { avg_egress  = AvgOut,
+                                                  avg_ingress = AvgIn } }) ->
+    [{q1,                   queue:len(Q1)},
+     {q2,                   bpqueue:len(Q2)},
+     {delta,                Delta},
+     {q3,                   bpqueue:len(Q3)},
+     {q4,                   queue:len(Q4)},
+     {len,                  Len},
+     {pending_acks,         dict:size(PendingAcks)},
+     {outstanding_txns,     length(SyncFuns)},
+     {target_ram_msg_count, TargetRamMsgCount},
+     {ram_msg_count,        RamMsgCount},
+     {ram_index_count,      RamIndexCount},
+     {next_seq_id,          NextSeqId},
+     {persistent_count,     PersistentCount},
+     {avg_egress_rate,      AvgOut},
+     {avg_ingress_rate,     AvgIn}].
+
+%%----------------------------------------------------------------------------
+%% Minor helpers
+%%----------------------------------------------------------------------------
+
+%% Asserts the structural invariants of a queue state and returns the
+%% state unchanged. Any violated check fails with a badmatch.
+a(State = #vqstate { q1 = Q1, q2 = Q2, delta = Delta, q3 = Q3, q4 = Q4,
+                     len              = Len,
+                     persistent_count = PersistentCount,
+                     ram_msg_count    = RamMsgCount,
+                     ram_index_count  = RamIndexCount }) ->
+    Q1Empty    = queue:is_empty(Q1),
+    Q2Empty    = bpqueue:is_empty(Q2),
+    DeltaEmpty = (Delta #delta.count == 0),
+    Q3Empty    = bpqueue:is_empty(Q3),
+    Q4Empty    = queue:is_empty(Q4),
+    NoMsgs     = (Len == 0),
+    true = Q1Empty or not Q3Empty,          %% q3 empty    => q1 empty
+    true = Q2Empty or not DeltaEmpty,       %% delta empty => q2 empty
+    true = DeltaEmpty or not Q3Empty,       %% q3 empty    => delta empty
+    true = NoMsgs == (Q3Empty and Q4Empty), %% len is 0 iff q3, q4 are empty
+    %% none of the counters may go negative
+    true = Len             >= 0,
+    true = PersistentCount >= 0,
+    true = RamMsgCount     >= 0,
+    true = RamIndexCount   >= 0,
+    State.
+
+%% Asserts the invariants of a single #msg_status and returns it
+%% unchanged. Any violated check fails with a badmatch.
+m(MsgStatus = #msg_status { msg           = Msg,
+                            is_persistent = IsPersistent,
+                            msg_on_disk   = MsgOnDisk,
+                            index_on_disk = IndexOnDisk }) ->
+    HasBody = (Msg =/= undefined),
+    true = IndexOnDisk or (not IsPersistent), %% persistent => index on disk
+    true = MsgOnDisk or (not IndexOnDisk),    %% index on disk => msg on disk
+    true = HasBody or MsgOnDisk,              %% body is in RAM or on disk
+    MsgStatus.
+
+%% Maps a boolean onto an integer: true -> 1, false -> 0.
+one_if(Flag) when Flag =:= true  -> 1;
+one_if(Flag) when Flag =:= false -> 0.
+
+%% Prepends Elem to List when the flag is true; otherwise returns List
+%% untouched.
+cons_if(true,  Elem,  List) -> [Elem | List];
+cons_if(false, _Elem, List) -> List.
+
+%% Builds the initial #msg_status for a message: it carries the message
+%% itself and its guid, and is marked as not delivered with neither the
+%% message nor its index entry on disk.
+msg_status(IsPersistent, SeqId, Msg = #basic_message { guid = Guid }) ->
+    #msg_status { seq_id        = SeqId,
+                  guid          = Guid,
+                  msg           = Msg,
+                  is_persistent = IsPersistent,
+                  is_delivered  = false,
+                  msg_on_disk   = false,
+                  index_on_disk = false }.
+
+%% Selects the message store for a message: the persistent store when the
+%% flag is true, the transient store when it is false.
+find_msg_store(IsPersistent) when IsPersistent =:= true  ->
+    ?PERSISTENT_MSG_STORE;
+find_msg_store(IsPersistent) when IsPersistent =:= false ->
+    ?TRANSIENT_MSG_STORE.
+
+%% Runs Fun(MsgStore, ClientState) against either the persistent (true) or
+%% the transient (false) message store client. Fun must return
+%% {Result, ClientState1}; the updated client state is put back into the
+%% pair of clients, next to its unchanged reference, and
+%% {Result, Clients1} is returned.
+with_msg_store_state({{PClient, PRef}, TClient}, true, Fun) ->
+    {Result, PClient1} = Fun(?PERSISTENT_MSG_STORE, PClient),
+    {Result, {{PClient1, PRef}, TClient}};
+with_msg_store_state({PClient, {TClient, TRef}}, false, Fun) ->
+    {Result, TClient1} = Fun(?TRANSIENT_MSG_STORE, TClient),
+    {Result, {PClient, {TClient1, TRef}}}.
+
+%% Reads the message with the given guid through rabbit_msg_store:read/3,
+%% using the store client chosen by IsPersistent. Returns
+%% {ReadResult, MSCState1}.
+read_from_msg_store(MSCState, IsPersistent, Guid) ->
+    Read = fun (MsgStore, ClientState) ->
+                   rabbit_msg_store:read(MsgStore, Guid, ClientState)
+           end,
+    with_msg_store_state(MSCState, IsPersistent, Read).
+
+%% Records SeqId as delivered in the queue index when the flag is true;
+%% returns the index state untouched when it is false.
+maybe_write_delivered(true, SeqId, IndexState) ->
+    rabbit_queue_index:deliver([SeqId], IndexState);
+maybe_write_delivered(false, _SeqId, IndexState) ->
+    IndexState.
+
+%% Fetches the #tx record kept under {txn, Txn} in the process dictionary.
+%% When none is stored, returns a fresh record with no pending messages
+%% and no pending acks.
+lookup_tx(Txn) ->
+    case get({txn, Txn}) of
+        undefined -> #tx { pending_messages = [], pending_acks = [] };
+        Tx        -> Tx
+    end.
+
+%% Saves Tx under {txn, Txn} in the process dictionary; returns whatever
+%% put/2 returns.
+store_tx(Txn, Tx) ->
+    Key = {txn, Txn},
+    put(Key, Tx).
+
+%% Removes the entry kept under {txn, Txn} from the process dictionary;
+%% returns whatever erase/1 returns.
+erase_tx(Txn) ->
+    Key = {txn, Txn},
+    erase(Key).
+
+%% Collects, in order, the guids of those messages in Pubs that are
+%% flagged persistent. Every other element is skipped.
+persistent_guids(Pubs) ->
+    lists:foldr(
+      fun (#basic_message { guid = Guid, is_persistent = true }, Acc) ->
+              [Guid | Acc];
+          (_Other, Acc) ->
+              Acc
+      end, [], Pubs).
+
+%% Turns a list of {Guid, SeqId, IsPersistent, IsDelivered} index entries
+%% into a bpqueue of #msg_status records with no message body and with
+%% both the message and its index entry marked as on disk.
+%%
+%% Transient entries whose SeqId lies below TransientThreshold are left
+%% out; they are acked in the index, after first being marked delivered
+%% if they were not already. Returns {Q, IndexState1}.
+betas_from_index_entries(List, TransientThreshold, IndexState) ->
+    Classify =
+        fun ({Guid, SeqId, IsPersistent, IsDelivered}, {Keep, Dlv, Ack}) ->
+                Dropped = SeqId < TransientThreshold andalso not IsPersistent,
+                case Dropped of
+                    true  ->
+                        {Keep,
+                         cons_if(not IsDelivered, SeqId, Dlv),
+                         [SeqId | Ack]};
+                    false ->
+                        Beta = m(#msg_status { msg           = undefined,
+                                               guid          = Guid,
+                                               seq_id        = SeqId,
+                                               is_persistent = IsPersistent,
+                                               is_delivered  = IsDelivered,
+                                               msg_on_disk   = true,
+                                               index_on_disk = true }),
+                        {[Beta | Keep], Dlv, Ack}
+                end
+        end,
+    {Filtered, Delivers, Acks} = lists:foldr(Classify, {[], [], []}, List),
+    Betas       = bpqueue:from_list([{true, Filtered}]),
+    IndexState1 = rabbit_queue_index:deliver(Delivers, IndexState),
+    IndexState2 = rabbit_queue_index:ack(Acks, IndexState1),
+    {Betas, IndexState2}.
+
+%% Merges two #delta records into one. The first argument is the older
+%% delta, i.e. the one expected to start at the lower sequence id.
+%%
+%% Two blank deltas give ?BLANK_DELTA; a blank delta combined with a
+%% non-blank one gives the non-blank one unchanged. Otherwise the result
+%% starts where the older delta starts, ends where the newer one ends,
+%% and carries the sum of both counts. The "ASSERTION" matches crash with
+%% a badmatch when a delta's start + count exceeds its end, or when the
+%% two deltas are passed in the wrong order.
+combine_deltas(?BLANK_DELTA_PATTERN(X), ?BLANK_DELTA_PATTERN(Y)) ->
+    ?BLANK_DELTA;
+combine_deltas(?BLANK_DELTA_PATTERN(X), #delta { start_seq_id = Start,
+                                                 count        = Count,
+                                                 end_seq_id   = End } = B) ->
+    true = Start + Count =< End, %% ASSERTION
+    B;
+combine_deltas(#delta { start_seq_id = Start,
+                        count        = Count,
+                        end_seq_id   = End } = A, ?BLANK_DELTA_PATTERN(Y)) ->
+    true = Start + Count =< End, %% ASSERTION
+    A;
+combine_deltas(#delta { start_seq_id = StartLow,
+                        count        = CountLow,
+                        end_seq_id   = EndLow },
+               #delta { start_seq_id = StartHigh,
+                        count        = CountHigh,
+                        end_seq_id   = EndHigh }) ->
+    Count = CountLow + CountHigh,
+    %% each delta must be internally consistent, and the combined count
+    %% must still fit between the low start and the high end
+    true = (StartLow =< StartHigh) %% ASSERTIONS
+        andalso ((StartLow + CountLow) =< EndLow)
+        andalso ((StartHigh + CountHigh) =< EndHigh)
+        andalso ((StartLow + Count) =< EndHigh),
+    #delta { start_seq_id = StartLow, count = Count, end_seq_id = EndHigh }.
+
+%% Folds Fun(Value, Acc) over a bpqueue with bpqueue:foldr/3, hiding the
+%% per-entry prefix from Fun.
+beta_fold(Fun, Init, Q) ->
+    DropPrefix = fun (_Prefix, Value, Acc) -> Fun(Value, Acc) end,
+    bpqueue:foldr(DropPrefix, Init, Q).
+
+%% Computes a per-second rate averaged over the current period and the
+%% previous one: (Count + OCount) events over the time from OThen to Now.
+%% Returns {AvgRate, {Then, Count}}; the second element is the sample to
+%% pass in on the next call. timer:now_diff/2 yields microseconds, hence
+%% the 1000000.0 factor.
+update_rate(Now, Then, Count, {OThen, OCount}) ->
+    ElapsedMicros = timer:now_diff(Now, OThen),
+    AvgRate = 1000000.0 * (Count + OCount) / ElapsedMicros,
+    {AvgRate, {Then, Count}}.
+
+%%----------------------------------------------------------------------------
+%% Internal major helpers for Public API
+%%----------------------------------------------------------------------------
+
+%% Builds a zero-arity callback for a transaction commit. When invoked,
+%% the callback spawns a new process which asks the queue process (the
+%% one that built the callback, captured as Self) to run
+%% tx_commit_post_msg_store/5 against its state, with HasPersistentPubs
+%% fixed to true.
+%%
+%% NOTE(review): the first fun passed to rabbit_misc:with_exit_handler/2
+%% removes PersistentGuids from the persistent message store; presumably
+%% it is the handler run if F exits - confirm against rabbit_misc.
+msg_store_callback(PersistentGuids, Pubs, AckTags, Fun) ->
+    %% capture the pid now: self() inside the spawned fun would be the
+    %% spawned process
+    Self = self(),
+    F = fun () -> rabbit_amqqueue:maybe_run_queue_via_backing_queue(
+                    Self, fun (StateN) -> tx_commit_post_msg_store(
+                                            true, Pubs, AckTags, Fun, StateN)
+                          end)
+        end,
+    fun () -> spawn(fun () -> ok = rabbit_misc:with_exit_handler(
+                                     fun () -> rabbit_msg_store:remove(
+                                                 ?PERSISTENT_MSG_STORE,
+                                                 PersistentGuids)
+                                     end, F)
+                    end)
+    end.
+
+%% Second stage of a transaction commit. Either queues the transaction's
+%% publishes, acks and completion fun in on_sync for a later
+%% tx_commit_index/1, or applies them straight away.
+%%
+%% The work is deferred only when the queue is durable and the
+%% transaction has persistent publishes or acks of persistent messages.
+%% Otherwise tx_commit_index/1 is run on a state whose on_sync holds just
+%% this transaction, after which the previously accumulated on_sync is
+%% put back.
+tx_commit_post_msg_store(HasPersistentPubs, Pubs, AckTags, Fun,
+                         State = #vqstate {
+                           on_sync     = OnSync = #sync {
+                                           acks_persistent = SPAcks,
+                                           acks_all        = SAcks,
+                                           pubs            = SPubs,
+                                           funs            = SFuns },
+                           pending_ack = PA,
+                           durable     = IsDurable }) ->
+    %% ack tags whose pending_ack entry is {true, Guid}; an entry that is
+    %% still a full #msg_status is treated as not persistent
+    PersistentAcks =
+        case IsDurable of
+            true  -> [AckTag || AckTag <- AckTags,
+                                case dict:fetch(AckTag, PA) of
+                                    #msg_status {}        -> false;
+                                    {IsPersistent, _Guid} -> IsPersistent
+                                end];
+            false -> []
+        end,
+    case IsDurable andalso (HasPersistentPubs orelse PersistentAcks =/= []) of
+        %% defer: prepend this transaction to the on_sync accumulators
+        true  -> State #vqstate { on_sync = #sync {
+                                    acks_persistent = [PersistentAcks | SPAcks],
+                                    acks_all        = [AckTags | SAcks],
+                                    pubs            = [Pubs | SPubs],
+                                    funs            = [Fun | SFuns] }};
+        %% apply now: commit only this transaction, then restore the
+        %% on_sync that was pending before
+        false -> State1 = tx_commit_index(
+                            State #vqstate { on_sync = #sync {
+                                               acks_persistent = [],
+                                               acks_all        = [AckTags],
+                                               pubs            = [Pubs],
+                                               funs            = [Fun] } }),
+                 State1 #vqstate { on_sync = OnSync }
+    end.
+
+%% Applies everything accumulated in on_sync: acks all recorded ack tags,
+%% publishes all recorded messages, syncs the queue index, runs the
+%% recorded funs, then resets on_sync to ?BLANK_SYNC and reduces memory
+%% use. A state whose on_sync is already blank is returned unchanged.
+tx_commit_index(State = #vqstate { on_sync = ?BLANK_SYNC }) ->
+    State;
+tx_commit_index(State = #vqstate { on_sync = #sync {
+                                     acks_persistent = SPAcks,
+                                     acks_all        = SAcks,
+                                     pubs            = SPubs,
+                                     funs            = SFuns },
+                                   durable = IsDurable }) ->
+    PAcks = lists:append(SPAcks),
+    Acks  = lists:append(SAcks),
+    %% the per-transaction lists are reversed before flattening, as are
+    %% the funs below
+    Pubs  = lists:append(lists:reverse(SPubs)),
+    %% acks are applied first; each message is then published as not yet
+    %% delivered, with its msg_on_disk flag set to whether it counts as
+    %% persistent on this queue. Seq ids of persistent publishes are added
+    %% to the persistent ack seq ids, and the whole list is synced below.
+    {SeqIds, State1 = #vqstate { index_state = IndexState }} =
+        lists:foldl(
+          fun (Msg = #basic_message { is_persistent = IsPersistent },
+               {SeqIdsAcc, State2}) ->
+                  IsPersistent1 = IsDurable andalso IsPersistent,
+                  {SeqId, State3} = publish(Msg, false, IsPersistent1, State2),
+                  {cons_if(IsPersistent1, SeqId, SeqIdsAcc), State3}
+          end, {PAcks, ack(Acks, State)}, Pubs),
+    IndexState1 = rabbit_queue_index:sync(SeqIds, IndexState),
+    %% the funs are only run once the index sync has returned
+    [ Fun() || Fun <- lists:reverse(SFuns) ],
+    reduce_memory_use(
+      State1 #vqstate { index_state = IndexState1, on_sync = ?BLANK_SYNC }).
+
+%% Repeatedly empties q3: its entries are removed through
+%% remove_queue_entries/3, q3 is replaced with a fresh bpqueue,
+%% maybe_deltas_to_betas/1 is given the chance to refill it, and the
+%% process repeats until q3 is found empty.
+purge_betas_and_deltas(State = #vqstate { q3          = Q3,
+                                          index_state = IndexState }) ->
+    case bpqueue:is_empty(Q3) of
+        false ->
+            IndexState1 = remove_queue_entries(fun beta_fold/3, Q3,
+                                               IndexState),
+            Emptied = State #vqstate { q3          = bpqueue:new(),
+                                       index_state = IndexState1 },
+            purge_betas_and_deltas(maybe_deltas_to_betas(Emptied));
+        true ->
+            State
+    end.
+
+%% Removes every entry of Q. Fold is the fold function used to walk Q with
+%% remove_queue_entries1/2. Guids collected per message store are removed
+%% from that store; collected seq ids are marked delivered and then acked
+%% in the queue index. Returns the new index state.
+remove_queue_entries(Fold, Q, IndexState) ->
+    Seed = {orddict:new(), [], []},
+    {GuidsByStore, Delivers, Acks} =
+        Fold(fun remove_queue_entries1/2, Seed, Q),
+    RemoveFromStore = fun (MsgStore, Guids, ok) ->
+                              rabbit_msg_store:remove(MsgStore, Guids)
+                      end,
+    ok = orddict:fold(RemoveFromStore, ok, GuidsByStore),
+    IndexState1 = rabbit_queue_index:deliver(Delivers, IndexState),
+    rabbit_queue_index:ack(Acks, IndexState1).
+
+%% Fold step for remove_queue_entries/3. For one #msg_status it records:
+%% the guid under its message store, if the message is on disk; the seq id
+%% as needing a 'deliver', if the index entry is on disk and the message
+%% was not delivered; and the seq id as needing an ack, if the index entry
+%% is on disk.
+remove_queue_entries1(
+  #msg_status { guid = Guid, seq_id = SeqId,
+                is_delivered = IsDelivered, msg_on_disk = MsgOnDisk,
+                index_on_disk = IndexOnDisk, is_persistent = IsPersistent },
+  {GuidsByStore, Delivers, Acks}) ->
+    GuidsByStore1 =
+        case MsgOnDisk of
+            true  -> rabbit_misc:orddict_cons(find_msg_store(IsPersistent),
+                                              Guid, GuidsByStore);
+            false -> GuidsByStore
+        end,
+    Delivers1 = cons_if(IndexOnDisk andalso not IsDelivered, SeqId, Delivers),
+    Acks1     = cons_if(IndexOnDisk, SeqId, Acks),
+    {GuidsByStore1, Delivers1, Acks1}.
+
+%%----------------------------------------------------------------------------
+%% Internal gubbins for publishing
+%%----------------------------------------------------------------------------
+
+%% Adds Msg to the queue under the next sequence id and returns
+%% {SeqId, State1}. The message counts as persistent only when the queue
+%% is durable and the message itself is persistent. It is written to disk
+%% as far as maybe_write_to_disk/4 requires without forcing, then placed
+%% in q4 if q3 is empty and in q1 otherwise. next_seq_id, len, in_counter
+%% and ram_msg_count each grow by one; persistent_count grows by one for
+%% a persistent message.
+publish(Msg = #basic_message { is_persistent = IsPersistent },
+        IsDelivered, MsgOnDisk,
+        State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4,
+                           next_seq_id      = SeqId,
+                           len              = Len,
+                           in_counter       = InCount,
+                           persistent_count = PCount,
+                           durable          = IsDurable,
+                           ram_msg_count    = RamMsgCount }) ->
+    Persist   = IsDurable andalso IsPersistent,
+    Fresh     = msg_status(Persist, SeqId, Msg),
+    MsgStatus = Fresh #msg_status { is_delivered = IsDelivered,
+                                    msg_on_disk  = MsgOnDisk },
+    {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State),
+    State2 =
+        case bpqueue:is_empty(Q3) of
+            true  -> State1 #vqstate { q4 = queue:in(m(MsgStatus1), Q4) };
+            false -> State1 #vqstate { q1 = queue:in(m(MsgStatus1), Q1) }
+        end,
+    State3 = State2 #vqstate {
+               next_seq_id      = SeqId + 1,
+               len              = Len + 1,
+               in_counter       = InCount + 1,
+               persistent_count = PCount + one_if(Persist),
+               ram_msg_count    = RamMsgCount + 1 },
+    {SeqId, State3}.
+
+%% Writes the message body to its message store if that is required.
+%% Returns {MsgStatus1, MSCState1}.
+%%
+%% Clause order matters:
+%%  1. the message is already on disk: nothing to do;
+%%  2. Force is true or the message is persistent: write it, then mark
+%%     msg_on_disk;
+%%  3. anything else: leave the message in RAM only.
+maybe_write_msg_to_disk(_Force, MsgStatus = #msg_status {
+                                  msg_on_disk = true }, MSCState) ->
+    {MsgStatus, MSCState};
+maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
+                                 msg = Msg, guid = Guid,
+                                 is_persistent = IsPersistent }, MSCState)
+  when Force orelse IsPersistent ->
+    %% the write must return ok; anything else crashes with a badmatch
+    {ok, MSCState1} =
+        with_msg_store_state(
+          MSCState, IsPersistent,
+          fun (MsgStore, MSCState2) ->
+                  Msg1 = Msg #basic_message {
+                           %% don't persist any recoverable decoded properties
+                           content = rabbit_binary_parser:clear_decoded_content(
+                                       Msg #basic_message.content)},
+                  rabbit_msg_store:write(MsgStore, Guid, Msg1, MSCState2)
+          end),
+    {MsgStatus #msg_status { msg_on_disk = true }, MSCState1};
+maybe_write_msg_to_disk(_Force, MsgStatus, MSCState) ->
+    {MsgStatus, MSCState}.
+
+%% Publishes the message's entry to the queue index if that is required.
+%% Returns {MsgStatus1, IndexState1}.
+%%
+%% Clause order matters:
+%%  1. the index entry is already on disk: nothing to do;
+%%  2. Force is true or the message is persistent: publish the entry,
+%%     record the delivered flag if it is set, and mark index_on_disk;
+%%  3. anything else: leave the index entry in RAM only.
+%% The first two clauses assert that the message itself is already on
+%% disk.
+maybe_write_index_to_disk(_Force, MsgStatus = #msg_status {
+                                    index_on_disk = true }, IndexState) ->
+    true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION
+    {MsgStatus, IndexState};
+maybe_write_index_to_disk(Force, MsgStatus = #msg_status {
+                                   guid = Guid, seq_id = SeqId,
+                                   is_persistent = IsPersistent,
+                                   is_delivered = IsDelivered }, IndexState)
+  when Force orelse IsPersistent ->
+    true = MsgStatus #msg_status.msg_on_disk, %% ASSERTION
+    IndexState1 = rabbit_queue_index:publish(Guid, SeqId, IsPersistent,
+                                             IndexState),
+    {MsgStatus #msg_status { index_on_disk = true },
+     maybe_write_delivered(IsDelivered, SeqId, IndexState1)};
+maybe_write_index_to_disk(_Force, MsgStatus, IndexState) ->
+    {MsgStatus, IndexState}.
+
+%% Writes the message body first and its index entry second, each only as
+%% far as maybe_write_msg_to_disk/3 and maybe_write_index_to_disk/3
+%% decide. ForceMsg and ForceIndex force the respective write. Returns
+%% {MsgStatus1, State1} with both client states stored back.
+maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus,
+                    State = #vqstate { index_state       = IndexState,
+                                       msg_store_clients = MSCState }) ->
+    {WithMsg, MSCState1} =
+        maybe_write_msg_to_disk(ForceMsg, MsgStatus, MSCState),
+    {WithIndex, IndexState1} =
+        maybe_write_index_to_disk(ForceIndex, WithMsg, IndexState),
+    State1 = State #vqstate { msg_store_clients = MSCState1,
+                              index_state       = IndexState1 },
+    {WithIndex, State1}.
+
+%%----------------------------------------------------------------------------
+%% Internal gubbins for acks
+%%----------------------------------------------------------------------------
+
+%% Stores a pending-ack entry under the message's seq id. A message that
+%% is on disk is remembered only as {IsPersistent, Guid}; a message that
+%% is not on disk is remembered as its whole #msg_status.
+record_pending_ack(MsgStatus = #msg_status { seq_id        = SeqId,
+                                             guid          = Guid,
+                                             is_persistent = IsPersistent,
+                                             msg_on_disk   = MsgOnDisk },
+                   PA) ->
+    Entry = case MsgOnDisk of
+                false -> MsgStatus;
+                true  -> {IsPersistent, Guid}
+            end,
+    dict:store(SeqId, Entry, PA).
+
+%% Drops every pending ack and replaces pending_ack with an empty dict.
+%% With KeepPersistent = true only the guids collected for the transient
+%% message store are removed from it and the queue index is left alone.
+%% With KeepPersistent = false the collected seq ids are acked in the
+%% index and the guids are removed from every store they were collected
+%% for.
+remove_pending_ack(KeepPersistent,
+                   State = #vqstate { pending_ack = PA,
+                                      index_state = IndexState }) ->
+    {SeqIds, GuidsByStore} =
+        dict:fold(fun accumulate_ack/3, {[], orddict:new()}, PA),
+    Cleared = State #vqstate { pending_ack = dict:new() },
+    case KeepPersistent of
+        false ->
+            IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState),
+            ok = orddict:fold(
+                   fun (MsgStore, StoreGuids, ok) ->
+                           rabbit_msg_store:remove(MsgStore, StoreGuids)
+                   end, ok, GuidsByStore),
+            Cleared #vqstate { index_state = IndexState1 };
+        true ->
+            case orddict:find(?TRANSIENT_MSG_STORE, GuidsByStore) of
+                {ok, TransientGuids} ->
+                    ok = rabbit_msg_store:remove(?TRANSIENT_MSG_STORE,
+                                                 TransientGuids);
+                error ->
+                    ok
+            end,
+            Cleared
+    end.
+
+%% Processes a list of ack tags (seq ids). Returns the state unchanged for
+%% an empty list.
+%%
+%% MsgStoreFun(MsgStore, Guids) is called once per message store that has
+%% guids to dispose of, and must return ok. Fun(AckEntry, State) is called
+%% once per ack tag with the pending_ack entry already erased from the
+%% state, and returns the state to carry on with.
+%%
+%% Every tag must be present in pending_ack; a missing one crashes with a
+%% badmatch.
+ack(_MsgStoreFun, _Fun, [], State) ->
+    State;
+ack(MsgStoreFun, Fun, AckTags, State) ->
+    {{SeqIds, GuidsByStore}, State1 = #vqstate { index_state      = IndexState,
+                                                 persistent_count = PCount }} =
+        lists:foldl(
+          fun (SeqId, {Acc, State2 = #vqstate { pending_ack = PA }}) ->
+                  {ok, AckEntry} = dict:find(SeqId, PA),
+                  {accumulate_ack(SeqId, AckEntry, Acc),
+                   Fun(AckEntry, State2 #vqstate {
+                                   pending_ack = dict:erase(SeqId, PA) })}
+          end, {{[], orddict:new()}, State}, AckTags),
+    IndexState1 = rabbit_queue_index:ack(SeqIds, IndexState),
+    ok = orddict:fold(fun (MsgStore, Guids, ok) ->
+                              MsgStoreFun(MsgStore, Guids)
+                      end, ok, GuidsByStore),
+    %% persistent_count drops by the number of guids that were collected
+    %% for the persistent message store
+    PCount1 = PCount - case orddict:find(?PERSISTENT_MSG_STORE, GuidsByStore) of
+                           error       -> 0;
+                           {ok, Guids} -> length(Guids)
+                       end,
+    State1 #vqstate { index_state      = IndexState1,
+                      persistent_count = PCount1 }.
+
+%% Folds one pending-ack entry into {SeqIds, GuidsByStore}.
+%%
+%% An entry that is still a full #msg_status must be transient with
+%% neither message nor index on disk (the pattern doubles as an
+%% assertion) and contributes nothing. An {IsPersistent, Guid} entry adds
+%% its guid under the matching message store, and its seq id only when it
+%% is persistent.
+accumulate_ack(_SeqId,
+               #msg_status { is_persistent = false, %% ASSERTIONS
+                             msg_on_disk   = false,
+                             index_on_disk = false },
+               Acc) ->
+    Acc;
+accumulate_ack(SeqId, {IsPersistent, Guid}, {SeqIds, GuidsByStore}) ->
+    SeqIds1       = cons_if(IsPersistent, SeqId, SeqIds),
+    GuidsByStore1 = rabbit_misc:orddict_cons(find_msg_store(IsPersistent),
+                                             Guid, GuidsByStore),
+    {SeqIds1, GuidsByStore1}.
+
+%%----------------------------------------------------------------------------
+%% Phase changes
+%%----------------------------------------------------------------------------
+
+%% Determine whether a reduction in memory use is necessary, and call
+%% functions to perform the required phase changes. The function can
+%% also be used to just do the former, by passing in dummy phase
+%% change functions.
+%%
+%% The function does not report on any needed beta->delta conversions,
+%% though the conversion function for that is called as necessary. The
+%% reason is twofold. Firstly, this is safe because the conversion is
+%% only ever necessary just after a transition to a
+%% target_ram_msg_count of zero or after an incremental alpha->beta
+%% conversion. In the former case the conversion is performed straight
+%% away (i.e. any betas present at the time are converted to deltas),
+%% and in the latter case the need for a conversion is flagged up
+%% anyway. Secondly, this is necessary because we do not have a
+%% precise and cheap predicate for determining whether a beta->delta
+%% conversion is necessary - due to the complexities of retaining up
+%% to one segment's worth of messages in q3 - and thus would risk
+%% perpetually reporting the need for a conversion when no such
+%% conversion is needed. That in turn could cause an infinite loop.
+%%
+%% Returns {Reduce, State1}, where Reduce says whether a reduction was
+%% found to be necessary. AlphaBetaFun and BetaGammaFun are called with
+%% (ChunkSize, State); BetaDeltaFun is called with (State).
+reduce_memory_use(AlphaBetaFun, BetaGammaFun, BetaDeltaFun, State) ->
+    %% step 1: too many messages in RAM?
+    {Reduce, State1} = case chunk_size(State #vqstate.ram_msg_count,
+                                       State #vqstate.target_ram_msg_count) of
+                           0  -> {false, State};
+                           S1 -> {true, AlphaBetaFun(S1, State)}
+                       end,
+    %% step 2: depends on the target
+    case State1 #vqstate.target_ram_msg_count of
+        infinity -> {Reduce, State1};
+        0        -> {Reduce, BetaDeltaFun(State1)};
+        %% index entries are only pushed out once a full batch of
+        %% ?IO_BATCH_SIZE is over the permitted count
+        _        -> case chunk_size(State1 #vqstate.ram_index_count,
+                                   permitted_ram_index_count(State1)) of
+                        ?IO_BATCH_SIZE = S2 -> {true, BetaGammaFun(S2, State1)};
+                        _                   -> {Reduce, State1}
+                    end
+    end.
+
+%% Runs reduce_memory_use/4 with the real phase-change functions and
+%% returns only the resulting state.
+reduce_memory_use(State) ->
+    AlphaBeta = fun push_alphas_to_betas/2,
+    BetaGamma = fun limit_ram_index/2,
+    BetaDelta = fun push_betas_to_deltas/1,
+    {_Reduced, State1} =
+        reduce_memory_use(AlphaBeta, BetaGamma, BetaDelta, State),
+    State1.
+
+%% Writes up to Quota RAM-only index entries to disk, working through q2
+%% first and then q3 with whatever quota is left. ram_index_count is
+%% lowered by the amount of quota actually used.
+limit_ram_index(Quota, State = #vqstate { q2              = Q2,
+                                          q3              = Q3,
+                                          index_state     = IndexState,
+                                          ram_index_count = RamIndexCount }) ->
+    Walk = fun bpqueue:map_fold_filter_r/4,
+    {Q2a, {QuotaAfterQ2, IndexState1}} =
+        limit_ram_index(Walk, Q2, {Quota, IndexState}),
+    %% TODO: we shouldn't be writing index entries for messages that
+    %% can never end up in delta due to them residing in the only
+    %% segment held by q3.
+    {Q3a, {QuotaLeft, IndexState2}} =
+        limit_ram_index(Walk, Q3, {QuotaAfterQ2, IndexState1}),
+    QuotaUsed = Quota - QuotaLeft,
+    State #vqstate { q2              = Q2a,
+                     q3              = Q3a,
+                     index_state     = IndexState2,
+                     ram_index_count = RamIndexCount - QuotaUsed }.
+
+%% Worker for limit_ram_index/2: walks one bpqueue with MapFoldFilterFun,
+%% forcing index entries to disk until the quota is used up. Returns
+%% {Q1, {QuotaLeft, IndexState1}}. A quota of 0 returns immediately.
+%%
+%% NOTE(review): fun erlang:'not'/1 is presumably the prefix predicate, so
+%% that only entries queued with prefix 'false' (index not on disk) are
+%% visited, and the {true, ...} result presumably re-files the entry under
+%% prefix 'true' - confirm against bpqueue:map_fold_filter_r/4.
+limit_ram_index(_MapFoldFilterFun, Q, {0, IndexState}) ->
+    {Q, {0, IndexState}};
+limit_ram_index(MapFoldFilterFun, Q, {Quota, IndexState}) ->
+    MapFoldFilterFun(
+      fun erlang:'not'/1,
+      %% quota exhausted: stop the walk
+      fun (MsgStatus, {0, _IndexStateN}) ->
+              false = MsgStatus #msg_status.index_on_disk, %% ASSERTION
+              stop;
+          %% quota left: force the index entry to disk and use one unit
+          (MsgStatus, {N, IndexStateN}) when N > 0 ->
+              false = MsgStatus #msg_status.index_on_disk, %% ASSERTION
+              {MsgStatus1, IndexStateN1} =
+                  maybe_write_index_to_disk(true, MsgStatus, IndexStateN),
+              {true, m(MsgStatus1), {N-1, IndexStateN1}}
+      end, {Quota, IndexState}, Q).
+
+%% Computes how many index entries may be kept in RAM only. An empty queue
+%% has no limit ('infinity'). Otherwise, with B the combined length of q2
+%% and q3, the result is B - trunc(B * B / (len - delta count)).
+permitted_ram_index_count(#vqstate { len = 0 }) ->
+    infinity;
+permitted_ram_index_count(#vqstate { len   = Len,
+                                     q2    = Q2,
+                                     q3    = Q3,
+                                     delta = #delta { count = DeltaCount } }) ->
+    Betas      = bpqueue:len(Q2) + bpqueue:len(Q3),
+    NotInDelta = Len - DeltaCount,
+    Betas - trunc(Betas * Betas / NotInDelta).
+
+%% Returns how much of the excess of Current over Permitted to deal with
+%% in one go: 0 when there is no excess or no limit ('infinity'),
+%% otherwise the excess capped at ?IO_BATCH_SIZE.
+chunk_size(_Current, infinity) ->
+    0;
+chunk_size(Current, Permitted) when Permitted >= Current ->
+    0;
+chunk_size(Current, Permitted) ->
+    Excess = Current - Permitted,
+    case ?IO_BATCH_SIZE < Excess of
+        true  -> ?IO_BATCH_SIZE;
+        false -> Excess
+    end.
+
+%% Moves the message at the head of q3 to the tail of q4, reading its body
+%% back from the message store. Returns {empty, State} when q3 is empty
+%% and {loaded, State1} otherwise.
+%%
+%% ram_msg_count grows by one; ram_index_count shrinks by one if the
+%% entry's bpqueue prefix (bound as IndexOnDisk) was false. If q3 becomes
+%% empty, either q1 is appended to q4 (when delta is empty too) or q3 is
+%% refilled via maybe_deltas_to_betas/1.
+fetch_from_q3_to_q4(State = #vqstate {
+                      q1                = Q1,
+                      q2                = Q2,
+                      delta             = #delta { count = DeltaCount },
+                      q3                = Q3,
+                      q4                = Q4,
+                      ram_msg_count     = RamMsgCount,
+                      ram_index_count   = RamIndexCount,
+                      msg_store_clients = MSCState }) ->
+    case bpqueue:out(Q3) of
+        {empty, _Q3} ->
+            {empty, State};
+        %% entries in q3 must not carry a message body (msg = undefined)
+        {{value, IndexOnDisk, MsgStatus = #msg_status {
+                                msg = undefined, guid = Guid,
+                                is_persistent = IsPersistent }}, Q3a} ->
+            %% the read must succeed and yield a #basic_message
+            {{ok, Msg = #basic_message {}}, MSCState1} =
+                read_from_msg_store(MSCState, IsPersistent, Guid),
+            Q4a = queue:in(m(MsgStatus #msg_status { msg = Msg }), Q4),
+            RamIndexCount1 = RamIndexCount - one_if(not IndexOnDisk),
+            true = RamIndexCount1 >= 0, %% ASSERTION
+            State1 = State #vqstate { q3                = Q3a,
+                                      q4                = Q4a,
+                                      ram_msg_count     = RamMsgCount + 1,
+                                      ram_index_count   = RamIndexCount1,
+                                      msg_store_clients = MSCState1 },
+            State2 =
+                case {bpqueue:is_empty(Q3a), 0 == DeltaCount} of
+                    {true, true} ->
+                        %% q3 is now empty, it wasn't before; delta is
+                        %% still empty. So q2 must be empty, and q1
+                        %% can now be joined onto q4
+                        true = bpqueue:is_empty(Q2), %% ASSERTION
+                        State1 #vqstate { q1 = queue:new(),
+                                          q4 = queue:join(Q4a, Q1) };
+                    {true, false} ->
+                        %% q3 is now empty but delta is not: try to
+                        %% refill q3 from delta
+                        maybe_deltas_to_betas(State1);
+                    {false, _} ->
+                        %% q3 still isn't empty, we've not touched
+                        %% delta, so the invariants between q1, q2,
+                        %% delta and q3 are maintained
+                        State1
+                end,
+            {loaded, State2}
+    end.
+
+%% Reads the next chunk of delta from the queue index into q3, if that is
+%% currently wanted. Returns the new state.
+%%
+%% Nothing happens when delta is blank, or when q3 is non-empty while
+%% target_ram_msg_count is 0. Otherwise the index entries from the start
+%% of delta up to the next segment boundary (or the end of delta,
+%% whichever comes first) are read and converted by
+%% betas_from_index_entries/3:
+%%  - if that yields no betas, delta's start is advanced past the chunk
+%%    and the function tries again;
+%%  - if delta is thereby exhausted, q2 is appended to q3 and delta
+%%    becomes blank;
+%%  - otherwise delta is shortened and the new betas are appended to q3.
+maybe_deltas_to_betas(State = #vqstate { delta = ?BLANK_DELTA_PATTERN(X) }) ->
+    State;
+maybe_deltas_to_betas(State = #vqstate {
+                        q2                   = Q2,
+                        delta                = Delta,
+                        q3                   = Q3,
+                        index_state          = IndexState,
+                        target_ram_msg_count = TargetRamMsgCount,
+                        transient_threshold  = TransientThreshold }) ->
+    case bpqueue:is_empty(Q3) orelse (TargetRamMsgCount /= 0) of
+        false ->
+            State;
+        true ->
+            #delta { start_seq_id = DeltaSeqId,
+                     count        = DeltaCount,
+                     end_seq_id   = DeltaSeqIdEnd } = Delta,
+            DeltaSeqId1 =
+                lists:min([rabbit_queue_index:next_segment_boundary(DeltaSeqId),
+                           DeltaSeqIdEnd]),
+            {List, IndexState1} =
+                rabbit_queue_index:read(DeltaSeqId, DeltaSeqId1, IndexState),
+            {Q3a, IndexState2} = betas_from_index_entries(
+                                   List, TransientThreshold, IndexState1),
+            State1 = State #vqstate { index_state = IndexState2 },
+            case bpqueue:len(Q3a) of
+                0 ->
+                    %% we ignored every message in the segment due to
+                    %% it being transient and below the threshold.
+                    %% Carry on from State1, not State: the read above
+                    %% and the acks/delivers issued while filtering
+                    %% have changed the index state, and that must not
+                    %% be thrown away.
+                    maybe_deltas_to_betas(
+                      State1 #vqstate {
+                        delta = Delta #delta { start_seq_id = DeltaSeqId1 }});
+                Q3aLen ->
+                    Q3b = bpqueue:join(Q3, Q3a),
+                    case DeltaCount - Q3aLen of
+                        0 ->
+                            %% delta is now empty, but it wasn't
+                            %% before, so can now join q2 onto q3
+                            State1 #vqstate { q2    = bpqueue:new(),
+                                              delta = ?BLANK_DELTA,
+                                              q3    = bpqueue:join(Q3b, Q2) };
+                        N when N > 0 ->
+                            Delta1 = #delta { start_seq_id = DeltaSeqId1,
+                                              count        = N,
+                                              end_seq_id   = DeltaSeqIdEnd },
+                            State1 #vqstate { delta = Delta1,
+                                              q3    = Q3b }
+                    end
+            end
+    end.
+
+%% Converts up to Quota alphas into betas, taking them from q1 first and
+%% spending whatever quota remains on q4. Returns the new state.
+push_alphas_to_betas(Quota, State) ->
+    {QuotaAfterQ1, AfterQ1} = maybe_push_q1_to_betas(Quota, State),
+    {_QuotaLeft,   AfterQ4} = maybe_push_q4_to_betas(QuotaAfterQ1, AfterQ1),
+    AfterQ4.
+
+%% Pushes messages from the head of q1 out to the betas via
+%% maybe_push_alphas_to_betas/5. Each one goes to the tail of q3 while
+%% delta is empty and to the tail of q2 otherwise, filed under its
+%% index_on_disk flag. Returns {QuotaLeft, State1}.
+maybe_push_q1_to_betas(Quota, State = #vqstate { q1 = Q1 }) ->
+    Consumer =
+        fun (Beta = #msg_status { index_on_disk = OnDisk }, Q1a,
+             StateN = #vqstate { q3    = Q3,
+                                 delta = #delta { count = 0 } }) ->
+                StateN #vqstate { q1 = Q1a,
+                                  q3 = bpqueue:in(OnDisk, Beta, Q3) };
+            (Beta = #msg_status { index_on_disk = OnDisk }, Q1a,
+             StateN = #vqstate { q2 = Q2 }) ->
+                StateN #vqstate { q1 = Q1a,
+                                  q2 = bpqueue:in(OnDisk, Beta, Q2) }
+        end,
+    maybe_push_alphas_to_betas(fun queue:out/1, Consumer, Quota, Q1, State).
+
+%% Pushes messages from the tail of q4 out to the betas via
+%% maybe_push_alphas_to_betas/5. Each one is put at the head of q3, filed
+%% under its index_on_disk flag. Returns {QuotaLeft, State1}.
+maybe_push_q4_to_betas(Quota, State = #vqstate { q4 = Q4 }) ->
+    Consumer =
+        fun (Beta = #msg_status { index_on_disk = OnDisk }, Q4a,
+             StateN = #vqstate { q3 = Q3 }) ->
+                StateN #vqstate { q3 = bpqueue:in_r(OnDisk, Beta, Q3),
+                                  q4 = Q4a }
+        end,
+    maybe_push_alphas_to_betas(fun queue:out_r/1, Consumer, Quota, Q4, State).
+
+maybe_push_alphas_to_betas(_Generator, _Consumer, Quota, _Q,
+                           State = #vqstate { ram_msg_count        = InRam,
+                                              target_ram_msg_count = Target })
+  when Quota =:= 0; Target =:= infinity; InRam =< Target ->
+    %% out of quota, no RAM target set, or RAM use already within target
+    {Quota, State};
+maybe_push_alphas_to_betas(Generator, Consumer, Quota, Q, State) ->
+    case Generator(Q) of
+        {empty, _} ->
+            {Quota, State};
+        {{value, Alpha}, Rest} ->
+            %% asserts the message is on disk before its body is dropped
+            {OnDisk = #msg_status { msg_on_disk   = true,
+                                    index_on_disk = IndexOnDisk },
+             Written = #vqstate { ram_msg_count   = RamMsgs,
+                                  ram_index_count = RamIdx }} =
+                maybe_write_to_disk(true, false, Alpha, State),
+            Beta = m(OnDisk #msg_status { msg = undefined }),
+            Counted = Written #vqstate {
+                        ram_msg_count   = RamMsgs - 1,
+                        ram_index_count = RamIdx + one_if(not IndexOnDisk) },
+            maybe_push_alphas_to_betas(Generator, Consumer, Quota - 1, Rest,
+                                       Consumer(Beta, Rest, Counted))
+    end.
+
+push_betas_to_deltas(State = #vqstate { q2              = Q2,
+                                        delta           = Delta0,
+                                        q3              = Q3,
+                                        index_state     = Index0,
+                                        ram_index_count = RamIdx0 }) ->
+    %% q2: the limit is its own first seq_id; entries leave from the front
+    {DQ2, Q2Left, RamIdx1, Index1} =
+        push_betas_to_deltas(fun (MinSeqId) -> MinSeqId end,
+                             fun bpqueue:out/1, Q2, RamIdx0, Index0),
+    %% q3: the limit is the next segment boundary; entries leave the rear
+    {DQ3, Q3Left, RamIdx2, Index2} =
+        push_betas_to_deltas(fun rabbit_queue_index:next_segment_boundary/1,
+                             fun bpqueue:out_r/1, Q3, RamIdx1, Index1),
+    State #vqstate {
+      q2              = Q2Left,
+      delta           = combine_deltas(DQ3, combine_deltas(Delta0, DQ2)),
+      q3              = Q3Left,
+      index_state     = Index2,
+      ram_index_count = RamIdx2 }.
+
+push_betas_to_deltas(LimitFun, Generator, Q, RamIndexCount, IndexState) ->
+    Untouched = {?BLANK_DELTA, Q, RamIndexCount, IndexState},
+    case bpqueue:out(Q) of
+        {empty, _} ->
+            Untouched;
+        {{value, _, #msg_status { seq_id = First }}, _} ->
+            {{value, _, #msg_status { seq_id = Last }}, _} = bpqueue:out_r(Q),
+            Limit = LimitFun(First),
+            if Last < Limit -> Untouched; % no entry reaches the limit
+               true ->
+                    {Len, QLeft, RamIndexCount1, IndexState1} =
+                        push_betas_to_deltas(Generator, Limit, Q, 0,
+                                             RamIndexCount, IndexState),
+                    {#delta { start_seq_id = Limit,
+                              count        = Len,
+                              end_seq_id   = Last + 1 },
+                     QLeft, RamIndexCount1, IndexState1}
+            end
+    end.
+
+push_betas_to_deltas(Generator, Limit, Q, Count, RamIndexCount, IndexState) ->
+    Done = {Count, Q, RamIndexCount, IndexState}, % returned when we stop
+    case Generator(Q) of
+        {empty, _} ->
+            Done;
+        {{value, _, #msg_status { seq_id = SeqId }}, _} when SeqId < Limit ->
+            Done;
+        {{value, IndexOnDisk, Beta}, Rest} ->
+            {RamIndexCount1, IndexState1} =
+                case IndexOnDisk of
+                    true  -> {RamIndexCount, IndexState};
+                    false ->
+                        %% the match asserts index_on_disk = true afterwards
+                        {#msg_status { index_on_disk = true }, Written} =
+                            maybe_write_index_to_disk(true, Beta, IndexState),
+                        {RamIndexCount - 1, Written}
+                end,
+            push_betas_to_deltas(
+              Generator, Limit, Rest, Count + 1, RamIndexCount1, IndexState1)
+    end.
diff --git a/src/rabbit_writer.erl b/src/rabbit_writer.erl
index 54c60f5b..f90ee734 100644
--- a/src/rabbit_writer.erl
+++ b/src/rabbit_writer.erl
@@ -33,14 +33,14 @@
 -include("rabbit.hrl").
 -include("rabbit_framing.hrl").
 
--export([start/3, start_link/3, shutdown/1, mainloop/1]).
+-export([start/4, start_link/4, shutdown/1, mainloop/1]).
 -export([send_command/2, send_command/3, send_command_and_signal_back/3,
          send_command_and_signal_back/4, send_command_and_notify/5]).
--export([internal_send_command/3, internal_send_command/5]).
+-export([internal_send_command/4, internal_send_command/6]).
 
 -import(gen_tcp).
 
--record(wstate, {sock, channel, frame_max}).
+-record(wstate, {sock, channel, frame_max, protocol}).
 
 -define(HIBERNATE_AFTER, 5000).
 
@@ -48,34 +48,53 @@
 
 -ifdef(use_specs).
 
--spec(start/3 :: (socket(), channel_number(), non_neg_integer()) -> pid()).
--spec(start_link/3 :: (socket(), channel_number(), non_neg_integer()) -> pid()).
--spec(send_command/2 :: (pid(), amqp_method()) -> 'ok').
--spec(send_command/3 :: (pid(), amqp_method(), content()) -> 'ok').
--spec(send_command_and_signal_back/3 :: (pid(), amqp_method(), pid()) -> 'ok').
+-spec(start/4 ::
+        (rabbit_net:socket(), rabbit_channel:channel_number(),
+         non_neg_integer(), rabbit_types:protocol())
+        -> rabbit_types:ok(pid())).
+-spec(start_link/4 ::
+        (rabbit_net:socket(), rabbit_channel:channel_number(),
+         non_neg_integer(), rabbit_types:protocol())
+        -> rabbit_types:ok(pid())).
+-spec(send_command/2 ::
+        (pid(), rabbit_framing:amqp_method_record()) -> 'ok').
+-spec(send_command/3 ::
+        (pid(), rabbit_framing:amqp_method_record(), rabbit_types:content())
+        -> 'ok').
+-spec(send_command_and_signal_back/3 ::
+        (pid(), rabbit_framing:amqp_method_record(), pid()) -> 'ok').
 -spec(send_command_and_signal_back/4 ::
-      (pid(), amqp_method(), content(), pid()) -> 'ok').
+        (pid(), rabbit_framing:amqp_method_record(), rabbit_types:content(),
+         pid()) -> 'ok').
 -spec(send_command_and_notify/5 ::
-      (pid(), pid(), pid(), amqp_method(), content()) -> 'ok').
--spec(internal_send_command/3 ::
-      (socket(), channel_number(), amqp_method()) -> 'ok').
--spec(internal_send_command/5 ::
-      (socket(), channel_number(), amqp_method(),
-       content(), non_neg_integer()) -> 'ok').
+        (pid(), pid(), pid(), rabbit_framing:amqp_method_record(),
+         rabbit_types:content())
+        -> 'ok').
+-spec(internal_send_command/4 ::
+        (rabbit_net:socket(), rabbit_channel:channel_number(),
+         rabbit_framing:amqp_method_record(), rabbit_types:protocol())
+        -> 'ok').
+-spec(internal_send_command/6 ::
+        (rabbit_net:socket(), rabbit_channel:channel_number(),
+         rabbit_framing:amqp_method_record(), rabbit_types:content(),
+         non_neg_integer(), rabbit_types:protocol())
+        -> 'ok').
 
 -endif.
 
 %%----------------------------------------------------------------------------
 
-start(Sock, Channel, FrameMax) ->
-    spawn(?MODULE, mainloop, [#wstate{sock = Sock,
-                                      channel = Channel,
-                                      frame_max = FrameMax}]).
-
-start_link(Sock, Channel, FrameMax) ->
-    spawn_link(?MODULE, mainloop, [#wstate{sock = Sock,
+start(Sock, Channel, FrameMax, Protocol) ->
+    {ok, spawn(?MODULE, mainloop, [#wstate{sock = Sock,
                                            channel = Channel,
-                                           frame_max = FrameMax}]).
+                                           frame_max = FrameMax,
+                                           protocol = Protocol}])}.
+
+start_link(Sock, Channel, FrameMax, Protocol) ->
+    %% same as start/4, except that the writer is linked to the caller
+    Initial = #wstate{sock      = Sock,     channel  = Channel,
+                      frame_max = FrameMax, protocol = Protocol},
+    {ok, spawn_link(?MODULE, mainloop, [Initial])}.
 
 mainloop(State) ->
     receive
@@ -85,35 +104,40 @@ mainloop(State) ->
     end.
 
 handle_message({send_command, MethodRecord},
-               State = #wstate{sock = Sock, channel = Channel}) ->
-    ok = internal_send_command_async(Sock, Channel, MethodRecord),
+               State = #wstate{sock = Sock, channel = Channel,
+                               protocol = Protocol}) ->
+    ok = internal_send_command_async(Sock, Channel, MethodRecord, Protocol),
     State;
 handle_message({send_command, MethodRecord, Content},
                State = #wstate{sock = Sock,
                                channel = Channel,
-                               frame_max = FrameMax}) ->
+                               frame_max = FrameMax,
+                               protocol = Protocol}) ->
     ok = internal_send_command_async(Sock, Channel, MethodRecord,
-                                     Content, FrameMax),
+                                     Content, FrameMax, Protocol),
     State;
 handle_message({send_command_and_signal_back, MethodRecord, Parent},
-               State = #wstate{sock = Sock, channel = Channel}) ->
-    ok = internal_send_command_async(Sock, Channel, MethodRecord),
+               State = #wstate{sock = Sock, channel = Channel,
+                               protocol = Protocol}) ->
+    ok = internal_send_command_async(Sock, Channel, MethodRecord, Protocol),
     Parent ! rabbit_writer_send_command_signal,
     State;
 handle_message({send_command_and_signal_back, MethodRecord, Content, Parent},
                State = #wstate{sock = Sock,
                                channel = Channel,
-                               frame_max = FrameMax}) ->
+                               frame_max = FrameMax,
+                               protocol = Protocol}) ->
     ok = internal_send_command_async(Sock, Channel, MethodRecord,
-                                     Content, FrameMax),
+                                     Content, FrameMax, Protocol),
     Parent ! rabbit_writer_send_command_signal,
     State;
 handle_message({send_command_and_notify, QPid, ChPid, MethodRecord, Content},
                State = #wstate{sock = Sock,
                                channel = Channel,
-                               frame_max = FrameMax}) ->
+                               frame_max = FrameMax,
+                               protocol = Protocol}) ->
     ok = internal_send_command_async(Sock, Channel, MethodRecord,
-                                     Content, FrameMax),
+                                     Content, FrameMax, Protocol),
     rabbit_amqqueue:notify_sent(QPid, ChPid),
     State;
 handle_message({inet_reply, _, ok}, State) ->
@@ -149,34 +173,37 @@ send_command_and_notify(W, Q, ChPid, MethodRecord, Content) ->
 
 shutdown(W) ->
     W ! shutdown,
+    rabbit_misc:unlink_and_capture_exit(W),
     ok.
 
 %---------------------------------------------------------------------------
 
-assemble_frames(Channel, MethodRecord) ->
+assemble_frames(Channel, MethodRecord, Protocol) ->
     ?LOGMESSAGE(out, Channel, MethodRecord, none),
-    rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord).
+    rabbit_binary_generator:build_simple_method_frame(Channel, MethodRecord,
+                                                      Protocol).
 
-assemble_frames(Channel, MethodRecord, Content, FrameMax) ->
+assemble_frames(Channel, MethodRecord, Content, FrameMax, Protocol) ->
     ?LOGMESSAGE(out, Channel, MethodRecord, Content),
     MethodName = rabbit_misc:method_record_type(MethodRecord),
-    true = rabbit_framing:method_has_content(MethodName), % assertion
+    true = Protocol:method_has_content(MethodName), % assertion
     MethodFrame = rabbit_binary_generator:build_simple_method_frame(
-                    Channel, MethodRecord),
+                    Channel, MethodRecord, Protocol),
     ContentFrames = rabbit_binary_generator:build_simple_content_frames(
-                      Channel, Content, FrameMax),
+                      Channel, Content, FrameMax, Protocol),
     [MethodFrame | ContentFrames].
 
 tcp_send(Sock, Data) ->
     rabbit_misc:throw_on_error(inet_error,
                                fun () -> rabbit_net:send(Sock, Data) end).
 
-internal_send_command(Sock, Channel, MethodRecord) ->
-    ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord)).
+internal_send_command(Sock, Channel, MethodRecord, Protocol) ->
+    ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord, Protocol)).
 
-internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) ->
+internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax,
+                      Protocol) ->
     ok = tcp_send(Sock, assemble_frames(Channel, MethodRecord,
-                                        Content, FrameMax)).
+                                        Content, FrameMax, Protocol)).
 
 %% gen_tcp:send/2 does a selective receive of {inet_reply, Sock,
 %% Status} to obtain the result. That is bad when it is called from
@@ -196,13 +223,14 @@ internal_send_command(Sock, Channel, MethodRecord, Content, FrameMax) ->
 %% Also note that the port has bounded buffers and port_command blocks
 %% when these are full. So the fact that we process the result
 %% asynchronously does not impact flow control.
-internal_send_command_async(Sock, Channel, MethodRecord) ->
-    true = port_cmd(Sock, assemble_frames(Channel, MethodRecord)),
+internal_send_command_async(Sock, Channel, MethodRecord, Protocol) ->
+    true = port_cmd(Sock, assemble_frames(Channel, MethodRecord, Protocol)),
     ok.
 
-internal_send_command_async(Sock, Channel, MethodRecord, Content, FrameMax) ->
+internal_send_command_async(Sock, Channel, MethodRecord, Content, FrameMax,
+                            Protocol) ->
     true = port_cmd(Sock, assemble_frames(Channel, MethodRecord,
-                                              Content, FrameMax)),
+                                          Content, FrameMax, Protocol)),
     ok.
 
 port_cmd(Sock, Data) ->
diff --git a/src/supervisor2.erl b/src/supervisor2.erl
index 55753512..fb4c9b02 100644
--- a/src/supervisor2.erl
+++ b/src/supervisor2.erl
@@ -4,27 +4,50 @@
 %% 1) the module name is supervisor2
 %%
 %% 2) there is a new strategy called
-%% simple_one_for_one_terminate. This is exactly the same as for
-%% simple_one_for_one, except that children *are* explicitly
-%% terminated as per the shutdown component of the child_spec.
+%%    simple_one_for_one_terminate. This is exactly the same as for
+%%    simple_one_for_one, except that children *are* explicitly
+%%    terminated as per the shutdown component of the child_spec.
 %%
-%% All modifications are (C) 2010 LShift Ltd.
+%% 3) child specifications can contain, as the restart type, a tuple
+%%    {permanent, Delay} | {transient, Delay} where Delay >= 0. The
+%%    delay, in seconds, indicates what should happen if a child, upon
+%%    being restarted, exceeds the MaxT and MaxR parameters. Thus, if
+%%    a child exits, it is restarted as normal. If it exits
+%%    sufficiently quickly and often to exceed the boundaries set by
+%%    the MaxT and MaxR parameters, and a Delay is specified, then
+%%    rather than stopping the supervisor, the supervisor instead
+%%    continues and tries to start up the child again, Delay seconds
+%%    later.
+%%
+%%    Note that you can never restart more frequently than the MaxT
+%%    and MaxR parameters allow: i.e. you must wait until *both* the
+%%    Delay has passed *and* the MaxT and MaxR parameters allow the
+%%    child to be restarted.
+%%
+%%    Also note that the Delay is a *minimum*. There is no guarantee
+%%    that the child will be restarted within that time, especially if
+%%    other processes are dying and being restarted at the same time -
+%%    essentially we have to wait for the delay to have passed and for
+%%    the MaxT and MaxR parameters to permit the child to be
+%%    restarted. This may require waiting for longer than Delay.
+%%
+%% All modifications are (C) 2010 Rabbit Technologies Ltd.
 %%
 %% %CopyrightBegin%
-%% 
+%%
 %% Copyright Ericsson AB 1996-2009. All Rights Reserved.
-%% 
+%%
 %% The contents of this file are subject to the Erlang Public License,
 %% Version 1.1, (the "License"); you may not use this file except in
 %% compliance with the License. You should have received a copy of the
 %% Erlang Public License along with this software. If not, it can be
 %% retrieved online at http://www.erlang.org/.
-%% 
+%%
 %% Software distributed under the License is distributed on an "AS IS"
 %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 %% the License for the specific language governing rights and limitations
 %% under the License.
-%% 
+%%
 %% %CopyrightEnd%
 %%
 -module(supervisor2).
@@ -43,6 +66,7 @@
 %% Internal exports
 -export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]).
 -export([handle_cast/2]).
+-export([delayed_restart/2]).
 
 -define(DICT, dict).
 
@@ -119,6 +143,9 @@ check_childspecs(ChildSpecs) when is_list(ChildSpecs) ->
     end;
 check_childspecs(X) -> {error, {badarg, X}}.
 
+delayed_restart(Supervisor, RestartDetails) -> % target of the restart timer
+    gen_server:cast(Supervisor, {delayed_restart, RestartDetails}).
+
 %%% ---------------------------------------------------
 %%% 
 %%% Initialize the supervisor.
@@ -301,13 +328,13 @@ handle_call({terminate_child, Name}, _From, State) ->
 
 handle_call(which_children, _From, State) when ?is_simple(State) ->
     [#child{child_type = CT, modules = Mods}] = State#state.children,
-    Reply = lists:map(fun({Pid, _}) -> {undefined, Pid, CT, Mods} end,
+    Reply = lists:map(fun ({Pid, _}) -> {undefined, Pid, CT, Mods} end,
 		      ?DICT:to_list(State#state.dynamics)),
     {reply, Reply, State};
 
 handle_call(which_children, _From, State) ->
     Resp =
-	lists:map(fun(#child{pid = Pid, name = Name,
+	lists:map(fun (#child{pid = Pid, name = Name,
 			     child_type = ChildType, modules = Mods}) ->
 		    {Name, Pid, ChildType, Mods}
 		  end,
@@ -315,6 +342,20 @@ handle_call(which_children, _From, State) ->
     {reply, Resp, State}.
 
 
+handle_cast({delayed_restart, {RestartType, Reason, Child}}, State)
+  when ?is_simple(State) ->
+    {ok, NState} = do_restart(RestartType, Reason, Child, State),
+    {noreply, NState};
+handle_cast({delayed_restart, {RestartType, Reason, Child}}, State)
+  when not (?is_simple(State)) ->
+    case get_child(Child#child.name, State) of
+        {value, Child} ->
+            {ok, NState} = do_restart(RestartType, Reason, Child, State),
+            {noreply, NState};
+        _ ->
+            {noreply, State}
+    end;
+
 %%% Hopefully cause a function-clause as there is no API function
 %%% that utilizes cast.
 handle_cast(null, State) ->
@@ -415,7 +456,7 @@ update_childspec1([], Children, KeepOld) ->
     lists:reverse(Children ++ KeepOld).  
 
 update_chsp(OldCh, Children) ->
-    case lists:map(fun(Ch) when OldCh#child.name =:= Ch#child.name ->
+    case lists:map(fun (Ch) when OldCh#child.name =:= Ch#child.name ->
 			   Ch#child{pid = OldCh#child.pid};
 		      (Ch) ->
 			   Ch
@@ -480,6 +521,16 @@ restart_child(Pid, Reason, State) ->
 	    {ok, State}
     end.
 
+do_restart({RestartType, Delay}, Reason, Child, State) ->
+    case restart1(Child, State) of
+        {ok, NState} ->
+            {ok, NState};
+        {terminate, NState} ->
+            {ok, _TRef} = timer:apply_after(
+                            trunc(Delay*1000), ?MODULE, delayed_restart,
+                            [self(), {{RestartType, Delay}, Reason, Child}]),
+            {ok, NState}
+    end;
 do_restart(permanent, Reason, Child, State) ->
     report_error(child_terminated, Reason, Child, State#state.name),
     restart(Child, State);
@@ -500,14 +551,27 @@ do_restart(temporary, Reason, Child, State) ->
 restart(Child, State) ->
     case add_restart(State) of
 	{ok, NState} ->
-	    restart(NState#state.strategy, Child, NState);
+	    restart(NState#state.strategy, Child, NState, fun restart/2);
 	{terminate, NState} ->
 	    report_error(shutdown, reached_max_restart_intensity,
 			 Child, State#state.name),
 	    {shutdown, remove_child(Child, NState)}
     end.
 
-restart(Strategy, Child, State)
+restart1(Child, State) -> % restart/2 variant used for delayed restarts
+    case add_restart(State) of
+	{ok, NState} ->
+	    restart(NState#state.strategy, Child, NState, fun restart1/2);
+	{terminate, _NState} ->
+            %% Max restart intensity has been reached. add_restart/1
+            %% has already recorded this attempt in the restarts
+            %% field of the state it returned; since we do not want
+            %% to die here, return the original State so that a
+            %% later (delayed) restart attempt remains possible.
+            {terminate, State}
+    end.
+
+restart(Strategy, Child, State, Restart)
   when Strategy =:= simple_one_for_one orelse
        Strategy =:= simple_one_for_one_terminate ->
     #child{mfa = {M, F, A}} = Child,
@@ -521,9 +585,9 @@ restart(Strategy, Child, State)
 	    {ok, NState};
 	{error, Error} ->
 	    report_error(start_error, Error, Child, State#state.name),
-	    restart(Child, State)
+	    Restart(Child, State)
     end;
-restart(one_for_one, Child, State) ->
+restart(one_for_one, Child, State, Restart) ->
     case do_start_child(State#state.name, Child) of
 	{ok, Pid} ->
 	    NState = replace_child(Child#child{pid = Pid}, State),
@@ -533,25 +597,25 @@ restart(one_for_one, Child, State) ->
 	    {ok, NState};
 	{error, Reason} ->
 	    report_error(start_error, Reason, Child, State#state.name),
-	    restart(Child, State)
+	    Restart(Child, State)
     end;
-restart(rest_for_one, Child, State) ->
+restart(rest_for_one, Child, State, Restart) ->
     {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children),
     ChAfter2 = terminate_children(ChAfter, State#state.name),
     case start_children(ChAfter2, State#state.name) of
 	{ok, ChAfter3} ->
 	    {ok, State#state{children = ChAfter3 ++ ChBefore}};
 	{error, ChAfter3} ->
-	    restart(Child, State#state{children = ChAfter3 ++ ChBefore})
+	    Restart(Child, State#state{children = ChAfter3 ++ ChBefore})
     end;
-restart(one_for_all, Child, State) ->
+restart(one_for_all, Child, State, Restart) ->
     Children1 = del_child(Child#child.pid, State#state.children),
     Children2 = terminate_children(Children1, State#state.name),
     case start_children(Children2, State#state.name) of
 	{ok, NChs} ->
 	    {ok, State#state{children = NChs}};
 	{error, NChs} ->
-	    restart(Child, State#state{children = NChs})
+	    Restart(Child, State#state{children = NChs})
     end.
 
 %%-----------------------------------------------------------------
@@ -769,7 +833,9 @@ supname(N,_)      -> N.
 %%%    {Name, Func, RestartType, Shutdown, ChildType, Modules}
 %%% where Name is an atom
 %%%       Func is {Mod, Fun, Args} == {atom, atom, list}
-%%%       RestartType is permanent | temporary | transient
+%%%       RestartType is permanent | temporary | transient |
+%%%                      {permanent, Delay} |
+%%%                      {transient, Delay} where Delay >= 0
 %%%       Shutdown = integer() | infinity | brutal_kill
 %%%       ChildType = supervisor | worker
 %%%       Modules = [atom()] | dynamic
@@ -815,10 +881,17 @@ validFunc({M, F, A}) when is_atom(M),
                           is_list(A) -> true;
 validFunc(Func)                      -> throw({invalid_mfa, Func}).
 
-validRestartType(permanent)   -> true;
-validRestartType(temporary)   -> true;
-validRestartType(transient)   -> true;
-validRestartType(RestartType) -> throw({invalid_restart_type, RestartType}).
+%% Plain atoms are always valid; the {Type, Delay} form is accepted
+%% for permanent and transient only, subject to validDelay/1.
+validRestartType(Type) when Type =:= permanent; Type =:= temporary;
+                            Type =:= transient -> true;
+validRestartType({Type, Delay}) when Type =:= permanent;
+                                     Type =:= transient -> validDelay(Delay);
+validRestartType(Other) -> throw({invalid_restart_type, Other}).
+
+%% A delay is any non-negative number (integer or float) of seconds.
+validDelay(Delay) when is_number(Delay) andalso Delay >= 0 -> true;
+validDelay(Other) -> throw({invalid_delay, Other}).
 
 validShutdown(Shutdown, _) 
   when is_integer(Shutdown), Shutdown > 0 -> true;
@@ -828,7 +901,7 @@ validShutdown(Shutdown, _)             -> throw({invalid_shutdown, Shutdown}).
 
 validMods(dynamic) -> true;
 validMods(Mods) when is_list(Mods) ->
-    lists:foreach(fun(Mod) ->
+    lists:foreach(fun (Mod) ->
 		    if
 			is_atom(Mod) -> ok;
 			true -> throw({invalid_module, Mod})
diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl
index 3b23daa5..cc4982c9 100644
--- a/src/tcp_acceptor.erl
+++ b/src/tcp_acceptor.erl
@@ -75,6 +75,13 @@ handle_info({inet_async, LSock, Ref, {ok, Sock}},
         error_logger:info_msg("accepted TCP connection on ~s:~p from ~s:~p~n",
                               [inet_parse:ntoa(Address), Port,
                                inet_parse:ntoa(PeerAddress), PeerPort]),
+        %% In the event that somebody floods us with connections we can spew
+        %% the above message at error_logger faster than it can keep up.
+        %% So error_logger's mailbox grows unbounded until we eat all the
+        %% memory available and crash. So here's a meaningless synchronous call
+        %% to the underlying gen_event mechanism - when it returns the mailbox
+        %% is drained.
+        gen_event:which_handlers(error_logger),
         %% handle
         file_handle_cache:release_on_death(apply(M, F, A ++ [Sock]))
     catch {inet_error, Reason} ->
diff --git a/src/test_sup.erl b/src/test_sup.erl
new file mode 100644
index 00000000..f41793bc
--- /dev/null
+++ b/src/test_sup.erl
@@ -0,0 +1,94 @@
+%%   The contents of this file are subject to the Mozilla Public License
+%%   Version 1.1 (the "License"); you may not use this file except in
+%%   compliance with the License. You may obtain a copy of the License at
+%%   http://www.mozilla.org/MPL/
+%%
+%%   Software distributed under the License is distributed on an "AS IS"
+%%   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+%%   License for the specific language governing rights and limitations
+%%   under the License.
+%%
+%%   The Original Code is RabbitMQ.
+%%
+%%   The Initial Developers of the Original Code are LShift Ltd,
+%%   Cohesive Financial Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created before 22-Nov-2008 00:00:00 GMT by LShift Ltd,
+%%   Cohesive Financial Technologies LLC, or Rabbit Technologies Ltd
+%%   are Copyright (C) 2007-2008 LShift Ltd, Cohesive Financial
+%%   Technologies LLC, and Rabbit Technologies Ltd.
+%%
+%%   Portions created by LShift Ltd are Copyright (C) 2007-2010 LShift
+%%   Ltd. Portions created by Cohesive Financial Technologies LLC are
+%%   Copyright (C) 2007-2010 Cohesive Financial Technologies
+%%   LLC. Portions created by Rabbit Technologies Ltd are Copyright
+%%   (C) 2007-2010 Rabbit Technologies Ltd.
+%%
+%%   All Rights Reserved.
+%%
+%%   Contributor(s): ______________________________________.
+%%
+
+-module(test_sup).
+
+-behaviour(supervisor2).
+
+-export([test_supervisor_delayed_restart/0,
+         init/1, start_child/0]).
+
+test_supervisor_delayed_restart() ->
+    %% the simple strategy gets its child via an explicit start_child call
+    StartThenRun = fun (SupPid) ->
+                           {ok, _} = supervisor2:start_child(SupPid, []),
+                           test_supervisor_delayed_restart(SupPid)
+                   end,
+    passed = with_sup(simple_one_for_one_terminate, StartThenRun),
+    passed = with_sup(one_for_one, fun test_supervisor_delayed_restart/1).
+
+test_supervisor_delayed_restart(SupPid) ->
+    ok = ping_child(SupPid),
+    ok = exit_child(SupPid),      % first death: child is expected back at once
+    timer:sleep(10),
+    ok = ping_child(SupPid),
+    ok = exit_child(SupPid),      % second death in quick succession
+    timer:sleep(10),
+    timeout = ping_child(SupPid), % this time no child answers within 1s
+    timer:sleep(1010),
+    ok = ping_child(SupPid),      % the delayed restart has taken place
+    passed.
+
+with_sup(RestartStrategy, Fun) ->
+    {ok, SupPid} = supervisor2:start_link(?MODULE, [RestartStrategy]),
+    Res = Fun(SupPid),
+    rabbit_misc:unlink_and_capture_exit(SupPid), % drop the link first so the
+    exit(SupPid, shutdown),                      % shutdown cannot reach us
+    Res.
+
+init([RestartStrategy]) -> % both supervisor limits are 1; child delay is 1
+    Child = {test, {test_sup, start_child, []}, {permanent, 1},
+             16#ffffffff, worker, [test_sup]},
+    {ok, {{RestartStrategy, 1, 1}, [Child]}}.
+
+start_child() -> % the child runs run_child/0, linked to the calling process
+    {ok, proc_lib:spawn_link(fun run_child/0)}.
+
+%% Returns ok if the child answers a ping within one second, else timeout.
+ping_child(SupPid) ->
+    Tag = make_ref(),
+    ChildPid = get_child_pid(SupPid),
+    ChildPid ! {ping, Tag, self()},
+    receive {pong, Tag} -> ok after 1000 -> timeout end.
+
+exit_child(SupPid) -> % sends the current child an 'abnormal' exit signal
+    true = exit(get_child_pid(SupPid), abnormal),
+    ok.
+
+%% Asserts the supervisor reports exactly one worker; returns its pid field.
+get_child_pid(SupPid) ->
+    [{_, Pid, worker, [test_sup]}] = supervisor2:which_children(SupPid),
+    Pid.
+
+run_child() -> % answers every ping with a pong carrying the same ref
+    receive
+        {ping, Tag, From} -> From ! {pong, Tag}, run_child()
+    end.
diff --git a/src/vm_memory_monitor.erl b/src/vm_memory_monitor.erl
index cd03fcc6..bbc3a8c0 100644
--- a/src/vm_memory_monitor.erl
+++ b/src/vm_memory_monitor.erl
@@ -72,8 +72,10 @@
 
 -ifdef(use_specs).
 
--spec(start_link/1 :: (float()) ->
-             ('ignore' | {'error', any()} | {'ok', pid()})).
+-spec(start_link/1 ::
+        (float()) -> 'ignore' |
+                     rabbit_types:error(any()) |
+                     rabbit_types:ok(pid())).
 -spec(update/0 :: () -> 'ok').
 -spec(get_total_memory/0 :: () -> (non_neg_integer() | 'unknown')).
 -spec(get_vm_limit/0 :: () -> (non_neg_integer() | 'unknown')).
diff --git a/src/worker_pool.erl b/src/worker_pool.erl
index 97e07545..01ce3535 100644
--- a/src/worker_pool.erl
+++ b/src/worker_pool.erl
@@ -52,7 +52,7 @@
 
 -ifdef(use_specs).
 
--spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}).
+-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())).
 -spec(submit/1 :: (fun (() -> A) | {atom(), atom(), [any()]}) -> A).
 -spec(submit_async/1 ::
       (fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok').
diff --git a/src/worker_pool_sup.erl b/src/worker_pool_sup.erl
index 4ded63a8..afa21164 100644
--- a/src/worker_pool_sup.erl
+++ b/src/worker_pool_sup.erl
@@ -41,9 +41,9 @@
 
 -ifdef(use_specs).
 
--spec(start_link/0 :: () -> {'ok', pid()} | 'ignore' | {'error', any()}).
--spec(start_link/1 ::
-        (non_neg_integer()) -> {'ok', pid()} | 'ignore' | {'error', any()}).
+-spec(start_link/0 :: () -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())).
+-spec(start_link/1 :: (non_neg_integer()) ->
+                           'ignore' | rabbit_types:ok_or_error2(pid(), any())).
 
 -endif.
 
diff --git a/src/worker_pool_worker.erl b/src/worker_pool_worker.erl
index 57901fd5..a61e4cc3 100644
--- a/src/worker_pool_worker.erl
+++ b/src/worker_pool_worker.erl
@@ -44,7 +44,8 @@
 
 -ifdef(use_specs).
 
--spec(start_link/1 :: (any()) -> {'ok', pid()} | 'ignore' | {'error', any()}).
+-spec(start_link/1 ::
+        (any()) -> 'ignore' | rabbit_types:ok_or_error2(pid(), any())).
 -spec(submit/2 :: (pid(), fun (() -> A) | {atom(), atom(), [any()]}) -> A).
 -spec(submit_async/2 ::
       (pid(), fun (() -> any()) | {atom(), atom(), [any()]}) -> 'ok').